Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp  128
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp  4
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp  166
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp  83
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp  54
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp  8
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp  121
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp  1873
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp  433
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp  114
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp  629
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPO.cpp  11
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp  937
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp  27
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp  25
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Inliner.cpp  150
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Internalize.cpp  103
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp  20
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp  589
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp  560
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp  18
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp  125
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PruneEH.cpp  31
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp (renamed from contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp)  736
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp  62
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp  12
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp  58
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp  443
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  930
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp  353
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp  242
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h  12
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp  136
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp  26
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp  113
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp  78
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp  4
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  90
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp  121
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp  346
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp  253
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp  2
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h  217
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp  84
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp  49
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp  289
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp  39
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp  434
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp  718
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp  491
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp  117
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp  40
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp  673
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h  123
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp  6
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp  10
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h  242
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp  13
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp  168
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h  74
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp  28
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp  77
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h  2
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp  6
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp  8
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/PtrState.h  8
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ADCE.cpp  69
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp  17
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/BDCE.cpp  350
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp  10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp  120
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DCE.cpp  78
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp  264
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp  269
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp  8
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp  49
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp  319
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp  724
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp  24
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp  263
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp  65
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp  12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp  19
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp  113
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp  1347
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp  4
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp  79
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp  566
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp  141
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp  447
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  828
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp  278
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp  455
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp  6
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp  2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp  174
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp  51
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp  199
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp  2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp  104
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp  109
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp  9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp  1826
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp  166
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SROA.cpp  991
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp  15
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp  7
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp  32
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp  251
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp  11
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Sink.cpp  25
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp  2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp  9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp  12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp  42
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp  2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp  149
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp  54
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp  19
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp  28
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp  6
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp  122
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneModule.cpp  89
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp  36
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp  2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp  21
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp  34
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp  4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp  431
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp  58
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LCSSA.cpp  76
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Local.cpp  270
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp  118
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp  103
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp  36
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp  276
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp  79
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp  2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp  93
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp  3
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp  69
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp  14
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp  57
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp  706
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp  130
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp  4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp  445
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SplitModule.cpp  85
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp  1
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp  10
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp  256
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp  175
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  2475
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  489
154 files changed, 19960 insertions, 12172 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 4762011d63d8..0e05129b5261 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -34,8 +34,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
@@ -63,7 +66,8 @@ namespace {
///
struct ArgPromotion : public CallGraphSCCPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -81,7 +85,8 @@ namespace {
bool isDenselyPacked(Type *type, const DataLayout &DL);
bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
- bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal,
+ AAResults &AAR) const;
CallGraphNode *DoPromotion(Function *F,
SmallPtrSetImpl<Argument*> &ArgsToPromote,
SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
@@ -90,15 +95,15 @@ namespace {
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
};
}
char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -217,9 +222,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- if (I->getType()->isPointerTy())
- PointerArgs.push_back(I);
+ for (Argument &I : F->args())
+ if (I.getType()->isPointerTy())
+ PointerArgs.push_back(&I);
if (PointerArgs.empty()) return nullptr;
// Second check: make sure that all callers are direct callers. We can't
@@ -237,6 +242,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
const DataLayout &DL = F->getParent()->getDataLayout();
+ // We need to manually construct BasicAA directly in order to disable its use
+ // of other function analyses.
+ BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));
+
+ // Construct our own AA results for this function. We do this manually to
+ // work around the limitations of the legacy pass manager.
+ AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
+
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
@@ -281,8 +294,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// If all the elements are single-value types, we can promote it.
bool AllSimple = true;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- if (!STy->getElementType(i)->isSingleValueType()) {
+ for (const auto *EltTy : STy->elements()) {
+ if (!EltTy->isSingleValueType()) {
AllSimple = false;
break;
}
@@ -303,8 +316,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
if (isSelfRecursive) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
bool RecursiveType = false;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- if (STy->getElementType(i) == PtrArg->getType()) {
+ for (const auto *EltTy : STy->elements()) {
+ if (EltTy == PtrArg->getType()) {
RecursiveType = true;
break;
}
@@ -315,7 +328,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
}
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr()))
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
ArgsToPromote.insert(PtrArg);
}
@@ -416,7 +429,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
- bool isByValOrInAlloca) const {
+ bool isByValOrInAlloca,
+ AAResults &AAR) const {
typedef std::set<IndicesVector> GEPIndicesSet;
// Quick exit for unused arguments
@@ -453,12 +467,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// First, iterate the entry block and mark loads of (geps of) arguments as
// safe.
- BasicBlock *EntryBlock = Arg->getParent()->begin();
+ BasicBlock &EntryBlock = Arg->getParent()->front();
// Declare this here so we can reuse it
IndicesVector Indices;
- for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
- I != E; ++I)
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ for (Instruction &I : EntryBlock)
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *V = LI->getPointerOperand();
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
V = GEP->getPointerOperand();
@@ -501,12 +514,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
// them.
- getAnalysis<AliasAnalysis>().deleteValue(GEP);
GEP->eraseFromParent();
// TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just do increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByValOrInAlloca);
+ return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR);
}
// Ensure that all of the indices are constants.
@@ -563,8 +575,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// blocks we know to be transparent to the load.
SmallPtrSet<BasicBlock*, 16> TranspBlocks;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
// Check to see if the load is invalidated from the start of the block to
// the load itself.
@@ -572,8 +582,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
BasicBlock *BB = Load->getParent();
MemoryLocation Loc = MemoryLocation::get(Load);
- if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc,
- AliasAnalysis::Mod))
+ if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
@@ -581,7 +590,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// loading block.
for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
- if (AA.canBasicBlockModify(*TranspBB, Loc))
+ if (AAR.canBasicBlockModify(*TranspBB, Loc))
return false;
}
}
@@ -637,13 +646,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
unsigned ArgIndex = 1;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgIndex) {
- if (ByValArgsToTransform.count(I)) {
+ if (ByValArgsToTransform.count(&*I)) {
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
Params.insert(Params.end(), STy->element_begin(), STy->element_end());
++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(I)) {
+ } else if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
@@ -661,7 +670,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// In this table, we will track which indices are loaded from the argument
// (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
Type *SrcTy;
@@ -687,7 +696,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
else
// Take any load, we will use it only to update Alias Analysis
OrigLoad = cast<LoadInst>(UI->user_back());
- OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
+ OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
}
// Add a parameter to the function for each element passed in.
@@ -722,15 +731,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NF->copyAttributesFrom(F);
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(F);
- if (DI != FunctionDIs.end()) {
- DISubprogram *SP = DI->second;
- SP->replaceFunction(NF);
- // Ensure the map is updated so it can be reused on subsequent argument
- // promotions of the same function.
- FunctionDIs.erase(DI);
- FunctionDIs[NF] = SP;
- }
+ NF->setSubprogram(F->getSubprogram());
+ F->setSubprogram(nullptr);
DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
@@ -740,13 +742,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
AttributesVec.clear();
- F->getParent()->getFunctionList().insert(F, NF);
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
- // Get the alias analysis information that we need to update to reflect our
- // changes.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
// Get the callgraph information that we need to update to reflect our
// changes.
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -775,7 +773,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ArgIndex = 1;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++AI, ++ArgIndex)
- if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
if (CallPAL.hasAttributes(ArgIndex)) {
@@ -783,7 +781,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
AttributesVec.
push_back(AttributeSet::get(F->getContext(), Args.size(), B));
}
- } else if (ByValArgsToTransform.count(I)) {
+ } else if (ByValArgsToTransform.count(&*I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
@@ -798,14 +796,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
} else if (!I->use_empty()) {
// Non-dead argument: insert GEPs and loads as appropriate.
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
// Store the Value* version of the indices in here, but declare it now
// for reuse.
std::vector<Value*> Ops;
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)];
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)];
if (!SI->second.empty()) {
Ops.reserve(SI->second.size());
Type *ElTy = V->getType();
@@ -873,10 +871,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Args.clear();
AttributesVec.clear();
- // Update the alias analysis implementation to know that we are replacing
- // the old call with a new one.
- AA.replaceWithNewValue(Call, New);
-
// Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
@@ -901,20 +895,19 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
//
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
I2 = NF->arg_begin(); I != E; ++I) {
- if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
- AA.replaceWithNewValue(I, I2);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
++I2;
continue;
}
- if (ByValArgsToTransform.count(I)) {
+ if (ByValArgsToTransform.count(&*I)) {
// In the callee, we create an alloca, and store each of the new incoming
// arguments into the alloca.
- Instruction *InsertPt = NF->begin()->begin();
+ Instruction *InsertPt = &NF->begin()->front();
// Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -929,13 +922,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
InsertPt);
I2->setName(I->getName()+"."+Twine(i));
- new StoreInst(I2++, Idx, InsertPt);
+ new StoreInst(&*I2++, Idx, InsertPt);
}
// Anything that used the arg should now use the alloca.
I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(I);
- AA.replaceWithNewValue(I, TheAlloca);
+ TheAlloca->takeName(&*I);
// If the alloca is used in a call, we must clear the tail flag since
// the callee now uses an alloca from the caller.
@@ -948,23 +940,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
continue;
}
- if (I->use_empty()) {
- AA.deleteValue(I);
+ if (I->use_empty())
continue;
- }
// Otherwise, if we promoted this argument, then all users are load
// instructions (or GEPs with only load users), and all loads should be
// using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
while (!I->use_empty()) {
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
I2->setName(I->getName()+".val");
- LI->replaceAllUsesWith(I2);
- AA.replaceWithNewValue(LI, I2);
+ LI->replaceAllUsesWith(&*I2);
LI->eraseFromParent();
DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
<< "' in function '" << F->getName() << "'\n");
@@ -1000,11 +989,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// the argument specified by ArgNo.
while (!GEP->use_empty()) {
LoadInst *L = cast<LoadInst>(GEP->user_back());
- L->replaceAllUsesWith(TheArg);
- AA.replaceWithNewValue(L, TheArg);
+ L->replaceAllUsesWith(&*TheArg);
L->eraseFromParent();
}
- AA.deleteValue(GEP);
GEP->eraseFromParent();
}
}
@@ -1013,10 +1000,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
std::advance(I2, ArgIndices.size());
}
- // Tell the alias analysis that the old function is about to disappear.
- AA.replaceWithNewValue(F, NF);
-
-
NF_CGN->stealCalledFunctionsFrom(CG[F]);
// Now that the old function is dead, delete it. If there is a dangling
@@ -1032,6 +1015,5 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
bool ArgPromotion::doInitialization(CallGraph &CG) {
- FunctionDIs = makeSubprogramMap(CG.getModule());
return CallGraphSCCPass::doInitialization(CG);
}
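
[Editor's note] For context on the hunks above: argpromotion rewrites pointer arguments that are only ever loaded into scalar arguments, hoisting the loads into each caller. A minimal before/after sketch at the C level, with hypothetical names (the pass itself operates on LLVM IR; the isSafeToPromoteArgument/AAR machinery above is what proves the loads can move):

// Before: 'x' is passed 'by reference' but only ever loaded.
int callee(const int *x) { return *x + 1; }
int caller(void) { int v = 41; return callee(&v); }

// After promotion: the load moves into the caller and the argument
// becomes a scalar, so 'v' no longer needs to be addressable.
int callee_promoted(int x_val) { return x_val + 1; }
int caller_promoted(void) { int v = 41; return callee_promoted(v); }

The alias-analysis queries in the diff (canInstructionRangeModRef, canBasicBlockModify) establish that no store can clobber the loaded value between the entry block and each load.
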
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8ce7646621ff..0aa49d6fde01 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -119,7 +119,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// First: Find the canonical constants others will be merged with.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// If this GV is dead, remove it.
GV->removeDeadConstantUsers();
@@ -160,7 +160,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// invalidating the Constant* pointers in CMap.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// Only process constants with initializers in the default address space.
if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
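
[Editor's note] A recurring mechanical change in this import, visible in both hunks above, rewrites `GlobalVariable *GV = GVI++;` as `GlobalVariable *GV = &*GVI++;`. This tracks the LLVM 3.8-era ilist change that removed the implicit iterator-to-pointer conversion. A minimal sketch of the idiom with a plain std::list stand-in (not LLVM's ilist):

#include <cassert>
#include <list>

int main() {
  std::list<int> L = {1, 2, 3};
  std::list<int>::iterator It = L.begin();
  // 'int *P = It;' no longer compiles once iterators stop converting
  // to pointers. '&*It++' takes the address of the current element
  // and leaves It pointing at the next one, so the element P refers
  // to can be removed without invalidating It.
  int *P = &*It++;
  assert(*P == 1 && *It == 2);
  return 0;
}
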
diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
new file mode 100644
index 000000000000..5bbb7513005c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -0,0 +1,166 @@
+//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass exports all llvm.bitset's found in the module in the form of a
+// __cfi_check function, which can be used to verify cross-DSO call targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cross-dso-cfi"
+
+STATISTIC(TypeIds, "Number of unique type identifiers");
+
+namespace {
+
+struct CrossDSOCFI : public ModulePass {
+ static char ID;
+ CrossDSOCFI() : ModulePass(ID) {
+ initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry());
+ }
+
+ Module *M;
+ MDNode *VeryLikelyWeights;
+
+ ConstantInt *extractBitSetTypeId(MDNode *MD);
+ void buildCFICheck();
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false,
+ false)
+INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false)
+char CrossDSOCFI::ID = 0;
+
+ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; }
+
+bool CrossDSOCFI::doInitialization(Module &Mod) {
+ M = &Mod;
+ VeryLikelyWeights =
+ MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1);
+
+ return false;
+}
+
+/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode.
+ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) {
+ // This check excludes vtables for classes inside anonymous namespaces.
+ auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0));
+ if (!TM)
+ return nullptr;
+ auto C = dyn_cast_or_null<ConstantInt>(TM->getValue());
+ if (!C) return nullptr;
+ // We are looking for i64 constants.
+ if (C->getBitWidth() != 64) return nullptr;
+
+ // Sanity check.
+ auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1));
+ // Can be null if a function was removed by an optimization.
+ if (FM) {
+ auto F = dyn_cast<Function>(FM->getValue());
+ // But can never be a function declaration.
+ assert(!F || !F->isDeclaration());
+ (void)F; // Suppress unused variable warning in the no-asserts build.
+ }
+ return C;
+}
+
+/// buildCFICheck - emits __cfi_check for the current module.
+void CrossDSOCFI::buildCFICheck() {
+ // FIXME: verify that __cfi_check ends up near the end of the code section,
+ // but before the jump slots created in LowerBitSets.
+ llvm::DenseSet<uint64_t> BitSetIds;
+ NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+ if (BitSetNM)
+ for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I)
+ if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I)))
+ BitSetIds.insert(TypeId->getZExtValue());
+
+ LLVMContext &Ctx = M->getContext();
+ Constant *C = M->getOrInsertFunction(
+ "__cfi_check",
+ FunctionType::get(
+ Type::getVoidTy(Ctx),
+ {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))},
+ false));
+ Function *F = dyn_cast<Function>(C);
+ F->setAlignment(4096);
+ auto args = F->arg_begin();
+ Argument &CallSiteTypeId = *(args++);
+ CallSiteTypeId.setName("CallSiteTypeId");
+ Argument &Addr = *(args++);
+ Addr.setName("Addr");
+ assert(args == F->arg_end());
+
+ BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
+
+ BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F);
+ IRBuilder<> IRBTrap(TrapBB);
+ Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
+ llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ IRBTrap.CreateUnreachable();
+
+ BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F);
+ IRBuilder<> IRBExit(ExitBB);
+ IRBExit.CreateRetVoid();
+
+ IRBuilder<> IRB(BB);
+ SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size());
+ for (uint64_t TypeId : BitSetIds) {
+ ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId);
+ BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F);
+ IRBuilder<> IRBTest(TestBB);
+ Function *BitsetTestFn =
+ Intrinsic::getDeclaration(M, Intrinsic::bitset_test);
+
+ Value *Test = IRBTest.CreateCall(
+ BitsetTestFn, {&Addr, MetadataAsValue::get(
+ Ctx, ConstantAsMetadata::get(CaseTypeId))});
+ BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB);
+ BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights);
+
+ SI->addCase(CaseTypeId, TestBB);
+ ++TypeIds;
+ }
+}
+
+bool CrossDSOCFI::runOnModule(Module &M) {
+ if (M.getModuleFlag("Cross-DSO CFI") == nullptr)
+ return false;
+ buildCFICheck();
+ return true;
+}
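
[Editor's note] To make the new pass easier to follow: buildCFICheck() collects every 64-bit type id from the llvm.bitsets named metadata and emits one switch over them, with a per-id "test" block that branches (weighted "very likely") to a return or to the trap; unknown ids hit the trap directly. A C-like sketch of the generated function, with a hypothetical stub standing in for the llvm.bitset.test intrinsic and a made-up type id:

// Stand-in for the llvm.bitset.test intrinsic (hypothetical stub).
static bool llvm_bitset_test(void *Addr, unsigned long long TypeId) {
  (void)Addr; (void)TypeId;
  return true;
}

extern "C" void __cfi_check(unsigned long long CallSiteTypeId, void *Addr) {
  switch (CallSiteTypeId) {            // default target is the trap block
  case 0x1111:                         // one case per collected type id
    if (llvm_bitset_test(Addr, 0x1111))
      return;                          // 'exit' block
    break;                             // otherwise fall through to trap
  // ... more cases, one per entry in llvm.bitsets ...
  }
  __builtin_trap();                    // 'trap' block: noreturn, nothrow
}
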
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d0447640259e..4de3d95ab11d 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -35,6 +35,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <map>
#include <set>
#include <tuple>
@@ -121,14 +122,6 @@ namespace {
typedef SmallVector<RetOrArg, 5> UseVector;
- // Map each LLVM function to corresponding metadata with debug info. If
- // the function is replaced with another one, we should patch the pointer
- // to LLVM function in metadata.
- // As the code generation for module is finished (and DIBuilder is
- // finalized) we assume that subprogram descriptors won't be changed, and
- // they are stored in map for short duration anyway.
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
-
protected:
// DAH uses this to specify a different ID.
explicit DAE(char &ID) : ModulePass(ID) {}
@@ -198,6 +191,13 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (Fn.hasAddressTaken())
return false;
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (Fn.hasFnAttribute(Attribute::Naked)) {
+ return false;
+ }
+
// Okay, we know we can transform this function if safe. Scan its body
// looking for calls marked musttail or calls to llvm.vastart.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
@@ -229,7 +229,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Create the new function body and insert it into the module...
Function *NF = Function::Create(NFTy, Fn.getLinkage());
NF->copyAttributesFrom(&Fn);
- Fn.getParent()->getFunctionList().insert(&Fn, NF);
+ Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
NF->takeName(&Fn);
// Loop over all of the callers of the function, transforming the call sites
@@ -296,20 +296,12 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
I2 = NF->arg_begin(); I != E; ++I, ++I2) {
// Move the name and users over to the new version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
}
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(&Fn);
- if (DI != FunctionDIs.end()) {
- DISubprogram *SP = DI->second;
- SP->replaceFunction(NF);
- // Ensure the map is updated so it can be reused on non-varargs argument
- // eliminations of the same function.
- FunctionDIs.erase(DI);
- FunctionDIs[NF] = SP;
- }
+ NF->setSubprogram(Fn.getSubprogram());
// Fix up any BlockAddresses that refer to the function.
Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
@@ -345,16 +337,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
return false;
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (Fn.hasFnAttribute(Attribute::Naked))
+ return false;
+
if (Fn.use_empty())
return false;
SmallVector<unsigned, 8> UnusedArgs;
- for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
- I != E; ++I) {
- Argument *Arg = I;
-
- if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr())
- UnusedArgs.push_back(Arg->getArgNo());
+ for (Argument &Arg : Fn.args()) {
+ if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+ UnusedArgs.push_back(Arg.getArgNo());
}
if (UnusedArgs.empty())
@@ -485,6 +480,10 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
if (F) {
// Used in a direct call.
+ // The function argument is live if it is used as a bundle operand.
+ if (CS.isBundleOperand(U))
+ return Live;
+
// Find the argument number. We know for sure that this use is an
// argument, since if it was the function argument this would be an
// indirect call and the we know can't be looking at a value of the
@@ -543,6 +542,14 @@ void DAE::SurveyFunction(const Function &F) {
return;
}
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (F.hasFnAttribute(Attribute::Naked)) {
+ MarkLive(F);
+ return;
+ }
+
unsigned RetCount = NumRetVals(&F);
// Assume all return values are dead
typedef SmallVector<Liveness, 5> RetVals;
@@ -648,7 +655,7 @@ void DAE::SurveyFunction(const Function &F) {
} else {
// See what the effect of this use is (recording any uses that cause
// MaybeLive in MaybeLiveArgUses).
- Result = SurveyUses(AI, MaybeLiveArgUses);
+ Result = SurveyUses(&*AI, MaybeLiveArgUses);
}
// Mark the result.
@@ -878,7 +885,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
NF->setAttributes(NewPAL);
// Insert the new function before the old function, so we won't be processing
// it again.
- F->getParent()->getFunctionList().insert(F, NF);
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
// Loop over all of the callers of the function, transforming the call sites
@@ -946,7 +953,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, "", Call);
+ Args, "", Call->getParent());
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(NewCallPAL);
} else {
@@ -976,9 +983,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
" must have been a struct or an array!");
Instruction *InsertPt = Call;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- BasicBlock::iterator IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP)) ++IP;
- InsertPt = IP;
+ BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest());
+ InsertPt = &*NewEdge->getFirstInsertionPt();
}
// We used to return a struct or array. Instead of doing smart stuff
@@ -1026,8 +1032,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
// If this is a live argument, move the name and users over to the new
// version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
++I2;
} else {
// If this argument is dead, replace any uses of it with null constants
@@ -1079,9 +1085,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(F);
- if (DI != FunctionDIs.end())
- DI->second->replaceFunction(NF);
+ NF->setSubprogram(F->getSubprogram());
// Now that the old function is dead, delete it.
F->eraseFromParent();
@@ -1092,9 +1096,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
bool DAE::runOnModule(Module &M) {
bool Changed = false;
- // Collect debug info descriptors for functions.
- FunctionDIs = makeSubprogramMap(M);
-
// First pass: Do a simple check to see if any functions can have their "..."
// removed. We can do this if they never call va_start. This loop cannot be
// fused with the next loop, because deleting a function invalidates
@@ -1119,7 +1120,7 @@ bool DAE::runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
// Increment now, because the function will probably get removed (ie.
// replaced by a new one).
- Function *F = I++;
+ Function *F = &*I++;
Changed |= RemoveDeadStuffFromFunction(F);
}
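
[Editor's note] All three early-outs added above guard the same hazard: a naked function's body is raw assembly, so an argument can be read without any IR-visible use. A minimal illustration of why use_empty() cannot be trusted there (hypothetical function; Clang's naked attribute, x86-64 AT&T syntax):

// 'x' has no IR uses, yet the assembly reads its register (%rdi on
// x86-64), so eliminating the "dead" argument would miscompile this.
__attribute__((naked)) int first_arg_plus_one(int x) {
  __asm__("leal 1(%rdi), %eax\n\t"
          "ret");
}
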
diff --git a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 67ba72d6a360..af313a6b001d 100644
--- a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -1,4 +1,5 @@
-//===-- ElimAvailExtern.cpp - DCE unreachable internal functions ----------------===//
+//===-- ElimAvailExtern.cpp - DCE unreachable internal functions
+//----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +16,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -28,18 +27,18 @@ STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
namespace {
- struct EliminateAvailableExternally : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- EliminateAvailableExternally() : ModulePass(ID) {
- initializeEliminateAvailableExternallyPass(
- *PassRegistry::getPassRegistry());
- }
+struct EliminateAvailableExternally : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ EliminateAvailableExternally() : ModulePass(ID) {
+ initializeEliminateAvailableExternallyPass(
+ *PassRegistry::getPassRegistry());
+ }
- // run - Do the EliminateAvailableExternally pass on the specified module,
- // optionally updating the specified callgraph to reflect the changes.
- //
- bool runOnModule(Module &M) override;
- };
+ // run - Do the EliminateAvailableExternally pass on the specified module,
+ // optionally updating the specified callgraph to reflect the changes.
+ //
+ bool runOnModule(Module &M) override;
+};
}
char EliminateAvailableExternally::ID = 0;
@@ -54,30 +53,31 @@ bool EliminateAvailableExternally::runOnModule(Module &M) {
bool Changed = false;
// Drop initializers of available externally global variables.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- if (!I->hasAvailableExternallyLinkage())
+ for (GlobalVariable &GV : M.globals()) {
+ if (!GV.hasAvailableExternallyLinkage())
continue;
- if (I->hasInitializer()) {
- Constant *Init = I->getInitializer();
- I->setInitializer(nullptr);
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ GV.setInitializer(nullptr);
if (isSafeToDestroyConstant(Init))
Init->destroyConstant();
}
- I->removeDeadConstantUsers();
- I->setLinkage(GlobalValue::ExternalLinkage);
+ GV.removeDeadConstantUsers();
+ GV.setLinkage(GlobalValue::ExternalLinkage);
NumVariables++;
+ Changed = true;
}
// Drop the bodies of available externally functions.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!I->hasAvailableExternallyLinkage())
+ for (Function &F : M) {
+ if (!F.hasAvailableExternallyLinkage())
continue;
- if (!I->isDeclaration())
+ if (!F.isDeclaration())
// This will set the linkage to external
- I->deleteBody();
- I->removeDeadConstantUsers();
+ F.deleteBody();
+ F.removeDeadConstantUsers();
NumFunctions++;
+ Changed = true;
}
return Changed;
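
[Editor's note] Background for the pass above: available_externally definitions are bodies the optimizer may inline but must never emit into the object file; once inlining has had its chance, this pass strips them back to declarations. One C++ way such a definition can arise (a hedged sketch; clang models it this way with optimizations enabled):

// The explicit instantiation declaration promises Box<int>'s members
// are emitted in some other TU ('template struct Box<int>;' there).
// Clang can still inline Box<int>::get() here, modeling its body with
// available_externally linkage -- exactly what this pass later drops.
template <typename T> struct Box {
  T V;
  T get() const { return V; }
};
extern template struct Box<int>;

int peek(const Box<int> &B) { return B.get(); }
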
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index b9462f2ffc72..1a3b9253d72f 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -83,7 +83,7 @@ namespace {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
bool Delete =
- deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
@@ -103,7 +103,7 @@ namespace {
// Visit the Functions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
bool Delete =
- deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
@@ -124,7 +124,7 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- bool Delete = deleteStuff == (bool)Named.count(CurI);
+ bool Delete = deleteStuff == (bool)Named.count(&*CurI);
makeVisible(*CurI, Delete);
if (Delete) {
@@ -143,7 +143,7 @@ namespace {
}
CurI->replaceAllUsesWith(Declaration);
- delete CurI;
+ delete &*CurI;
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
new file mode 100644
index 000000000000..816291dac9e8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -0,0 +1,121 @@
+//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "forceattrs"
+
+static cl::list<std::string>
+ ForceAttributes("force-attribute", cl::Hidden,
+ cl::desc("Add an attribute to a function. This should be a "
+ "pair of 'function-name:attribute-name', for "
+ "example -force-add-attribute=foo:noinline. This "
+ "option can be specified multiple times."));
+
+static Attribute::AttrKind parseAttrKind(StringRef Kind) {
+ return StringSwitch<Attribute::AttrKind>(Kind)
+ .Case("alwaysinline", Attribute::AlwaysInline)
+ .Case("builtin", Attribute::Builtin)
+ .Case("cold", Attribute::Cold)
+ .Case("convergent", Attribute::Convergent)
+ .Case("inlinehint", Attribute::InlineHint)
+ .Case("jumptable", Attribute::JumpTable)
+ .Case("minsize", Attribute::MinSize)
+ .Case("naked", Attribute::Naked)
+ .Case("nobuiltin", Attribute::NoBuiltin)
+ .Case("noduplicate", Attribute::NoDuplicate)
+ .Case("noimplicitfloat", Attribute::NoImplicitFloat)
+ .Case("noinline", Attribute::NoInline)
+ .Case("nonlazybind", Attribute::NonLazyBind)
+ .Case("noredzone", Attribute::NoRedZone)
+ .Case("noreturn", Attribute::NoReturn)
+ .Case("norecurse", Attribute::NoRecurse)
+ .Case("nounwind", Attribute::NoUnwind)
+ .Case("optnone", Attribute::OptimizeNone)
+ .Case("optsize", Attribute::OptimizeForSize)
+ .Case("readnone", Attribute::ReadNone)
+ .Case("readonly", Attribute::ReadOnly)
+ .Case("argmemonly", Attribute::ArgMemOnly)
+ .Case("returns_twice", Attribute::ReturnsTwice)
+ .Case("safestack", Attribute::SafeStack)
+ .Case("sanitize_address", Attribute::SanitizeAddress)
+ .Case("sanitize_memory", Attribute::SanitizeMemory)
+ .Case("sanitize_thread", Attribute::SanitizeThread)
+ .Case("ssp", Attribute::StackProtect)
+ .Case("sspreq", Attribute::StackProtectReq)
+ .Case("sspstrong", Attribute::StackProtectStrong)
+ .Case("uwtable", Attribute::UWTable)
+ .Default(Attribute::None);
+}
+
+/// If F has any forced attributes given on the command line, add them.
+static void addForcedAttributes(Function &F) {
+ for (auto &S : ForceAttributes) {
+ auto KV = StringRef(S).split(':');
+ if (KV.first != F.getName())
+ continue;
+
+ auto Kind = parseAttrKind(KV.second);
+ if (Kind == Attribute::None) {
+ DEBUG(dbgs() << "ForcedAttribute: " << KV.second
+ << " unknown or not handled!\n");
+ continue;
+ }
+ if (F.hasFnAttribute(Kind))
+ continue;
+ F.addFnAttr(Kind);
+ }
+}
+
+PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) {
+ if (ForceAttributes.empty())
+ return PreservedAnalyses::all();
+
+ for (Function &F : M.functions())
+ addForcedAttributes(F);
+
+ // Just conservatively invalidate analyses, this isn't likely to be important.
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct ForceFunctionAttrsLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ForceFunctionAttrsLegacyPass() : ModulePass(ID) {
+ initializeForceFunctionAttrsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (ForceAttributes.empty())
+ return false;
+
+ for (Function &F : M.functions())
+ addForcedAttributes(F);
+
+ // Conservatively assume we changed something.
+ return true;
+ }
+};
+}
+
+char ForceFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs",
+ "Force set function attributes", false, false)
+
+Pass *llvm::createForceFunctionAttrsLegacyPass() {
+ return new ForceFunctionAttrsLegacyPass();
+}
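
[Editor's note] The whole pass above is driven by the repeatable -force-attribute option it declares, each value a 'function-name:attribute-name' pair. A standalone sketch of that parsing using only the standard library (a hypothetical mini-parser; the LLVM code uses StringRef::split and StringSwitch):

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Same shape the pass sees from:
  //   opt -forceattrs -force-attribute=foo:noinline ...
  std::vector<std::string> Specs = {"foo:noinline", "bar:readonly"};
  for (const std::string &S : Specs) {
    std::string::size_type Colon = S.find(':');
    std::string Fn = S.substr(0, Colon);     // function to match
    std::string Attr = S.substr(Colon + 1);  // attribute kind to add
    std::cout << "force '" << Attr << "' on '" << Fn << "'\n";
  }
  return 0;
}
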
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index bb5e64aef338..6dcfb3f83004 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -23,14 +23,21 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -42,230 +49,191 @@ STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
-STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
+STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
namespace {
- struct FunctionAttrs : public CallGraphSCCPass {
- static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) {
- initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
- }
-
- // runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
- bool AddReadAttrs(const CallGraphSCC &SCC);
-
- // AddArgumentAttrs - Deduce nocapture attributes for the SCC.
- bool AddArgumentAttrs(const CallGraphSCC &SCC);
-
- // IsFunctionMallocLike - Does this function allocate new memory?
- bool IsFunctionMallocLike(Function *F,
- SmallPtrSet<Function*, 8> &) const;
-
- // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
- bool AddNoAliasAttrs(const CallGraphSCC &SCC);
-
- // Utility methods used by inferPrototypeAttributes to add attributes
- // and maintain annotation statistics.
-
- void setDoesNotAccessMemory(Function &F) {
- if (!F.doesNotAccessMemory()) {
- F.setDoesNotAccessMemory();
- ++NumAnnotated;
- }
- }
-
- void setOnlyReadsMemory(Function &F) {
- if (!F.onlyReadsMemory()) {
- F.setOnlyReadsMemory();
- ++NumAnnotated;
- }
- }
-
- void setDoesNotThrow(Function &F) {
- if (!F.doesNotThrow()) {
- F.setDoesNotThrow();
- ++NumAnnotated;
- }
- }
-
- void setDoesNotCapture(Function &F, unsigned n) {
- if (!F.doesNotCapture(n)) {
- F.setDoesNotCapture(n);
- ++NumAnnotated;
- }
- }
-
- void setOnlyReadsMemory(Function &F, unsigned n) {
- if (!F.onlyReadsMemory(n)) {
- F.setOnlyReadsMemory(n);
- ++NumAnnotated;
- }
- }
-
- void setDoesNotAlias(Function &F, unsigned n) {
- if (!F.doesNotAlias(n)) {
- F.setDoesNotAlias(n);
- ++NumAnnotated;
- }
- }
-
- // inferPrototypeAttributes - Analyze the name and prototype of the
- // given function and set any applicable attributes. Returns true
- // if any attributes were set and false otherwise.
- bool inferPrototypeAttributes(Function &F);
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
+}
- // annotateLibraryCalls - Adds attributes to well-known standard library
- // call declarations.
- bool annotateLibraryCalls(const CallGraphSCC &SCC);
+namespace {
+struct FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(ID) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ bool doInitialization(CallGraph &CG) override {
+ Revisit.clear();
+ return false;
+ }
+ bool doFinalization(CallGraph &CG) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
- private:
- AliasAnalysis *AA;
- TargetLibraryInfo *TLI;
- };
+private:
+ TargetLibraryInfo *TLI;
+ SmallVector<WeakVH,16> Revisit;
+};
}
char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+ "Deduce function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false)
+ "Deduce function attributes", false, false)
Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+namespace {
+/// The three kinds of memory access relevant to 'readonly' and
+/// 'readnone' attributes.
+enum MemoryAccessKind {
+ MAK_ReadNone = 0,
+ MAK_ReadOnly = 1,
+ MAK_MayWrite = 2
+};
+}
-/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
-bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- SCCNodes.insert((*I)->getFunction());
+static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
+ const SCCNodeSet &SCCNodes) {
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
+ if (MRB == FMRB_DoesNotAccessMemory)
+ // Already perfect!
+ return MAK_ReadNone;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F.isDeclaration() || F.mayBeOverridden()) {
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return MAK_ReadOnly;
+
+ // Conservatively assume it writes to memory.
+ return MAK_MayWrite;
+ }
- // Check if any of the functions in the SCC read or write memory. If they
- // write memory then they can't be marked readnone or readonly.
+ // Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or node we don't want to optimize - assume it may write
- // memory and give up.
- return false;
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS(cast<Value>(I));
+ if (CS) {
+ // Ignore calls to functions in the same SCC.
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ continue;
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);
- AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
- if (MRB == AliasAnalysis::DoesNotAccessMemory)
- // Already perfect!
- continue;
+ // If the call doesn't access memory, we're done.
+ if (!(MRB & MRI_ModRef))
+ continue;
- // Definitions with weak linkage may be overridden at linktime with
- // something that writes memory, so treat them like declarations.
- if (F->isDeclaration() || F->mayBeOverridden()) {
- if (!AliasAnalysis::onlyReadsMemory(MRB))
- // May write memory. Just give up.
- return false;
+ if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & MRI_Mod)
+ return MAK_MayWrite;
+ // If it reads, note it.
+ if (MRB & MRI_Ref)
+ ReadsMemory = true;
+ continue;
+ }
- ReadsMemory = true;
- continue;
- }
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (!Arg->getType()->isPtrOrPtrVectorTy())
+ continue;
- // Scan the function body for instructions that may read or write memory.
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+ MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
- // Some instructions can be ignored even if they read or write memory.
- // Detect these now, skipping to the next instruction if one is found.
- CallSite CS(cast<Value>(I));
- if (CS) {
- // Ignore calls to functions in the same SCC.
- if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ // Skip accesses to local or constant memory as they don't impact the
+ // externally visible mod/ref behavior.
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
- AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
- // If the call doesn't access arbitrary memory, we may be able to
- // figure out something.
- if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
- // If the call does access argument pointees, check each argument.
- if (AliasAnalysis::doesAccessArgPointees(MRB))
- // Check whether all pointer arguments point to local memory, and
- // ignore calls that only access local memory.
- for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
- CI != CE; ++CI) {
- Value *Arg = *CI;
- if (Arg->getType()->isPointerTy()) {
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
-
- MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
- if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
- if (MRB & AliasAnalysis::Mod)
- // Writes non-local memory. Give up.
- return false;
- if (MRB & AliasAnalysis::Ref)
- // Ok, it reads non-local memory.
- ReadsMemory = true;
- }
- }
- }
- continue;
- }
- // The call could access any memory. If that includes writes, give up.
- if (MRB & AliasAnalysis::Mod)
- return false;
- // If it reads, note it.
- if (MRB & AliasAnalysis::Ref)
+
+ if (MRB & MRI_Mod)
+ // Writes non-local memory. Give up.
+ return MAK_MayWrite;
+ if (MRB & MRI_Ref)
+ // Ok, it reads non-local memory.
ReadsMemory = true;
- continue;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // Ignore non-volatile loads from local memory. (Atomic is okay here.)
- if (!LI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(LI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Ignore non-volatile stores to local memory. (Atomic is okay here.)
- if (!SI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(SI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
- } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
- // Ignore vaargs on local memory.
- MemoryLocation Loc = MemoryLocation::get(VI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ }
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory. (Atomic is okay here.)
+ if (!LI->isVolatile()) {
+ MemoryLocation Loc = MemoryLocation::get(LI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore non-volatile stores to local memory. (Atomic is okay here.)
+ if (!SI->isVolatile()) {
+ MemoryLocation Loc = MemoryLocation::get(SI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ MemoryLocation Loc = MemoryLocation::get(VI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
- // Any remaining instructions need to be taken seriously! Check if they
- // read or write memory.
- if (I->mayWriteToMemory())
- // Writes memory. Just give up.
- return false;
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return MAK_MayWrite;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+
+ return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
+}
- // If this instruction may read memory, remember that.
- ReadsMemory |= I->mayReadFromMemory();
+/// Deduce readonly/readnone attributes for the SCC.
+template <typename AARGetterT>
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (Function *F : SCCNodes) {
+ // Call the callable parameter to look up AA results for this function.
+ AAResults &AAR = AARGetter(*F);
+
+ switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+ case MAK_MayWrite:
+ return false;
+ case MAK_ReadOnly:
+ ReadsMemory = true;
+ break;
+ case MAK_ReadNone:
+ // Nothing to do!
+ break;
}
}
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
bool MadeChange = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
+ for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
// Already perfect!
continue;
@@ -278,11 +246,10 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Clear out any existing attributes.
AttrBuilder B;
- B.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone);
- F->removeAttributes(AttributeSet::FunctionIndex,
- AttributeSet::get(F->getContext(),
- AttributeSet::FunctionIndex, B));
+ B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+ F->removeAttributes(
+ AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
// Add in the new attribute.
F->addAttribute(AttributeSet::FunctionIndex,
@@ -298,124 +265,140 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
}
namespace {
- // For a given pointer Argument, this retains a list of Arguments of functions
- // in the same SCC that the pointer data flows into. We use this to build an
- // SCC of the arguments.
- struct ArgumentGraphNode {
- Argument *Definition;
- SmallVector<ArgumentGraphNode*, 4> Uses;
- };
-
- class ArgumentGraph {
- // We store pointers to ArgumentGraphNode objects, so it's important that
- // that they not move around upon insert.
- typedef std::map<Argument*, ArgumentGraphNode> ArgumentMapTy;
+/// For a given pointer Argument, this retains a list of Arguments of functions
+/// in the same SCC that the pointer data flows into. We use this to build an
+/// SCC of the arguments.
+struct ArgumentGraphNode {
+ Argument *Definition;
+ SmallVector<ArgumentGraphNode *, 4> Uses;
+};
+
+class ArgumentGraph {
+ // We store pointers to ArgumentGraphNode objects, so it's important that
+ // they not move around upon insert.
+ typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy;
+
+ ArgumentMapTy ArgumentMap;
+
+ // There is no root node for the argument graph, in fact:
+ // void f(int *x, int *y) { if (...) f(x, y); }
+ // is an example where the graph is disconnected. The SCCIterator requires a
+ // single entry point, so we maintain a fake ("synthetic") root node that
+ // uses every node. Because the graph is directed and nothing points into
+ // the root, it will not participate in any SCCs (except for its own).
+ ArgumentGraphNode SyntheticRoot;
+
+public:
+ ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
+
+ typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator;
+
+ iterator begin() { return SyntheticRoot.Uses.begin(); }
+ iterator end() { return SyntheticRoot.Uses.end(); }
+ ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+
+ ArgumentGraphNode *operator[](Argument *A) {
+ ArgumentGraphNode &Node = ArgumentMap[A];
+ Node.Definition = A;
+ SyntheticRoot.Uses.push_back(&Node);
+ return &Node;
+ }
+};
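// A hedged illustration extending the example above (hypothetical C):
//
//   void f(int *x, int *y) { if (cond()) f(y, x); }
//
// Here x and y form a two-node cycle in the argument graph that nothing else
// points into; only the synthetic root makes that SCC reachable.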
- ArgumentMapTy ArgumentMap;
+/// This tracker checks whether callees are in the SCC, and if so it does not
+/// consider that a capture, instead adding it to the "Uses" list and
+/// continuing with the analysis.
+struct ArgumentUsesTracker : public CaptureTracker {
+ ArgumentUsesTracker(const SCCNodeSet &SCCNodes)
+ : Captured(false), SCCNodes(SCCNodes) {}
- // There is no root node for the argument graph, in fact:
- // void f(int *x, int *y) { if (...) f(x, y); }
- // is an example where the graph is disconnected. The SCCIterator requires a
- // single entry point, so we maintain a fake ("synthetic") root node that
- // uses every node. Because the graph is directed and nothing points into
- // the root, it will not participate in any SCCs (except for its own).
- ArgumentGraphNode SyntheticRoot;
+ void tooManyUses() override { Captured = true; }
- public:
- ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
+ bool captured(const Use *U) override {
+ CallSite CS(U->getUser());
+ if (!CS.getInstruction()) {
+ Captured = true;
+ return true;
+ }
- typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
+ Function *F = CS.getCalledFunction();
+ if (!F || F->isDeclaration() || F->mayBeOverridden() ||
+ !SCCNodes.count(F)) {
+ Captured = true;
+ return true;
+ }
- iterator begin() { return SyntheticRoot.Uses.begin(); }
- iterator end() { return SyntheticRoot.Uses.end(); }
- ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+ // Note: the callee and the two successor blocks *follow* the argument
+ // operands. This means there is no need to adjust UseIndex to account for
+ // these.
- ArgumentGraphNode *operator[](Argument *A) {
- ArgumentGraphNode &Node = ArgumentMap[A];
- Node.Definition = A;
- SyntheticRoot.Uses.push_back(&Node);
- return &Node;
- }
- };
+ unsigned UseIndex =
+ std::distance(const_cast<const Use *>(CS.arg_begin()), U);
- // This tracker checks whether callees are in the SCC, and if so it does not
- // consider that a capture, instead adding it to the "Uses" list and
- // continuing with the analysis.
- struct ArgumentUsesTracker : public CaptureTracker {
- ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes)
- : Captured(false), SCCNodes(SCCNodes) {}
+ assert(UseIndex < CS.data_operands_size() &&
+ "Indirect function calls should have been filtered above!");
- void tooManyUses() override { Captured = true; }
+ if (UseIndex >= CS.getNumArgOperands()) {
+ // Data operand, but not an argument operand -- must be a bundle operand.
+ assert(CS.hasOperandBundles() && "Must be!");
- bool captured(const Use *U) override {
- CallSite CS(U->getUser());
- if (!CS.getInstruction()) { Captured = true; return true; }
+ // CaptureTracking told us that we're being captured by an operand bundle
+ // use. In this case it does not matter if the callee is within our SCC
+ // or not -- we've been captured in some unknown way, and we have to be
+ // conservative.
+ Captured = true;
+ return true;
+ }
- Function *F = CS.getCalledFunction();
- if (!F || !SCCNodes.count(F)) { Captured = true; return true; }
-
- bool Found = false;
- Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end();
- PI != PE; ++PI, ++AI) {
- if (AI == AE) {
- assert(F->isVarArg() && "More params than args in non-varargs call");
- Captured = true;
- return true;
- }
- if (PI == U) {
- Uses.push_back(AI);
- Found = true;
- break;
- }
- }
- assert(Found && "Capturing call-site captured nothing?");
- (void)Found;
- return false;
+ if (UseIndex >= F->arg_size()) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ Captured = true;
+ return true;
}
- bool Captured; // True only if certainly captured (used outside our SCC).
- SmallVector<Argument*, 4> Uses; // Uses within our SCC.
+ Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
+ return false;
+ }
- const SmallPtrSet<Function*, 8> &SCCNodes;
- };
+ bool Captured; // True only if certainly captured (used outside our SCC).
+ SmallVector<Argument *, 4> Uses; // Uses within our SCC.
+
+ const SCCNodeSet &SCCNodes;
+};
}
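// Minimal usage sketch (assumptions: an Argument *A and an SCCNodeSet
// SCCNodes in scope, exactly as in addArgumentAttrs below):
//
//   ArgumentUsesTracker Tracker(SCCNodes);
//   PointerMayBeCaptured(A, &Tracker);
//   if (!Tracker.Captured && Tracker.Uses.empty()) {
//     // A never escapes and never flows into an SCC sibling; it is
//     // trivially nocapture.
//   }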
namespace llvm {
- template<> struct GraphTraits<ArgumentGraphNode*> {
- typedef ArgumentGraphNode NodeType;
- typedef SmallVectorImpl<ArgumentGraphNode*>::iterator ChildIteratorType;
+template <> struct GraphTraits<ArgumentGraphNode *> {
+ typedef ArgumentGraphNode NodeType;
+ typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
- static inline NodeType *getEntryNode(NodeType *A) { return A; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->Uses.begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->Uses.end();
- }
- };
- template<> struct GraphTraits<ArgumentGraph*>
- : public GraphTraits<ArgumentGraphNode*> {
- static NodeType *getEntryNode(ArgumentGraph *AG) {
- return AG->getEntryNode();
- }
- static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
- return AG->begin();
- }
- static ChildIteratorType nodes_end(ArgumentGraph *AG) {
- return AG->end();
- }
- };
+ static inline NodeType *getEntryNode(NodeType *A) { return A; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->Uses.begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->Uses.end();
+ }
+};
+template <>
+struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
+ static NodeType *getEntryNode(ArgumentGraph *AG) {
+ return AG->getEntryNode();
+ }
+ static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
+ return AG->begin();
+ }
+ static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
+};
}
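// These GraphTraits specializations are what let the generic scc_iterator
// walk the argument graph; a sketch of the idiom (used verbatim in
// addArgumentAttrs below):
//
//   for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
//     const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
//     // ... process one strongly connected component of arguments ...
//   }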
-// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
+/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
- const SmallPtrSet<Argument*, 8> &SCCNodes) {
-
- SmallVector<Use*, 32> Worklist;
- SmallSet<Use*, 32> Visited;
- int Count = 0;
+ const SmallPtrSet<Argument *, 8> &SCCNodes) {
+
+ SmallVector<Use *, 32> Worklist;
+ SmallSet<Use *, 32> Visited;
// inalloca arguments are always clobbered by the call.
if (A->hasInAllocaAttr())
@@ -425,9 +408,6 @@ determinePointerReadAttrs(Argument *A,
// We don't need to track IsWritten. If A is written to, return immediately.
for (Use &U : A->uses()) {
- if (Count++ >= 20)
- return Attribute::None;
-
Visited.insert(&U);
Worklist.push_back(&U);
}
@@ -435,7 +415,6 @@ determinePointerReadAttrs(Argument *A,
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
Instruction *I = cast<Instruction>(U->getUser());
- Value *V = U->get();
switch (I->getOpcode()) {
case Instruction::BitCast:
@@ -479,24 +458,44 @@ determinePointerReadAttrs(Argument *A,
return Attribute::None;
}
- Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
- for (CallSite::arg_iterator A = B; A != E; ++A, ++AI) {
- if (A->get() == V) {
- if (AI == AE) {
- assert(F->isVarArg() &&
- "More params than args in non-varargs call.");
- return Attribute::None;
- }
- Captures &= !CS.doesNotCapture(A - B);
- if (SCCNodes.count(AI))
- continue;
- if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B))
- return Attribute::None;
- if (!CS.doesNotAccessMemory(A - B))
- IsRead = true;
- }
+ // Note: the callee and the two successor blocks *follow* the argument
+ // operands. This means there is no need to adjust UseIndex to account
+ // for these.
+
+ unsigned UseIndex = std::distance(CS.arg_begin(), U);
+
+ // U cannot be the callee operand use: since we're exploring the
+ // transitive uses of an Argument, having such a use be a callee would
+ // imply the CallSite is an indirect call or invoke; and we'd take the
+ // early exit above.
+ assert(UseIndex < CS.data_operands_size() &&
+ "Data operand use expected!");
+
+ bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands();
+
+ if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ return Attribute::None;
}
+
+ Captures &= !CS.doesNotCapture(UseIndex);
+
+ // Since the optimizer (by design) cannot see the data flow corresponding
+ // to an operand bundle use, these cannot participate in the optimistic
+ // SCC analysis. Instead, we model the operand bundle uses as arguments
+ // in a call to a function external to the SCC.
+ if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) ||
+ IsOperandBundleUse) {
+
+ // The accessors used on CallSite here do the right thing for calls and
+ // invokes with operand bundles.
+
+ if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex))
+ return Attribute::None;
+ if (!CS.doesNotAccessMemory(UseIndex))
+ IsRead = true;
+ }
+
AddUsersToWorklistIfCapturing();
break;
}
@@ -517,21 +516,10 @@ determinePointerReadAttrs(Argument *A,
return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}
-/// AddArgumentAttrs - Deduce nocapture attributes for the SCC.
-bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
+/// Deduce nocapture attributes for the SCC.
+static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
- if (F && !F->isDeclaration() && !F->mayBeOverridden() &&
- !F->hasFnAttribute(Attribute::OptimizeNone))
- SCCNodes.insert(F);
- }
-
ArgumentGraph AG;
AttrBuilder B;
@@ -539,14 +527,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// Check each function in turn, determining which pointer arguments are not
// captured.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or function we're trying not to optimize - only a problem
- // for arguments that we pass to it.
- continue;
-
+ for (Function *F : SCCNodes) {
// Definitions with weak linkage may be overridden at linktime with
// something that captures pointers, so treat them like declarations.
if (F->isDeclaration() || F->mayBeOverridden())
@@ -556,8 +537,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// a value can't capture arguments. Don't analyze them.
if (F->onlyReadsMemory() && F->doesNotThrow() &&
F->getReturnType()->isVoidTy()) {
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
- A != E; ++A) {
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
+ ++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
@@ -567,26 +548,30 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
continue;
}
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
- A != E; ++A) {
- if (!A->getType()->isPointerTy()) continue;
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
+ ++A) {
+ if (!A->getType()->isPointerTy())
+ continue;
bool HasNonLocalUses = false;
if (!A->hasNoCaptureAttr()) {
ArgumentUsesTracker Tracker(SCCNodes);
- PointerMayBeCaptured(A, &Tracker);
+ PointerMayBeCaptured(&*A, &Tracker);
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B));
+ A->addAttr(
+ AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
} else {
// If it's not trivially captured and not trivially not captured,
// then it must be calling into another function in our SCC. Save
// its particulars for Argument-SCC analysis later.
- ArgumentGraphNode *Node = AG[A];
- for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(),
- UE = Tracker.Uses.end(); UI != UE; ++UI) {
+ ArgumentGraphNode *Node = AG[&*A];
+ for (SmallVectorImpl<Argument *>::iterator
+ UI = Tracker.Uses.begin(),
+ UE = Tracker.Uses.end();
+ UI != UE; ++UI) {
Node->Uses.push_back(AG[*UI]);
if (*UI != A)
HasNonLocalUses = true;
@@ -600,9 +585,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// Note that we don't allow any calls at all here, or else our result
// will be dependent on the iteration order through the functions in the
// SCC.
- SmallPtrSet<Argument*, 8> Self;
- Self.insert(A);
- Attribute::AttrKind R = determinePointerReadAttrs(A, Self);
+ SmallPtrSet<Argument *, 8> Self;
+ Self.insert(&*A);
+ Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None) {
AttrBuilder B;
B.addAttribute(R);
@@ -621,10 +606,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// made. If the definition doesn't have a 'nocapture' attribute by now, it
// captures.
- for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
+ for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
if (ArgumentSCC.size() == 1) {
- if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
+ if (!ArgumentSCC[0]->Definition)
+ continue; // synthetic root node
// eg. "void f(int* x) { if (...) f(x); }"
if (ArgumentSCC[0]->Uses.size() == 1 &&
@@ -646,9 +632,10 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
SCCCaptured = true;
}
}
- if (SCCCaptured) continue;
+ if (SCCCaptured)
+ continue;
- SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
+ SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
// Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
// quickly looking up whether a given Argument is in this ArgumentSCC.
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) {
@@ -658,8 +645,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *N = *I;
- for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
- UE = N->Uses.end(); UI != UE; ++UI) {
+ for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(),
+ UE = N->Uses.end();
+ UI != UE; ++UI) {
Argument *A = (*UI)->Definition;
if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
continue;
@@ -667,7 +655,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
break;
}
}
- if (SCCCaptured) continue;
+ if (SCCCaptured)
+ continue;
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
@@ -704,8 +693,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
if (ReadAttr != Attribute::None) {
AttrBuilder B, R;
B.addAttribute(ReadAttr);
- R.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone);
+ R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
// Clear out existing readonly/readnone attributes
@@ -720,10 +708,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
return Changed;
}
-/// IsFunctionMallocLike - A function is malloc-like if it returns either null
-/// or a pointer that doesn't alias any other pointer visible to the caller.
-bool FunctionAttrs::IsFunctionMallocLike(Function *F,
- SmallPtrSet<Function*, 8> &SCCNodes) const {
+/// Tests whether a function is "malloc-like".
+///
+/// A function is "malloc-like" if it returns either null or a pointer that
+/// doesn't alias any other pointer visible to the caller.
+static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
SmallSetVector<Value *, 8> FlowsToReturn;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
@@ -744,39 +733,38 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
switch (RVI->getOpcode()) {
- // Extend the analysis by looking upwards.
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::AddrSpaceCast:
- FlowsToReturn.insert(RVI->getOperand(0));
- continue;
- case Instruction::Select: {
- SelectInst *SI = cast<SelectInst>(RVI);
- FlowsToReturn.insert(SI->getTrueValue());
- FlowsToReturn.insert(SI->getFalseValue());
- continue;
- }
- case Instruction::PHI: {
- PHINode *PN = cast<PHINode>(RVI);
- for (Value *IncValue : PN->incoming_values())
- FlowsToReturn.insert(IncValue);
- continue;
- }
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (Value *IncValue : PN->incoming_values())
+ FlowsToReturn.insert(IncValue);
+ continue;
+ }
- // Check whether the pointer came from an allocation.
- case Instruction::Alloca:
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
break;
- case Instruction::Call:
- case Instruction::Invoke: {
- CallSite CS(RVI);
- if (CS.paramHasAttr(0, Attribute::NoAlias))
- break;
- if (CS.getCalledFunction() &&
- SCCNodes.count(CS.getCalledFunction()))
- break;
- } // fall-through
- default:
- return false; // Did not come from an allocation.
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
}
if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
@@ -786,24 +774,11 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
return true;
}
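// A hedged IR example (hypothetical function) of the malloc-like property:
// every value flowing to the return is either null or a fresh allocation, so
// the result cannot alias anything the caller can already see.
//
//   declare noalias i8* @malloc(i64)
//   define i8* @make(i64 %n) {
//     %p = call i8* @malloc(i64 %n)    ; noalias return value
//     ret i8* %p                       ; @make qualifies as malloc-like
//   }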
-/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
-bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- SCCNodes.insert((*I)->getFunction());
-
+/// Deduce noalias attributes for the SCC.
+static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
// Check each function in turn, determining which functions return noalias
// pointers.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or node we don't want to optimize - skip it;
- return false;
-
+ for (Function *F : SCCNodes) {
// Already noalias.
if (F->doesNotAlias(0))
continue;
@@ -813,18 +788,17 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
if (F->isDeclaration() || F->mayBeOverridden())
return false;
- // We annotate noalias return values, which are only applicable to
+ // We annotate noalias return values, which are only applicable to
// pointer types.
if (!F->getReturnType()->isPointerTy())
continue;
- if (!IsFunctionMallocLike(F, SCCNodes))
+ if (!isFunctionMallocLike(F, SCCNodes))
return false;
}
bool MadeChange = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ for (Function *F : SCCNodes) {
if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
continue;
@@ -836,880 +810,249 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
-/// inferPrototypeAttributes - Analyze the name and prototype of the
-/// given function and set any applicable attributes. Returns true
-/// if any attributes were set and false otherwise.
-bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
- if (F.hasFnAttribute(Attribute::OptimizeNone))
- return false;
+/// Tests whether this function is known to not return null.
+///
+/// Requires that the function returns a pointer.
+///
+/// Returns true if it believes the function will not return null, and sets
+/// \p Speculative based on whether the returned conclusion is a speculative
+/// conclusion due to SCC calls.
+static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
+ const TargetLibraryInfo &TLI, bool &Speculative) {
+ assert(F->getReturnType()->isPointerTy() &&
+ "nonnull only meaningful on pointer types");
+ Speculative = false;
- FunctionType *FTy = F.getFunctionType();
- LibFunc::Func TheLibFunc;
- if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
- return false;
+ SmallSetVector<Value *, 8> FlowsToReturn;
+ for (BasicBlock &BB : *F)
+ if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
- switch (TheLibFunc) {
- case LibFunc::strlen:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::strchr:
- case LibFunc::strrchr:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- break;
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
- case LibFunc::strcat:
- case LibFunc::strncat:
- case LibFunc::strncpy:
- case LibFunc::stpncpy:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::strxfrm:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::strcmp: //0,1
- case LibFunc::strspn: // 0,1
- case LibFunc::strncmp: // 0,1
- case LibFunc::strcspn: //0,1
- case LibFunc::strcoll: //0,1
- case LibFunc::strcasecmp: // 0,1
- case LibFunc::strncasecmp: //
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::strstr:
- case LibFunc::strpbrk:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::strtok:
- case LibFunc::strtok_r:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::scanf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::setbuf:
- case LibFunc::setvbuf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::strdup:
- case LibFunc::strndup:
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::stat:
- case LibFunc::statvfs:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::sscanf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::sprintf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::snprintf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 3);
- break;
- case LibFunc::setitimer:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::system:
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "system" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::malloc:
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::memcmp:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::memchr:
- case LibFunc::memrchr:
- if (FTy->getNumParams() != 3)
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- break;
- case LibFunc::modf:
- case LibFunc::modff:
- case LibFunc::modfl:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::memcpy:
- case LibFunc::memccpy:
- case LibFunc::memmove:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::memalign:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::mkdir:
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::mktime:
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::realloc:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::read:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "read" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::rewind:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::rmdir:
- case LibFunc::remove:
- case LibFunc::realpath:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::rename:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::readlink:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::write:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "write" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::bcopy:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::bcmp:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::bzero:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::calloc:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::chmod:
- case LibFunc::chown:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::ctermid:
- case LibFunc::clearerr:
- case LibFunc::closedir:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::atoi:
- case LibFunc::atol:
- case LibFunc::atof:
- case LibFunc::atoll:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::access:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::fopen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fdopen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::feof:
- case LibFunc::free:
- case LibFunc::fseek:
- case LibFunc::ftell:
- case LibFunc::fgetc:
- case LibFunc::fseeko:
- case LibFunc::ftello:
- case LibFunc::fileno:
- case LibFunc::fflush:
- case LibFunc::fclose:
- case LibFunc::fsetpos:
- case LibFunc::flockfile:
- case LibFunc::funlockfile:
- case LibFunc::ftrylockfile:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::ferror:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F);
- break;
- case LibFunc::fputc:
- case LibFunc::fstat:
- case LibFunc::frexp:
- case LibFunc::frexpf:
- case LibFunc::frexpl:
- case LibFunc::fstatvfs:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::fgets:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 3);
- break;
- case LibFunc::fread:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::fwrite:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::fputs:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::fscanf:
- case LibFunc::fprintf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fgetpos:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::getc:
- case LibFunc::getlogin_r:
- case LibFunc::getc_unlocked:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::getenv:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::gets:
- case LibFunc::getchar:
- setDoesNotThrow(F);
- break;
- case LibFunc::getitimer:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::getpwnam:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::ungetc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::uname:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::unlink:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::unsetenv:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::utime:
- case LibFunc::utimes:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::putc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::puts:
- case LibFunc::printf:
- case LibFunc::perror:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::pread:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "pread" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::pwrite:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "pwrite" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::putchar:
- setDoesNotThrow(F);
- break;
- case LibFunc::popen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::pclose:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::vscanf:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::vsscanf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::vfscanf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::valloc:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::vprintf:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::vfprintf:
- case LibFunc::vsprintf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::vsnprintf:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 3);
- break;
- case LibFunc::open:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::opendir:
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::tmpfile:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::times:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::htonl:
- case LibFunc::htons:
- case LibFunc::ntohl:
- case LibFunc::ntohs:
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- break;
- case LibFunc::lstat:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::lchown:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::qsort:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
- return false;
- // May throw; places call through function pointer.
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::dunder_strdup:
- case LibFunc::dunder_strndup:
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::dunder_strtok_r:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::under_IO_getc:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::under_IO_putc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::dunder_isoc99_scanf:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::stat64:
- case LibFunc::lstat64:
- case LibFunc::statvfs64:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::dunder_isoc99_sscanf:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fopen64:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fseeko64:
- case LibFunc::ftello64:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::tmpfile64:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::fstat64:
- case LibFunc::fstatvfs64:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::open64:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i];
+
+ // If this value is locally known to be non-null, we're good
+ if (isKnownNonNull(RetVal, &TLI))
+ continue;
+
+ // Otherwise, we need to look upwards since we can't make any local
+ // conclusions.
+ Instruction *RVI = dyn_cast<Instruction>(RetVal);
+ if (!RVI)
return false;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::gettimeofday:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ continue;
+ }
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ Function *Callee = CS.getCalledFunction();
+ // A call to a node within the SCC is assumed to return null until
+ // proven otherwise
+ if (Callee && SCCNodes.count(Callee)) {
+ Speculative = true;
+ continue;
+ }
return false;
- // Currently some platforms have the restrict keyword on the arguments to
- // gettimeofday. To be conservative, do not add noalias to gettimeofday's
- // arguments.
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- default:
- // Didn't mark any attributes.
- return false;
+ }
+ default:
+ return false; // Unknown source, may be null
+ }
+ llvm_unreachable("should have either continued or returned");
}
return true;
}
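// A hedged sketch (hypothetical function) of the speculative path: the
// self-call is optimistically assumed nonnull, so Speculative is set and the
// verdict only becomes final if the whole SCC agrees.
//
//   define i8* @walk(i8* %p) {
//     ; ...compute %q...
//     %r = call i8* @walk(i8* %q)   ; call within the SCC: Speculative = true
//     ret i8* %r
//   }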
-/// annotateLibraryCalls - Adds attributes to well-known standard library
-/// call declarations.
-bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+/// Deduce nonnull attributes for the SCC.
+static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
+ const TargetLibraryInfo &TLI) {
+ // Speculate that all functions in the SCC return only nonnull
+ // pointers. We may refute this as we analyze functions.
+ bool SCCReturnsNonNull = true;
+
bool MadeChange = false;
- // Check each function in turn annotating well-known library function
- // declarations with attributes.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ // Check each function in turn, determining which functions return nonnull
+ // pointers.
+ for (Function *F : SCCNodes) {
+ // Already nonnull.
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::NonNull))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate nonnull return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
- if (F && F->isDeclaration())
- MadeChange |= inferPrototypeAttributes(*F);
+ bool Speculative = false;
+ if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) {
+ if (!Speculative) {
+ // Mark the function eagerly since we may discover a function
+ // which prevents us from speculating about the entire SCC
+ DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ ++NumNonNullReturn;
+ MadeChange = true;
+ }
+ continue;
+ }
+ // At least one function returns something which could be null; we can't
+ // speculate any more.
+ SCCReturnsNonNull = false;
+ }
+
+ if (SCCReturnsNonNull) {
+ for (Function *F : SCCNodes) {
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::NonNull) ||
+ !F->getReturnType()->isPointerTy())
+ continue;
+
+ DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ ++NumNonNullReturn;
+ MadeChange = true;
+ }
}
return MadeChange;
}
+static bool setDoesNotRecurse(Function &F) {
+ if (F.doesNotRecurse())
+ return false;
+ F.setDoesNotRecurse();
+ ++NumNoRecurse;
+ return true;
+}
+
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
+ SmallVectorImpl<WeakVH> &Revisit) {
+ // Try to identify functions that do not recurse.
+
+ // If the SCC contains multiple nodes, we know for sure there is recursion.
+ if (!SCC.isSingular())
+ return false;
+
+ const CallGraphNode *CGN = *SCC.begin();
+ Function *F = CGN->getFunction();
+ if (!F || F->isDeclaration() || F->doesNotRecurse())
+ return false;
+
+ // If all of the calls in F are identifiable and are to norecurse functions, F
+ // is norecurse. This check also detects self-recursion: F is not currently
+ // marked norecurse, so any call from F to F will fail the check below.
+ if (std::all_of(CGN->begin(), CGN->end(),
+ [](const CallGraphNode::CallRecord &CR) {
+ Function *F = CR.second->getFunction();
+ return F && F->doesNotRecurse();
+ }))
+ // All call targets are known and norecurse, so F cannot recurse.
+ return setDoesNotRecurse(*F);
+
+ // We know that F is not obviously recursive, but we haven't been able to
+ // prove that it doesn't actually recurse. Add it to the Revisit list to try
+ // again top-down later.
+ Revisit.push_back(F);
+ return false;
+}
+
+static bool addNoRecurseAttrsTopDownOnly(Function *F) {
+ // If F is internal and all uses are in norecurse functions, then F is also
+ // norecurse.
+ if (F->doesNotRecurse())
+ return false;
+ if (F->hasInternalLinkage()) {
+ for (auto *U : F->users())
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (!I->getParent()->getParent()->doesNotRecurse())
+ return false;
+ } else {
+ return false;
+ }
+ return setDoesNotRecurse(*F);
+ }
+ return false;
+}
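A minimal sketch of the bottom-up rule, with hypothetical inputs: a function with no calls is trivially norecurse, a caller of only-norecurse functions follows, and a self-call defeats the all_of check, landing the function on the Revisit list for the top-down walk in doFinalization below.

static int leaf(int x) { return x + 1; }        // no calls at all: norecurse
static int chain(int x) { return leaf(x); }     // every callee norecurse: norecurse
int self(int x) { return x ? self(x - 1) : 0; } // self-call: the all_of check
                                                // fails, so self stays unmarked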
+
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
- AA = &getAnalysis<AliasAnalysis>();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ bool Changed = false;
- bool Changed = annotateLibraryCalls(SCC);
- Changed |= AddReadAttrs(SCC);
- Changed |= AddArgumentAttrs(SCC);
- Changed |= AddNoAliasAttrs(SCC);
+ // We compute dedicated AA results for each function in the SCC as needed. We
+ // use a lambda referencing external objects so that they live long enough to
+ // be queried, but we re-use them each time.
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+ AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+ return *AAR;
+ };
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
+ // whether a given CallGraphNode is in this SCC. Also track whether there are
+ // any external or opt-none nodes that will prevent us from optimizing any
+ // part of the SCC.
+ SCCNodeSet SCCNodes;
+ bool ExternalNode = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
+ // External node or function we're trying not to optimize - we both avoid
+ // transforming them and avoid leveraging information they provide.
+ ExternalNode = true;
+ continue;
+ }
+
+ SCCNodes.insert(F);
+ }
+
+ Changed |= addReadAttrs(SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(SCCNodes);
+
+ // If we have no external nodes participating in the SCC, we can deduce some
+ // more precise attributes as well.
+ if (!ExternalNode) {
+ Changed |= addNoAliasAttrs(SCCNodes);
+ Changed |= addNonNullAttrs(SCCNodes, *TLI);
+ }
+
+ Changed |= addNoRecurseAttrs(SCC, Revisit);
+ return Changed;
+}
+
+bool FunctionAttrs::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ // When iterating over SCCs we visit functions in a bottom-up fashion. Some of
+ // the rules we have for identifying norecurse functions work best with a
+ // top-down walk, so look again at all the functions we previously marked as
+ // worth revisiting, in top-down order.
+ for (auto &F : reverse(Revisit))
+ if (F)
+ Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F));
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
new file mode 100644
index 000000000000..d8b677b966f2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -0,0 +1,433 @@
+//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Function import based on summaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/FunctionImport.h"
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SourceMgr.h"
+
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "function-import"
+
+/// Limit on instruction count of imported functions.
+static cl::opt<unsigned> ImportInstrLimit(
+ "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
+ cl::desc("Only import functions with less than N instructions"));
+
+// Lazily load a module from \p FileName in \p Context.
+static std::unique_ptr<Module> loadFile(const std::string &FileName,
+ LLVMContext &Context) {
+ SMDiagnostic Err;
+ DEBUG(dbgs() << "Loading '" << FileName << "'\n");
+ std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context);
+ if (!Result) {
+ Err.print("function-import", errs());
+ return nullptr;
+ }
+
+ Result->materializeMetadata();
+ UpgradeDebugInfo(*Result);
+
+ return Result;
+}
+
+namespace {
+/// Helper to load on demand a Module from file and cache it for subsequent
+/// queries. It can be used with the FunctionImporter.
+class ModuleLazyLoaderCache {
+ /// Cache of lazily loaded module for import.
+ StringMap<std::unique_ptr<Module>> ModuleMap;
+
+ /// Retrieve a Module from the cache or lazily load it on demand.
+ std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
+
+public:
+ /// Create the loader; Modules will be initialized in \p Context.
+ ModuleLazyLoaderCache(std::function<
+ std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
+ : createLazyModule(createLazyModule) {}
+
+ /// Retrieve a Module from the cache or lazily load it on demand.
+ Module &operator()(StringRef FileName);
+
+ std::unique_ptr<Module> takeModule(StringRef FileName) {
+ auto I = ModuleMap.find(FileName);
+ assert(I != ModuleMap.end());
+ std::unique_ptr<Module> Ret = std::move(I->second);
+ ModuleMap.erase(I);
+ return Ret;
+ }
+};
+
+// Get a Module for \p FileName from the cache, or load it lazily.
+Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
+ auto &Module = ModuleMap[Identifier];
+ if (!Module)
+ Module = createLazyModule(Identifier);
+ return *Module;
+}
+} // anonymous namespace
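A minimal usage sketch of the cache, assuming a context Ctx and a hypothetical file name "a.bc": repeated queries return the same lazily loaded Module, and takeModule transfers ownership out of the cache.

static void loaderCacheExample(LLVMContext &Ctx) {
  ModuleLazyLoaderCache Cache(
      [&Ctx](StringRef FileName) { return loadFile(FileName, Ctx); });
  Module &M = Cache("a.bc");  // first query: lazily parses "a.bc"
  (void)Cache("a.bc");        // second query: cache hit, same Module
  std::unique_ptr<Module> Owned = Cache.takeModule("a.bc"); // take ownership
  (void)M;
  (void)Owned;
}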
+
+/// Walk through the instructions in \p F looking for external
+/// calls not already in the \p CalledFunctions set. If any are
+/// found they are added to the \p Worklist for importing.
+static void findExternalCalls(const Module &DestModule, Function &F,
+ const FunctionInfoIndex &Index,
+ StringSet<> &CalledFunctions,
+ SmallVector<StringRef, 64> &Worklist) {
+ // We need to suffix internal function calls imported from other modules;
+ // prepare the suffix ahead of time.
+ std::string Suffix;
+ if (F.getParent() != &DestModule)
+ Suffix =
+ (Twine(".llvm.") +
+ Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
+
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (isa<CallInst>(I)) {
+ auto CalledFunction = cast<CallInst>(I).getCalledFunction();
+ // Insert any new external calls that have not already been
+ // added to set/worklist.
+ if (!CalledFunction || !CalledFunction->hasName())
+ continue;
+ // Ignore intrinsics early
+ if (CalledFunction->isIntrinsic()) {
+ assert(CalledFunction->getIntrinsicID() != 0);
+ continue;
+ }
+ auto ImportedName = CalledFunction->getName();
+ auto Renamed = (ImportedName + Suffix).str();
+ // Rename internal functions
+ if (CalledFunction->hasInternalLinkage()) {
+ ImportedName = Renamed;
+ }
+ auto It = CalledFunctions.insert(ImportedName);
+ if (!It.second) {
+ // This is a call to a function we already considered, skip.
+ continue;
+ }
+ // Ignore functions already present in the destination module
+ auto *SrcGV = DestModule.getNamedValue(ImportedName);
+ if (SrcGV) {
+ assert(isa<Function>(SrcGV) && "Name collision during import");
+ if (!cast<Function>(SrcGV)->isDeclaration()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
+ << ImportedName << " already in DestinationModule\n");
+ continue;
+ }
+ }
+
+ Worklist.push_back(It.first->getKey());
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Adding callee for : " << ImportedName << " : "
+ << F.getName() << "\n");
+ }
+ }
+ }
+}
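To make the suffixing concrete, a minimal sketch assuming a hypothetical module id of 42: an internal function foo from that source module is tracked under the same name the linker gives it when it is promoted during import.

static std::string renamedKeyExample() {
  std::string Suffix = (Twine(".llvm.") + Twine(42u)).str(); // ".llvm.42"
  return "foo" + Suffix;                                     // "foo.llvm.42"
}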
+
+// Helper function: given a worklist and an index, process the entire worklist
+// and decide what to import based on the summary information.
+//
+// Nothing is actually imported here; functions are materialized in their
+// source module and analyzed there.
+//
+// \p ModuleToFunctionsToImportMap is filled with the set of Functions to
+// import per Module.
+static void GetImportList(Module &DestModule,
+ SmallVector<StringRef, 64> &Worklist,
+ StringSet<> &CalledFunctions,
+ std::map<StringRef, DenseSet<const GlobalValue *>>
+ &ModuleToFunctionsToImportMap,
+ const FunctionInfoIndex &Index,
+ ModuleLazyLoaderCache &ModuleLoaderCache) {
+ while (!Worklist.empty()) {
+ auto CalledFunctionName = Worklist.pop_back_val();
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
+ << CalledFunctionName << "\n");
+
+ // Try to get a summary for this function call.
+ auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
+ if (InfoList == Index.end()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
+ << CalledFunctionName << " Ignoring.\n");
+ continue;
+ }
+ assert(!InfoList->second.empty() && "No summary, error at import?");
+
+ // Comdats can have multiple entries; FIXME: what do we do with them?
+ auto &Info = InfoList->second[0];
+ assert(Info && "Nullptr in list, error importing summaries?\n");
+
+ auto *Summary = Info->functionSummary();
+ if (!Summary) {
+ // FIXME: in case we are lazyloading summaries, we can do it now.
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Missing summary for " << CalledFunctionName
+ << ", error at import?\n");
+ llvm_unreachable("Missing summary");
+ }
+
+ if (Summary->instCount() > ImportInstrLimit) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
+ << CalledFunctionName << " with " << Summary->instCount()
+ << " instructions (limit " << ImportInstrLimit << ")\n");
+ continue;
+ }
+
+ // Get the module path from the summary.
+ auto ModuleIdentifier = Summary->modulePath();
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
+ << CalledFunctionName << " from " << ModuleIdentifier << "\n");
+
+ auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
+
+ // The function that we will import!
+ GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
+
+ if (!SGV) {
+ // The destination module references functions by their renamed names
+ // when importing a function that was originally local in the source
+ // module. The source module we have might not have been renamed, so try
+ // to remove the suffix added during the renaming to recover the original
+ // name in the source module.
+ std::pair<StringRef, StringRef> Split =
+ CalledFunctionName.split(".llvm.");
+ SGV = SrcModule.getNamedValue(Split.first);
+ assert(SGV && "Can't find function to import in source module");
+ }
+ if (!SGV) {
+ report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
+ "' in Module '" + SrcModule.getModuleIdentifier() +
+ "', error in the summary?\n");
+ }
+
+ Function *F = dyn_cast<Function>(SGV);
+ if (!F && isa<GlobalAlias>(SGV)) {
+ auto *SGA = dyn_cast<GlobalAlias>(SGV);
+ F = dyn_cast<Function>(SGA->getBaseObject());
+ CalledFunctionName = F->getName();
+ }
+ assert(F && "Imported Function is ... not a Function");
+
+ // We cannot import weak_any functions/aliases without possibly affecting
+ // the order they are seen and selected by the linker, changing program
+ // semantics.
+ if (SGV->hasWeakAnyLinkage()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Ignoring import request for weak-any "
+ << (isa<Function>(SGV) ? "function " : "alias ")
+ << CalledFunctionName << " from "
+ << SrcModule.getModuleIdentifier() << "\n");
+ continue;
+ }
+
+ // Add the function to the import list
+ auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
+ Entry.insert(F);
+
+ // Process the newly imported functions and add callees to the worklist.
+ F->materialize();
+ findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist);
+ }
+}
+
+// Automatically import functions in Module \p DestModule based on the summaries
+// index.
+//
+// The current implementation imports every called function that exists in the
+// summaries index.
+bool FunctionImporter::importFunctions(Module &DestModule) {
+ DEBUG(dbgs() << "Starting import for Module "
+ << DestModule.getModuleIdentifier() << "\n");
+ unsigned ImportedCount = 0;
+
+ /// First step is collecting the called external functions.
+ StringSet<> CalledFunctions;
+ SmallVector<StringRef, 64> Worklist;
+ for (auto &F : DestModule) {
+ if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
+ continue;
+ findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist);
+ }
+ if (Worklist.empty())
+ return false;
+
+ /// Second step: for every call to an external function, try to import it.
+
+ // Linker that will be used for importing functions
+ Linker TheLinker(DestModule);
+
+ // Map of Module -> List of Function to import from the Module
+ std::map<StringRef, DenseSet<const GlobalValue *>>
+ ModuleToFunctionsToImportMap;
+
+ // Analyze the summaries and get the list of functions to import by
+ // populating ModuleToFunctionsToImportMap
+ ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
+ GetImportList(DestModule, Worklist, CalledFunctions,
+ ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
+ assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
+
+ StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
+ ModuleToTempMDValsMap;
+
+ // Do the actual import of functions now, one Module at a time
+ for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
+ // Get the module for the import
+ auto &FunctionsToImport = FunctionsToImportPerModule.second;
+ std::unique_ptr<Module> SrcModule =
+ ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
+ assert(&DestModule.getContext() == &SrcModule->getContext() &&
+ "Context mismatch");
+
+ // Save the mapping of value ids to temporary metadata created when
+ // importing this function. If we have already imported from this module,
+ // add new temporary metadata to the existing mapping.
+ auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()];
+ if (!TempMDVals)
+ TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
+
+ // Link in the specified functions.
+ if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
+ &Index, &FunctionsToImport, TempMDVals.get()))
+ report_fatal_error("Function Import: link error");
+
+ ImportedCount += FunctionsToImport.size();
+ }
+
+ // Now link in metadata for all modules from which we imported functions.
+ for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
+ ModuleToTempMDValsMap) {
+ // Load the specified source module.
+ auto &SrcModule = ModuleLoaderCache(SME.getKey());
+
+ // Link in all necessary metadata from this module.
+ if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
+ << DestModule.getModuleIdentifier() << "\n");
+ return ImportedCount;
+}
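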
+
+/// Summary file to use for function importing when using -function-import from
+/// the command line.
+static cl::opt<std::string>
+ SummaryFile("summary-file",
+ cl::desc("The summary file to use for function importing."));
+
+static void diagnosticHandler(const DiagnosticInfo &DI) {
+ raw_ostream &OS = errs();
+ DiagnosticPrinterRawOStream DP(OS);
+ DI.print(DP);
+ OS << '\n';
+}
+
+/// Parse the function index out of an IR file and return the function
+/// index object if found, or nullptr if not.
+static std::unique_ptr<FunctionInfoIndex>
+getFunctionIndexForFile(StringRef Path, std::string &Error,
+ DiagnosticHandlerFunction DiagnosticHandler) {
+ std::unique_ptr<MemoryBuffer> Buffer;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(Path);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ Error = EC.message();
+ return nullptr;
+ }
+ Buffer = std::move(BufferOrErr.get());
+ ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
+ object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(),
+ DiagnosticHandler);
+ if (std::error_code EC = ObjOrErr.getError()) {
+ Error = EC.message();
+ return nullptr;
+ }
+ return (*ObjOrErr)->takeIndex();
+}
+
+namespace {
+/// Pass that performs cross-module function import provided a summary file.
+class FunctionImportPass : public ModulePass {
+ /// Optional function summary index to use for importing; otherwise
+ /// the summary-file option must be specified.
+ const FunctionInfoIndex *Index;
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ const char *getPassName() const override {
+ return "Function Importing";
+ }
+
+ explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
+ : ModulePass(ID), Index(Index) {}
+
+ bool runOnModule(Module &M) override {
+ if (SummaryFile.empty() && !Index)
+ report_fatal_error("error: -function-import requires -summary-file or "
+ "file from frontend\n");
+ std::unique_ptr<FunctionInfoIndex> IndexPtr;
+ if (!SummaryFile.empty()) {
+ if (Index)
+ report_fatal_error("error: -summary-file and index from frontend\n");
+ std::string Error;
+ IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
+ if (!IndexPtr) {
+ errs() << "Error loading file '" << SummaryFile << "': " << Error
+ << "\n";
+ return false;
+ }
+ Index = IndexPtr.get();
+ }
+
+ // Perform the import now.
+ auto ModuleLoader = [&M](StringRef Identifier) {
+ return loadFile(Identifier, M.getContext());
+ };
+ FunctionImporter Importer(*Index, ModuleLoader);
+ return Importer.importFunctions(M);
+ }
+};
+} // anonymous namespace
+
+char FunctionImportPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
+ "Summary Based Function Import", false, false)
+INITIALIZE_PASS_END(FunctionImportPass, "function-import",
+ "Summary Based Function Import", false, false)
+
+namespace llvm {
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+ return new FunctionImportPass(Index);
+}
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 61d0ff94a343..9b276ed28e2e 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -92,33 +92,28 @@ bool GlobalDCE::runOnModule(Module &M) {
ComdatMembers.insert(std::make_pair(C, &GA));
// Loop over the module, adding globals which are obviously necessary.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (Function &F : M) {
+ Changed |= RemoveUnusedGlobalValue(F);
// Functions with external linkage are needed if they have a body
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
- if (!I->isDiscardableIfUnused())
- GlobalIsNeeded(I);
- }
+ if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage())
+ if (!F.isDiscardableIfUnused())
+ GlobalIsNeeded(&F);
}
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (GlobalVariable &GV : M.globals()) {
+ Changed |= RemoveUnusedGlobalValue(GV);
// Externally visible & appending globals are needed, if they have an
// initializer.
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
- if (!I->isDiscardableIfUnused())
- GlobalIsNeeded(I);
- }
+ if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage())
+ if (!GV.isDiscardableIfUnused())
+ GlobalIsNeeded(&GV);
}
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (GlobalAlias &GA : M.aliases()) {
+ Changed |= RemoveUnusedGlobalValue(GA);
// Externally visible aliases are needed.
- if (!I->isDiscardableIfUnused()) {
- GlobalIsNeeded(I);
- }
+ if (!GA.isDiscardableIfUnused())
+ GlobalIsNeeded(&GA);
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
@@ -126,52 +121,50 @@ bool GlobalDCE::runOnModule(Module &M) {
//
// The first pass is to drop initializers of global variables which are dead.
- std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!AliveGlobals.count(I)) {
- DeadGlobalVars.push_back(I); // Keep track of dead globals
- if (I->hasInitializer()) {
- Constant *Init = I->getInitializer();
- I->setInitializer(nullptr);
+ std::vector<GlobalVariable *> DeadGlobalVars; // Keep track of dead globals
+ for (GlobalVariable &GV : M.globals())
+ if (!AliveGlobals.count(&GV)) {
+ DeadGlobalVars.push_back(&GV); // Keep track of dead globals
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ GV.setInitializer(nullptr);
if (isSafeToDestroyConstant(Init))
Init->destroyConstant();
}
}
// The second pass drops the bodies of functions which are dead...
- std::vector<Function*> DeadFunctions;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!AliveGlobals.count(I)) {
- DeadFunctions.push_back(I); // Keep track of dead globals
- if (!I->isDeclaration())
- I->deleteBody();
+ std::vector<Function *> DeadFunctions;
+ for (Function &F : M)
+ if (!AliveGlobals.count(&F)) {
+ DeadFunctions.push_back(&F); // Keep track of dead globals
+ if (!F.isDeclaration())
+ F.deleteBody();
}
// The third pass drops targets of aliases which are dead...
std::vector<GlobalAlias*> DeadAliases;
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;
- ++I)
- if (!AliveGlobals.count(I)) {
- DeadAliases.push_back(I);
- I->setAliasee(nullptr);
+ for (GlobalAlias &GA : M.aliases())
+ if (!AliveGlobals.count(&GA)) {
+ DeadAliases.push_back(&GA);
+ GA.setAliasee(nullptr);
}
if (!DeadFunctions.empty()) {
// Now that all interferences have been dropped, delete the actual objects
// themselves.
- for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadFunctions[i]);
- M.getFunctionList().erase(DeadFunctions[i]);
+ for (Function *F : DeadFunctions) {
+ RemoveUnusedGlobalValue(*F);
+ M.getFunctionList().erase(F);
}
NumFunctions += DeadFunctions.size();
Changed = true;
}
if (!DeadGlobalVars.empty()) {
- for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadGlobalVars[i]);
- M.getGlobalList().erase(DeadGlobalVars[i]);
+ for (GlobalVariable *GV : DeadGlobalVars) {
+ RemoveUnusedGlobalValue(*GV);
+ M.getGlobalList().erase(GV);
}
NumVariables += DeadGlobalVars.size();
Changed = true;
@@ -179,9 +172,9 @@ bool GlobalDCE::runOnModule(Module &M) {
// Now delete any dead aliases.
if (!DeadAliases.empty()) {
- for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadAliases[i]);
- M.getAliasList().erase(DeadAliases[i]);
+ for (GlobalAlias *GA : DeadAliases) {
+ RemoveUnusedGlobalValue(*GA);
+ M.getAliasList().erase(GA);
}
NumAliases += DeadAliases.size();
Changed = true;
@@ -222,21 +215,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// any globals used will be marked as needed.
Function *F = cast<Function>(G);
- if (F->hasPrefixData())
- MarkUsedGlobalsAsNeeded(F->getPrefixData());
-
- if (F->hasPrologueData())
- MarkUsedGlobalsAsNeeded(F->getPrologueData());
+ for (Use &U : F->operands())
+ MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
- if (F->hasPersonalityFn())
- MarkUsedGlobalsAsNeeded(F->getPersonalityFn());
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
- if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+ for (BasicBlock &BB : *F)
+ for (Instruction &I : BB)
+ for (Use &U : I.operands())
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
GlobalIsNeeded(GV);
- else if (Constant *C = dyn_cast<Constant>(*U))
+ else if (Constant *C = dyn_cast<Constant>(U))
MarkUsedGlobalsAsNeeded(C);
}
}
@@ -247,9 +234,9 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
// Loop over all of the operands of the constant, adding any globals they
// use to the list of needed globals.
- for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
+ for (Use &U : C->operands()) {
// If we've already processed this constant there's no need to do it again.
- Constant *Op = dyn_cast<Constant>(*I);
+ Constant *Op = dyn_cast<Constant>(U);
if (Op && SeenConstants.insert(Op).second)
MarkUsedGlobalsAsNeeded(Op);
}
@@ -262,7 +249,8 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
// might make it deader.
//
bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
- if (GV.use_empty()) return false;
+ if (GV.use_empty())
+ return false;
GV.removeDeadConstantUsers();
return GV.use_empty();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 5ffe15dbd31d..fd7736905fe8 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -54,7 +55,6 @@ STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted , "Number of globals deleted");
-STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
@@ -69,6 +69,7 @@ namespace {
struct GlobalOpt : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -81,11 +82,14 @@ namespace {
bool OptimizeFunctions(Module &M);
bool OptimizeGlobalVars(Module &M);
bool OptimizeGlobalAliases(Module &M);
- bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
- bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
- const GlobalStatus &GS);
+ bool deleteIfDead(GlobalValue &GV);
+ bool processGlobal(GlobalValue &GV);
+ bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+ bool isPointerValueDeadOnEntryToFunction(const Function *F,
+ GlobalValue *GV);
+
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
@@ -95,13 +99,14 @@ char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
-/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
-/// as a root? If so, we might not really want to eliminate the stores to it.
+/// Is this global variable possibly used by a leak checker as a root? If so,
+/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
// A global variable is a root if it is a pointer, or could plausibly contain
// a pointer. There are two challenges; one is that we could have a struct
@@ -176,10 +181,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
} while (1);
}
-/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users
-/// of the global and clean up any that obviously don't assign the global a
-/// value that isn't dynamically allocated.
-///
+/// This GV is a pointer root. Loop over all users of the global and clean up
+/// any that obviously don't assign the global a value that isn't dynamically
+/// allocated.
static bool CleanupPointerRootUsers(GlobalVariable *GV,
const TargetLibraryInfo *TLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
@@ -263,10 +267,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
return Changed;
}
-/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
-/// users of the global, cleaning up the obvious ones. This is largely just a
-/// quick scan over the use list to clean up the easy and obvious cruft. This
-/// returns true if it made a change.
+/// We just marked GV constant. Loop over all users of the global, cleaning up
+/// the obvious ones. This is largely just a quick scan over the use list to
+/// clean up the easy and obvious cruft. This returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
@@ -353,8 +356,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
return Changed;
}
-/// isSafeSROAElementUse - Return true if the specified instruction is a safe
-/// user of a derived expression from a global that we want to SROA.
+/// Return true if the specified instruction is a safe user of a derived
+/// expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
@@ -385,9 +388,8 @@ static bool isSafeSROAElementUse(Value *V) {
}
-/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
-/// Look at it and its uses and decide whether it is safe to SROA this global.
-///
+/// U is a direct user of the specified global value. Look at it and its uses
+/// and decide whether it is safe to SROA this global.
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
// The user of the global must be a GEP Inst or a ConstantExpr GEP.
if (!isa<GetElementPtrInst>(U) &&
@@ -452,9 +454,8 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
return true;
}
-/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
-/// is safe for us to perform this transformation.
-///
+/// Look at all uses of the global and decide whether it is safe for us to
+/// perform this transformation.
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
for (User *U : GV->users())
if (!IsUserOfGlobalSafeForSRA(U, GV))
@@ -464,10 +465,10 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
}
-/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
-/// variable. This opens the door for other optimizations by exposing the
-/// behavior of the program in a more fine-grained way. We have determined that
-/// this transformation is safe already. We return the first global variable we
+/// Perform scalar replacement of aggregates on the specified global variable.
+/// This opens the door for other optimizations by exposing the behavior of the
+/// program in a more fine-grained way. We have determined that this
+/// transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Make sure this global only has simple uses that we can SRA.
@@ -497,7 +498,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
In, GV->getName()+"."+Twine(i),
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- Globals.insert(GV, NGV);
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ Globals.push_back(NGV);
NewGlobals.push_back(NGV);
// Calculate the known alignment of the field. If the original aggregate
@@ -530,7 +532,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
In, GV->getName()+"."+Twine(i),
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- Globals.insert(GV, NGV);
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ Globals.push_back(NGV);
NewGlobals.push_back(NGV);
// Calculate the known alignment of the field. If the original aggregate
@@ -545,7 +548,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (NewGlobals.empty())
return nullptr;
- DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -610,9 +613,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
}
-/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
-/// value will trap if the value is dynamically null. PHIs keeps track of any
-/// phi nodes we've seen to avoid reprocessing them.
+/// Return true if all users of the specified value will trap if the value is
+/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
+/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users())
@@ -653,9 +656,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
return true;
}
-/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
-/// from GV will trap if the loaded value is null. Note that this also permits
-/// comparisons of the loaded value against null, as a special case.
+/// Return true if all uses of any loads from GV will trap if the loaded value
+/// is null. Note that this also permits comparisons of the loaded value
+/// against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
for (const User *U : GV->users())
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
@@ -735,10 +738,10 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
}
-/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
-/// value stored into it. If there are uses of the loaded value that would trap
-/// if the loaded value is dynamically null, then we know that they cannot be
-/// reachable with a null optimize away the load.
+/// The specified global has only one non-null value stored into it. If there
+/// are uses of the loaded value that would trap if the loaded value is
+/// dynamically null, then we know that they cannot be reachable with a null
+/// value, so we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
@@ -778,7 +781,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
}
if (Changed) {
- DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n");
++NumGlobUses;
}
@@ -801,8 +804,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
return Changed;
}
-/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
-/// instructions that are foldable.
+/// Walk the use list of V, constant folding all of the instructions that are
+/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
@@ -818,11 +821,11 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
}
}
-/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
-/// variable, and transforms the program as if it always contained the result of
-/// the specified malloc. Because it is always the result of the specified
-/// malloc, there is no reason to actually DO the malloc. Instead, turn the
-/// malloc into a global, and any loads of GV as uses of the new global.
+/// This function takes the specified global variable, and transforms the
+/// program as if it always contained the result of the specified malloc.
+/// Because it is always the result of the specified malloc, there is no reason
+/// to actually DO the malloc. Instead, turn the malloc into a global, and any
+/// loads of GV as uses of the new global.
static GlobalVariable *
OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
ConstantInt *NElements, const DataLayout &DL,
@@ -838,13 +841,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Create the new global variable. The contents of the malloc'd memory is
// undefined, so initialize with an undef value.
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
- GlobalType, false,
- GlobalValue::InternalLinkage,
- UndefValue::get(GlobalType),
- GV->getName()+".body",
- GV,
- GV->getThreadLocalMode());
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
+ GV->getThreadLocalMode());
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
@@ -935,7 +935,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
cast<StoreInst>(InitBool->user_back())->eraseFromParent();
delete InitBool;
} else
- GV->getParent()->getGlobalList().insert(GV, InitBool);
+ GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
// Now the GV is dead, nuke it and the malloc..
GV->eraseFromParent();
@@ -951,10 +951,9 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
return NewGV;
}
-/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
-/// to make sure that there are no complex uses of V. We permit simple things
-/// like dereferencing the pointer, but not storing through the address, unless
-/// it is to the specified global.
+/// Scan the use-list of V checking to make sure that there are no complex uses
+/// of V. We permit simple things like dereferencing the pointer, but not
+/// storing through the address, unless it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
const GlobalVariable *GV,
SmallPtrSetImpl<const PHINode*> &PHIs) {
@@ -998,10 +997,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
return true;
}
-/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
-/// somewhere. Transform all uses of the allocation into loads from the
-/// global and uses of the resultant pointer. Further, delete the store into
-/// GV. This assumes that these value pass the
+/// The Alloc pointer is stored into GV somewhere. Transform all uses of the
+/// allocation into loads from the global and uses of the resultant pointer.
+/// Further, delete the store into GV. This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
GlobalVariable *GV) {
@@ -1043,9 +1041,9 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
}
}
-/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
-/// of a load) are simple enough to perform heap SRA on. This permits GEP's
-/// that index through the array and struct field, icmps of null, and PHIs.
+/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
+/// perform heap SRA on. This permits GEP's that index through the array and
+/// struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
@@ -1096,8 +1094,8 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
}
-/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
-/// GV are simple enough to perform HeapSRA, return true.
+/// If all users of values loaded from GV are simple enough to perform HeapSRA,
+/// return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
Instruction *StoredVal) {
SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
@@ -1186,8 +1184,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
return FieldVals[FieldNo] = Result;
}
-/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
-/// the load, rewrite the derived value to use the HeapSRoA'd load.
+/// Given a load instruction and a value derived from the load, rewrite the
+/// derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
@@ -1248,10 +1246,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
}
}
-/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
-/// is a value loaded from the global. Eliminate all uses of Ptr, making them
-/// use FieldGlobals instead. All uses of loaded values satisfy
-/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+/// We are performing Heap SRoA on a global. Ptr is a value loaded from the
+/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead.
+/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
@@ -1266,8 +1263,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
}
}
-/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
-/// it up into multiple allocations of arrays of the fields.
+/// CI is an allocation of an array of structures. Break it up into multiple
+/// allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
@@ -1291,12 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Type *FieldTy = STy->getElementType(FieldNo);
PointerType *PFieldTy = PointerType::get(FieldTy, AS);
- GlobalVariable *NGV =
- new GlobalVariable(*GV->getParent(),
- PFieldTy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(PFieldTy),
- GV->getName() + ".f" + Twine(FieldNo), GV,
- GV->getThreadLocalMode());
+ GlobalVariable *NGV = new GlobalVariable(
+ *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
+ nullptr, GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
@@ -1336,7 +1331,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Split the basic block at the old malloc.
BasicBlock *OrigBB = CI->getParent();
- BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
+ BasicBlock *ContBB =
+ OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
@@ -1376,9 +1372,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// CI is no longer needed, remove it.
CI->eraseFromParent();
- /// InsertedScalarizedLoads - As we process loads, if we can't immediately
- /// update all uses of the load, keep track of what scalarized loads are
- /// inserted for a given load.
+ /// As we process loads, if we can't immediately update all uses of the load,
+ /// keep track of what scalarized loads are inserted for a given load.
DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
InsertedScalarizedValues[GV] = FieldGlobals;
@@ -1454,13 +1449,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
return cast<GlobalVariable>(FieldGlobals[0]);
}
-/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
-/// pointer global variable with a single value stored it that is a malloc or
-/// cast of malloc.
-static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
+/// This function is called when we see a pointer global variable with a single
+/// value stored into it that is a malloc or a cast of malloc.
+static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
Type *AllocTy,
AtomicOrdering Ordering,
- Module::global_iterator &GVI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
// If this is a malloc of an abstract type, don't touch it.
@@ -1499,7 +1492,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
+ OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
return true;
}
@@ -1544,19 +1537,18 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
CI = cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true),
- DL, TLI);
+ PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
+ TLI);
return true;
}
return false;
}
-// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
-// that only one value (besides its initializer) is ever stored to the global.
-static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+// Try to optimize globals based on the knowledge that only one value (besides
+// its initializer) is ever stored to the global.
+static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
- Module::global_iterator &GVI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
@@ -1577,9 +1569,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
Type *MallocType = getMallocAllocatedType(CI, TLI);
- if (MallocType &&
- TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
- DL, TLI))
+ if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ Ordering, DL, TLI))
return true;
}
}
@@ -1587,10 +1578,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
return false;
}
-/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
-/// two values ever stored into GV are its initializer and OtherVal. See if we
-/// can shrink the global into a boolean and select between the two values
-/// whenever it is used. This exposes the values to other scalar optimizations.
+/// At this point, we have learned that the only two values ever stored into GV
+/// are its initializer and OtherVal. See if we can shrink the global into a
+/// boolean and select between the two values whenever it is used. This exposes
+/// the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
Type *GVElType = GV->getType()->getElementType();
@@ -1610,7 +1601,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
return false;
- DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
@@ -1620,7 +1611,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GV->getName()+".b",
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- GV->getParent()->getGlobalList().insert(GV, NewGV);
+ GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
Constant *InitVal = GV->getInitializer();
assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
@@ -1688,61 +1679,213 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
return true;
}
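A minimal sketch of the effect (hypothetical source, shown before the transform): an internal global that only ever holds its initializer or one other value is replaced by a boolean, and each load becomes a select between the two constants.

static int Mode = 0;         // only 0 (initializer) or 42 is ever stored
void enable() { Mode = 42; } // the single "other" stored value
int current() {
  return Mode;               // after the transform, conceptually:
                             //   enable():  Mode.b = true;
                             //   current(): return Mode.b ? 42 : 0;
}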
+bool GlobalOpt::deleteIfDead(GlobalValue &GV) {
+ GV.removeDeadConstantUsers();
-/// ProcessGlobal - Analyze the specified global variable and optimize it if
-/// possible. If we make a change, return true.
-bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
- Module::global_iterator &GVI) {
- // Do more involved optimizations if the global is internal.
- GV->removeDeadConstantUsers();
+ if (!GV.isDiscardableIfUnused())
+ return false;
- if (GV->use_empty()) {
- DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
- GV->eraseFromParent();
- ++NumDeleted;
- return true;
- }
+ if (const Comdat *C = GV.getComdat())
+ if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
+ return false;
- if (!GV->hasLocalLinkage())
+ bool Dead;
+ if (auto *F = dyn_cast<Function>(&GV))
+ Dead = F->isDefTriviallyDead();
+ else
+ Dead = GV.use_empty();
+ if (!Dead)
+ return false;
+
+ DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
+ GV.eraseFromParent();
+ ++NumDeleted;
+ return true;
+}
+
+/// Analyze the specified global variable and optimize it if possible. If we
+/// make a change, return true.
+bool GlobalOpt::processGlobal(GlobalValue &GV) {
+ // Do more involved optimizations if the global is internal.
+ if (!GV.hasLocalLinkage())
return false;
GlobalStatus GS;
- if (GlobalStatus::analyzeGlobal(GV, GS))
+ if (GlobalStatus::analyzeGlobal(&GV, GS))
return false;
- if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
- GV->setUnnamedAddr(true);
+ bool Changed = false;
+ if (!GS.IsCompared && !GV.hasUnnamedAddr()) {
+ GV.setUnnamedAddr(true);
NumUnnamed++;
+ Changed = true;
}
- if (GV->isConstant() || !GV->hasInitializer())
+ auto *GVar = dyn_cast<GlobalVariable>(&GV);
+ if (!GVar)
+ return Changed;
+
+ if (GVar->isConstant() || !GVar->hasInitializer())
+ return Changed;
+
+ return processInternalGlobal(GVar, GS) || Changed;
+}
+
+bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F,
+                                                    GlobalValue *GV) {
+ // Find all uses of GV. We expect them all to be in F, and if we can't
+ // identify any of the uses we bail out.
+ //
+ // On each of these uses, identify if the memory that GV points to is
+ // used/required/live at the start of the function. If it is not, for example
+ // if the first thing the function does is store to the GV, the GV can
+ // possibly be demoted.
+ //
+ // We don't do an exhaustive search for memory operations - simply look
+ // through bitcasts as they're quite common and benign.
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ SmallVector<LoadInst *, 4> Loads;
+ SmallVector<StoreInst *, 4> Stores;
+ for (auto *U : GV->users()) {
+ if (Operator::getOpcode(U) == Instruction::BitCast) {
+ for (auto *UU : U->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(UU))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(UU))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+ continue;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return false;
+ assert(I->getParent()->getParent() == F);
+
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(I))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+
+ // We have identified all uses of GV into loads and stores. Now check if all
+ // of them are known not to depend on the value of the global at the function
+ // entry point. We do this by ensuring that every load is dominated by at
+ // least one store.
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
+ .getDomTree();
+
+ // The check below is quadratic; make sure we're not going to run too many
+ // tests.
+ // FIXME: Even though this will always have worst-case quadratic time, we
+ // could put effort into minimizing the average time by putting stores that
+ // have been shown to dominate at least one load at the beginning of the
+ // Stores array, making subsequent dominance checks more likely to succeed
+ // early.
+ //
+ // The threshold here is fairly large because global->local demotion is a
+ // very powerful optimization should it fire.
+ const unsigned Threshold = 100;
+ if (Loads.size() * Stores.size() > Threshold)
return false;
- return ProcessInternalGlobal(GV, GVI, GS);
+ for (auto *L : Loads) {
+ auto *LTy = L->getType();
+ if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) {
+ auto *STy = S->getValueOperand()->getType();
+ // The load is only dominated by the store if DomTree says so
+ // and the number of bits loaded in L is less than or equal to
+ // the number of bits stored in S.
+ return DT.dominates(S, L) &&
+ DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
+ }))
+ return false;
+ }
+ // All loads have known dependences inside F, so the global can be localized.
+ return true;
+}
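A minimal sketch of a global this check accepts (hypothetical input): the sole accessing function stores before every load, so the global's memory is dead on entry and, provided the function is also norecurse, the global can be demoted to an alloca.

static int Scratch;   // single-value type, address space 0, one user
int onlyUser(int n) {
  Scratch = n * 2;    // this store dominates every load below
  return Scratch + 1; // the load only observes the in-function store
}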
+
+/// C may have non-instruction users. Can all of those users be turned into
+/// instructions?
+static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
+ // We don't do this exhaustively. The most common pattern that we really need
+ // to care about is a constant GEP or constant bitcast - so we just look
+ // through a single ConstantExpr.
+ //
+ // Any constant this function returns true for must be one that
+ // makeAllConstantUsesInstructions can handle.
+ for (auto *U : C->users()) {
+ if (isa<Instruction>(U))
+ continue;
+ if (!isa<ConstantExpr>(U))
+ // Non-instruction, non-ConstantExpr user; cannot convert this.
+ return false;
+ for (auto *UU : U->users())
+ if (!isa<Instruction>(UU))
+ // A ConstantExpr used by another constant. We don't try to recurse any
+ // further but just bail out at this point.
+ return false;
+ }
+
+ return true;
+}
+
+/// C may have non-instruction users, and
+/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
+/// non-instruction users to instructions.
+static void makeAllConstantUsesInstructions(Constant *C) {
+ SmallVector<ConstantExpr*,4> Users;
+ for (auto *U : C->users()) {
+ if (isa<ConstantExpr>(U))
+ Users.push_back(cast<ConstantExpr>(U));
+ else
+ // Anything else must be an Instruction; if not,
+ // allNonInstructionUsersCanBeMadeInstructions should not have returned
+ // true for C.
+ assert(
+ isa<Instruction>(U) &&
+ "Can't transform non-constantexpr non-instruction to instruction!");
+ }
+
+ SmallVector<Value*,4> UUsers;
+ for (auto *U : Users) {
+ UUsers.clear();
+ for (auto *UU : U->users())
+ UUsers.push_back(UU);
+ for (auto *UU : UUsers) {
+ Instruction *UI = cast<Instruction>(UU);
+ Instruction *NewU = U->getAsInstruction();
+ NewU->insertBefore(UI);
+ UI->replaceUsesOfWith(U, NewU);
+ }
+ U->dropAllReferences();
+ }
}
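The core of the rewrite above, restated as a standalone sketch (CE and UI are hypothetical names for an existing ConstantExpr and one of its instruction users):

static void lowerOneConstantExprUse(ConstantExpr *CE, Instruction *UI) {
  Instruction *NewI = CE->getAsInstruction(); // materialize the expression
  NewI->insertBefore(UI);                     // place it just before the use
  UI->replaceUsesOfWith(CE, NewI);            // redirect the use to the inst
}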
-/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
-bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
- Module::global_iterator &GVI,
+bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
const GlobalStatus &GS) {
auto &DL = GV->getParent()->getDataLayout();
- // If this is a first class global and has only one accessing function
- // and this function is main (which we know is not recursive), we replace
- // the global with a local alloca in this function.
+ // If this is a first class global and has only one accessing function and
+ // this function is non-recursive, we replace the global with a local alloca
+ // in this function.
//
// NOTE: It doesn't make sense to promote non-single-value types since we
// are just replacing static memory with stack memory.
//
// If the global is in a different address space, don't bring it to the stack.
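+ //
+ // A sketch of the intended effect (names hypothetical): an internal global
+ // @counter touched only by one non-recursive function, whose entry always
+ // overwrites it before any read, becomes an alloca in that function's
+ // entry block instead.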
if (!GS.HasMultipleAccessingFunctions &&
- GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GS.AccessingFunction &&
GV->getType()->getElementType()->isSingleValueType() &&
- GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage() &&
- GV->getType()->getAddressSpace() == 0) {
- DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ GV->getType()->getAddressSpace() == 0 &&
+ !GV->isExternallyInitialized() &&
+ allNonInstructionUsersCanBeMadeInstructions(GV) &&
+ GS.AccessingFunction->doesNotRecurse() &&
isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV)) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getType()->getElementType();
@@ -1752,6 +1895,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+ makeAllConstantUsesInstructions(GV);
+
GV->replaceAllUsesWith(Alloca);
GV->eraseFromParent();
++NumLocalized;
@@ -1761,7 +1906,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.IsLoaded) {
- DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
bool Changed;
if (isLeakCheckerRoot(GV)) {
@@ -1800,11 +1945,9 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
const DataLayout &DL = GV->getParent()->getDataLayout();
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
+ if (SRAGlobal(GV, DL))
return true;
- }
- } else if (GS.StoredType == GlobalStatus::StoredOnce) {
+ } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
@@ -1822,8 +1965,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
<< "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
- } else {
- GVI = GV;
}
++NumSubstitute;
return true;
@@ -1831,8 +1972,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
- DL, TLI))
+ if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -1850,8 +1990,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return false;
}
-/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
-/// function, changing them to FastCC.
+/// Walk all of the direct calls of the specified function, changing them to
+/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
@@ -1898,38 +2038,38 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
bool Changed = false;
// Optimize functions.
for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
- Function *F = FI++;
+ Function *F = &*FI++;
// Functions without names cannot be referenced outside this module.
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
- const Comdat *C = F->getComdat();
- bool inComdat = C && NotDiscardableComdats.count(C);
- F->removeDeadConstantUsers();
- if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) {
- F->eraseFromParent();
+ if (deleteIfDead(*F)) {
Changed = true;
- ++NumFnDeleted;
- } else if (F->hasLocalLinkage()) {
- if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
- !F->hasAddressTaken()) {
- // If this function has a calling convention worth changing, is not a
- // varargs function, and is only called directly, promote it to use the
- // Fast calling convention.
- F->setCallingConv(CallingConv::Fast);
- ChangeCalleesToFastCall(F);
- ++NumFastCallFns;
- Changed = true;
- }
+ continue;
+ }
- if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
- !F->hasAddressTaken()) {
- // The function is not used by a trampoline intrinsic, so it is safe
- // to remove the 'nest' attribute.
- RemoveNestAttribute(F);
- ++NumNestRemoved;
- Changed = true;
- }
+ Changed |= processGlobal(*F);
+
+ if (!F->hasLocalLinkage())
+ continue;
+ if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
+ !F->hasAddressTaken()) {
+ // If this function has a calling convention worth changing, is not a
+ // varargs function, and is only called directly, promote it to use the
+ // Fast calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F->hasAddressTaken()) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
}
}
return Changed;
@@ -1940,7 +2080,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// Global variables without names cannot be referenced outside this module.
if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
GV->setLinkage(GlobalValue::InternalLinkage);
@@ -1953,12 +2093,12 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
GV->setInitializer(New);
}
- if (GV->isDiscardableIfUnused()) {
- if (const Comdat *C = GV->getComdat())
- if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage())
- continue;
- Changed |= ProcessGlobal(GV, GVI);
+ if (deleteIfDead(*GV)) {
+ Changed = true;
+ continue;
}
+
+ Changed |= processGlobal(*GV);
}
return Changed;
}
@@ -1968,8 +2108,8 @@ isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSetImpl<Constant *> &SimpleConstants,
const DataLayout &DL);
-/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
-/// handled by the code generator. We don't want to generate something like:
+/// Return true if the specified constant can be handled by the code generator.
+/// We don't want to generate something like:
/// void *X = &X/42;
/// because the code generator doesn't have a relocation that can handle that.
///
@@ -2044,11 +2184,11 @@ isSimpleEnoughValueToCommit(Constant *C,
}
-/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
-/// enough for us to understand. In particular, if it is a cast to anything
-/// other than from one pointer type to another pointer type, we punt.
-/// We basically just support direct accesses to globals and GEP's of
-/// globals. This should be kept up to date with CommitValueTo.
+/// Return true if this constant is simple enough for us to understand. In
+/// particular, if it is a cast to anything other than from one pointer type to
+/// another pointer type, we punt. We basically just support direct accesses to
+/// globals and GEP's of globals. This should be kept up to date with
+/// CommitValueTo.
static bool isSimpleEnoughPointerToCommit(Constant *C) {
// Conservatively, avoid aggregate types. This is because we don't
// want to worry about them partially overlapping other stores.
@@ -2095,9 +2235,9 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
}
-/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
-/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
-/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
+/// Evaluate a piece of a constantexpr store into a global initializer. This
+/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the
+/// GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
ConstantExpr *Addr, unsigned OpNo) {
// Base case of the recursion.
@@ -2144,7 +2284,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
return ConstantVector::get(Elts);
}
-/// CommitValueTo - We have decided that Addr (which satisfies the predicate
+/// We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
static void CommitValueTo(Constant *Val, Constant *Addr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
@@ -2160,10 +2300,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
namespace {
-/// Evaluator - This class evaluates LLVM IR, producing the Constant
-/// representing each SSA instruction. Changes to global variables are stored
-/// in a mapping that can be iterated over after the evaluation is complete.
-/// Once an evaluation call fails, the evaluation object should not be reused.
+/// This class evaluates LLVM IR, producing the Constant representing each SSA
+/// instruction. Changes to global variables are stored in a mapping that can
+/// be iterated over after the evaluation is complete. Once an evaluation call
+/// fails, the evaluation object should not be reused.
class Evaluator {
public:
Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
@@ -2180,15 +2320,15 @@ public:
Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
}
- /// EvaluateFunction - Evaluate a call to function F, returning true if
- /// successful, false if we can't evaluate it. ActualArgs contains the formal
- /// arguments for the function.
+ /// Evaluate a call to function F, returning true if successful, false if we
+ /// can't evaluate it. ActualArgs contains the formal arguments for the
+ /// function.
bool EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs);
- /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
- /// successful, false if we can't evaluate it. NewBB returns the next BB that
- /// control flows into, or null upon return.
+ /// Evaluate all instructions in block BB, returning true if successful, false
+ /// if we can't evaluate it. NewBB returns the next BB that control flows
+ /// into, or null upon return.
bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
Constant *getVal(Value *V) {
@@ -2213,32 +2353,31 @@ public:
private:
Constant *ComputeLoadResult(Constant *P);
- /// ValueStack - As we compute SSA register values, we store their contents
- /// here. The back of the deque contains the current function and the stack
- /// contains the values in the calling frames.
+ /// As we compute SSA register values, we store their contents here. The back
+ /// of the deque contains the current function and the stack contains the
+ /// values in the calling frames.
std::deque<DenseMap<Value*, Constant*>> ValueStack;
- /// CallStack - This is used to detect recursion. In pathological situations
- /// we could hit exponential behavior, but at least there is nothing
- /// unbounded.
+ /// This is used to detect recursion. In pathological situations we could hit
+ /// exponential behavior, but at least there is nothing unbounded.
SmallVector<Function*, 4> CallStack;
- /// MutatedMemory - For each store we execute, we update this map. Loads
- /// check this to get the most up-to-date value. If evaluation is successful,
- /// this state is committed to the process.
+ /// For each store we execute, we update this map. Loads check this to get
+ /// the most up-to-date value. If evaluation is successful, this state is
+ /// committed to the process.
DenseMap<Constant*, Constant*> MutatedMemory;
- /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
- /// to represent its body. This vector is needed so we can delete the
- /// temporary globals when we are done.
+ /// To 'execute' an alloca, we create a temporary global variable to represent
+ /// its body. This vector is needed so we can delete the temporary globals
+ /// when we are done.
SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
- /// Invariants - These global variables have been marked invariant by the
- /// static constructor.
+ /// These global variables have been marked invariant by the static
+ /// constructor.
SmallPtrSet<GlobalVariable*, 8> Invariants;
- /// SimpleConstants - These are constants we have checked and know to be
- /// simple enough to live in a static initializer of a global.
+ /// These are constants we have checked and know to be simple enough to live
+ /// in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
const DataLayout &DL;
@@ -2247,9 +2386,8 @@ private:
} // anonymous namespace
-/// ComputeLoadResult - Return the value that would be computed by a load from
-/// P after the stores reflected by 'memory' have been performed. If we can't
-/// decide, return null.
+/// Return the value that would be computed by a load from P after the stores
+/// reflected by 'memory' have been performed. If we can't decide, return null.
Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
@@ -2275,9 +2413,9 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
return nullptr; // don't know how to evaluate.
}
-/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
-/// successful, false if we can't evaluate it. NewBB returns the next BB that
-/// control flows into, or null upon return.
+/// Evaluate all instructions in block BB, returning true if successful, false
+/// if we can't evaluate it. NewBB returns the next BB that control flows into,
+/// or null upon return.
bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
BasicBlock *&NextBB) {
// This is the main evaluation loop.
@@ -2438,7 +2576,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
InstResult = AllocaTmps.back().get();
DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
- CallSite CS(CurInst);
+ CallSite CS(&*CurInst);
// Debug info can safely be ignored here.
if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
@@ -2504,6 +2642,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// Continue even if we do nothing.
++CurInst;
continue;
+ } else if (II->getIntrinsicID() == Intrinsic::assume) {
+ DEBUG(dbgs() << "Skipping assume intrinsic.\n");
+ ++CurInst;
+ continue;
}
DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
@@ -2600,7 +2742,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
- setVal(CurInst, InstResult);
+ setVal(&*CurInst, InstResult);
}
// If we just processed an invoke, we finished evaluating the block.
@@ -2615,9 +2757,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
}
-/// EvaluateFunction - Evaluate a call to function F, returning true if
-/// successful, false if we can't evaluate it. ActualArgs contains the formal
-/// arguments for the function.
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the formal arguments for the
+/// function.
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs) {
// Check to see if this function is already executing (recursion). If so,
@@ -2631,7 +2773,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
unsigned ArgNo = 0;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
++AI, ++ArgNo)
- setVal(AI, ActualArgs[ArgNo]);
+ setVal(&*AI, ActualArgs[ArgNo]);
// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
// we can only evaluate any one basic block at most once. This set keeps
@@ -2639,7 +2781,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
// CurBB - The current basic block we're evaluating.
- BasicBlock *CurBB = F->begin();
+ BasicBlock *CurBB = &F->front();
BasicBlock::iterator CurInst = CurBB->begin();
@@ -2679,8 +2821,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
}
}
-/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
-/// we can. Return true if we can, false otherwise.
+/// Evaluate static constructors in the function, if we can. Return true on
+/// success, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Call the function.
@@ -2708,7 +2850,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
}
static int compareNames(Constant *const *A, Constant *const *B) {
- return (*A)->getName().compare((*B)->getName());
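+ // Compare by the name of the underlying object: a bitcast member of
+ // llvm.used is a ConstantExpr with no name of its own, so stripping the
+ // casts lets it sort by its global's name instead of the empty string.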
+ return (*A)->stripPointerCasts()->getName().compare(
+ (*B)->stripPointerCasts()->getName());
}
static void setUsedInitializer(GlobalVariable &V,
@@ -2742,7 +2885,7 @@ static void setUsedInitializer(GlobalVariable &V,
}
namespace {
-/// \brief An easy to access representation of llvm.used and llvm.compiler.used.
+/// An easy to access representation of llvm.used and llvm.compiler.used.
class LLVMUsed {
SmallPtrSet<GlobalValue *, 8> Used;
SmallPtrSet<GlobalValue *, 8> CompilerUsed;
@@ -2861,10 +3004,17 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E;) {
- Module::alias_iterator J = I++;
+ GlobalAlias *J = &*I++;
+
// Aliases without names cannot be referenced outside this module.
if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
J->setLinkage(GlobalValue::InternalLinkage);
+
+ if (deleteIfDead(*J)) {
+ Changed = true;
+ continue;
+ }
+
// If the aliasee may change at link time, nothing can be done - bail out.
if (J->mayBeOverridden())
continue;
@@ -2889,15 +3039,15 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
- Target->takeName(J);
+ Target->takeName(&*J);
Target->setLinkage(J->getLinkage());
Target->setVisibility(J->getVisibility());
Target->setDLLStorageClass(J->getDLLStorageClass());
- if (Used.usedErase(J))
+ if (Used.usedErase(&*J))
Used.usedInsert(Target);
- if (Used.compilerUsedErase(J))
+ if (Used.compilerUsedErase(&*J))
Used.compilerUsedInsert(Target);
} else if (mayHaveOtherReferences(*J, Used))
continue;
@@ -2936,8 +3086,8 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
return Fn;
}
-/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
-/// destructor and can therefore be eliminated.
+/// Returns whether the given function is an empty C++ destructor and can
+/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we only look for a function with a single basic block, where
/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
@@ -3081,3 +3231,4 @@ bool GlobalOpt::runOnModule(Module &M) {
return Changed;
}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 50f56b0f2afe..7ea6c08b2e66 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the common infrastructure (including C bindings) for
-// libLLVMIPO.a, which implements several transformations over the LLVM
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
// intermediate representation.
//
//===----------------------------------------------------------------------===//
@@ -24,14 +24,17 @@ using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeArgPromotionPass(Registry);
initializeConstantMergePass(Registry);
+ initializeCrossDSOCFIPass(Registry);
initializeDAEPass(Registry);
initializeDAHPass(Registry);
+ initializeForceFunctionAttrsLegacyPassPass(Registry);
initializeFunctionAttrsPass(Registry);
initializeGlobalDCEPass(Registry);
initializeGlobalOptPass(Registry);
initializeIPCPPass(Registry);
initializeAlwaysInlinerPass(Registry);
initializeSimpleInlinerPass(Registry);
+ initializeInferFunctionAttrsLegacyPassPass(Registry);
initializeInternalizePassPass(Registry);
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
@@ -40,13 +43,15 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
- initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripDeadPrototypesLegacyPassPass(Registry);
initializeStripSymbolsPass(Registry);
initializeStripDebugDeclarePass(Registry);
initializeStripDeadDebugInfoPass(Registry);
initializeStripNonDebugSymbolsPass(Registry);
initializeBarrierNoopPass(Registry);
initializeEliminateAvailableExternallyPass(Registry);
+ initializeSampleProfileLoaderPass(Registry);
+ initializeFunctionImportPassPass(Registry);
}
void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
new file mode 100644
index 000000000000..d02c861a2948
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -0,0 +1,937 @@
+//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "inferattrs"
+
+STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
+STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
+STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+
+static bool setDoesNotAccessMemory(Function &F) {
+ if (F.doesNotAccessMemory())
+ return false;
+ F.setDoesNotAccessMemory();
+ ++NumReadNone;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F) {
+ if (F.onlyReadsMemory())
+ return false;
+ F.setOnlyReadsMemory();
+ ++NumReadOnly;
+ return true;
+}
+
+static bool setDoesNotThrow(Function &F) {
+ if (F.doesNotThrow())
+ return false;
+ F.setDoesNotThrow();
+ ++NumNoUnwind;
+ return true;
+}
+
+static bool setDoesNotCapture(Function &F, unsigned n) {
+ if (F.doesNotCapture(n))
+ return false;
+ F.setDoesNotCapture(n);
+ ++NumNoCapture;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F, unsigned n) {
+ if (F.onlyReadsMemory(n))
+ return false;
+ F.setOnlyReadsMemory(n);
+ ++NumReadOnlyArg;
+ return true;
+}
+
+static bool setDoesNotAlias(Function &F, unsigned n) {
+ if (F.doesNotAlias(n))
+ return false;
+ F.setDoesNotAlias(n);
+ ++NumNoAlias;
+ return true;
+}
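+ // Note that each setter above is idempotent: it returns false when the
+ // attribute is already present, so the Changed flags below record only
+ // genuinely new attributes.
+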
+
+/// Analyze the name and prototype of the given function and set any applicable
+/// attributes.
+///
+/// Returns true if any attributes were set and false otherwise.
+static bool inferPrototypeAttributes(Function &F,
+ const TargetLibraryInfo &TLI) {
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc)))
+ return false;
+
+ bool Changed = false;
+
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
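+ // To illustrate the numbering convention used by the setters: the strlen
+ // case above turns
+ //   declare i64 @strlen(i8*)
+ // into
+ //   declare i64 @strlen(i8* nocapture) readonly nounwind
+ // Argument attribute indices are 1-based; index 0 denotes the return value,
+ // as in the setDoesNotAlias(F, 0) calls for the allocators further down.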
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strcmp: // 0,1
+ case LibFunc::strspn: // 0,1
+ case LibFunc::strncmp: // 0,1
+ case LibFunc::strcspn: // 0,1
+ case LibFunc::strcoll: // 0,1
+ case LibFunc::strcasecmp: // 0,1
+ case LibFunc::strncasecmp: // 0,1
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::sscanf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::sprintf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::mkdir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::rewind:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::rmdir:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::rename:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F);
+ return Changed;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 3);
+ return Changed;
+ case LibFunc::fread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fputs:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::uname:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::unlink:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::pread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vsscanf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAccessMemory(F);
+ return Changed;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+ // May throw; places call through function pointer.
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_isoc99_sscanf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::gettimeofday:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // Currently some platforms have the restrict keyword on the arguments to
+ // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+ // arguments.
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+
+ default:
+ // FIXME: It'd be really nice to cover all the library functions we're
+ // aware of here.
+ return false;
+ }
+}
+
+static bool inferAllPrototypeAttributes(Module &M,
+ const TargetLibraryInfo &TLI) {
+ bool Changed = false;
+
+ for (Function &F : M.functions())
+ // We only infer things using the prototype if the definition isn't around
+ // to analyze directly.
+ if (F.isDeclaration())
+ Changed |= inferPrototypeAttributes(F, TLI);
+
+ return Changed;
+}
+
+PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
+ AnalysisManager<Module> *AM) {
+ auto &TLI = AM->getResult<TargetLibraryAnalysis>(M);
+
+ if (!inferAllPrototypeAttributes(M, TLI))
+ // If we didn't infer anything, preserve all analyses.
+ return PreservedAnalyses::all();
+
+ // Otherwise, we may have changed fundamental function attributes, so clear
+ // out all the analyses.
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct InferFunctionAttrsLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ InferFunctionAttrsLegacyPass() : ModulePass(ID) {
+ initializeInferFunctionAttrsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return inferAllPrototypeAttributes(M, TLI);
+ }
+};
+}
+
+char InferFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs",
+ "Infer set function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs",
+ "Infer set function attributes", false, false)
+
+Pass *llvm::createInferFunctionAttrsLegacyPass() {
+ return new InferFunctionAttrsLegacyPass();
+}
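+
+ // Usage sketch (not part of the pass itself): the legacy pass is registered
+ // under the "inferattrs" name above, so it can be exercised in isolation
+ // with, e.g.,
+ //   opt -inferattrs -S input.ll
+ // where input.ll is any module containing known library declarations.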
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index dc56a02e7b7d..1704bfea0b86 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -14,10 +14,10 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -35,17 +35,15 @@ namespace {
/// \brief Inliner pass which only handles "always inline" functions.
class AlwaysInliner : public Inliner {
- InlineCostAnalysis *ICA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true),
- ICA(nullptr) {
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
AlwaysInliner(bool InsertLifetime)
- : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) {
+ : Inliner(ID, -2000000000, InsertLifetime) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -53,9 +51,6 @@ public:
InlineCost getInlineCost(CallSite CS) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnSCC(CallGraphSCC &SCC) override;
-
using llvm::Pass::doFinalization;
bool doFinalization(CallGraph &CG) override {
return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
@@ -67,10 +62,9 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
@@ -99,19 +93,8 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
// that are viable for inlining. FIXME: We shouldn't even get here for
// declarations.
if (Callee && !Callee->isDeclaration() &&
- CS.hasFnAttr(Attribute::AlwaysInline) &&
- ICA->isInlineViable(*Callee))
+ CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
return InlineCost::getAlways();
return InlineCost::getNever();
}
-
-bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) {
- ICA = &getAnalysis<InlineCostAnalysis>();
- return Inliner::runOnSCC(SCC);
-}
-
-void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<InlineCostAnalysis>();
- Inliner::getAnalysisUsage(AU);
-}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 9b01d81b3c7c..45609f891ed8 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -23,6 +23,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
using namespace llvm;
@@ -37,26 +38,30 @@ namespace {
/// inliner pass and the always inliner pass. The two passes use different cost
/// analyses to determine when to inline.
class SimpleInliner : public Inliner {
- InlineCostAnalysis *ICA;
public:
- SimpleInliner() : Inliner(ID), ICA(nullptr) {
+ SimpleInliner() : Inliner(ID) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
SimpleInliner(int Threshold)
- : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) {
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) override {
- return ICA->getInlineCost(CS, getInlineThreshold(CS));
+ Function *Callee = CS.getCalledFunction();
+ TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
+ return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT);
}
bool runOnSCC(CallGraphSCC &SCC) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ TargetTransformInfoWrapperPass *TTIWP;
};
static int computeThresholdFromOptLevels(unsigned OptLevel,
@@ -75,10 +80,10 @@ static int computeThresholdFromOptLevels(unsigned OptLevel,
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
@@ -95,11 +100,11 @@ Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
}
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
- ICA = &getAnalysis<InlineCostAnalysis>();
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
return Inliner::runOnSCC(SCC);
}
void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<InlineCostAnalysis>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
Inliner::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 5273c3dc3ca2..bbe5f8761d5f 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -64,20 +65,22 @@ ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
-Inliner::Inliner(char &ID)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {}
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
+}
Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
- InlineLimit : Threshold),
- InsertLifetime(InsertLifetime) {}
+ : CallGraphSCCPass(ID),
+ InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
+ : Threshold),
+ InsertLifetime(InsertLifetime) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -85,39 +88,6 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;
-/// \brief If the inlined function had a higher stack protection level than the
-/// calling function, then bump up the caller's stack protection level.
-static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
- // If upgrading the SSP attribute, clear out the old SSP Attributes first.
- // Having multiple SSP attributes doesn't actually hurt, but it adds useless
- // clutter to the IR.
- AttrBuilder B;
- B.addAttribute(Attribute::StackProtect)
- .addAttribute(Attribute::StackProtectStrong)
- .addAttribute(Attribute::StackProtectReq);
- AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
- AttributeSet::FunctionIndex,
- B);
-
- if (Callee->hasFnAttribute(Attribute::SafeStack)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::SafeStack);
- } else if (Callee->hasFnAttribute(Attribute::StackProtectReq) &&
- !Caller->hasFnAttribute(Attribute::SafeStack)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::StackProtectReq);
- } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) &&
- !Caller->hasFnAttribute(Attribute::SafeStack) &&
- !Caller->hasFnAttribute(Attribute::StackProtectReq)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::StackProtectStrong);
- } else if (Callee->hasFnAttribute(Attribute::StackProtect) &&
- !Caller->hasFnAttribute(Attribute::SafeStack) &&
- !Caller->hasFnAttribute(Attribute::StackProtectReq) &&
- !Caller->hasFnAttribute(Attribute::StackProtectStrong))
- Caller->addFnAttr(Attribute::StackProtect);
-}
-
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
@@ -126,18 +96,26 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
-static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
+static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
+ // We need to manually construct BasicAA directly in order to disable
+ // its use of other function analyses.
+ BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee));
+
+ // Construct our own AA results for this function. We do this manually to
+ // work around the limitations of the legacy pass manager.
+ AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR));
+
// Try to inline the function. Get the list of static allocas that were
// inlined.
- if (!InlineFunction(CS, IFI, InsertLifetime))
+ if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
return false;
- AdjustCallerSSPLevel(Caller, Callee);
+ AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
@@ -219,6 +197,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
<< *AvailableAlloca << '\n');
+ // Move affected dbg.declare calls immediately after the new alloca to
+ // avoid the situation where a dbg.declare precedes its alloca.
+ if (auto *L = LocalAsMetadata::getIfExists(AI))
+ if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
+ DDI->moveBefore(AvailableAlloca->getNextNode());
+
AI->replaceAllUsesWith(AvailableAlloca);
if (Align1 != Align2) {
@@ -258,39 +244,64 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
- int thres = InlineThreshold; // -inline-threshold or else selected by
- // overall opt level
+ int Threshold = InlineThreshold; // -inline-threshold or else selected by
+ // overall opt level
// If -inline-threshold is not given, listen to the optsize attribute when it
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
+ // FIXME: Use Function::optForSize().
Caller->hasFnAttribute(Attribute::OptimizeForSize);
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
- OptSizeThreshold < thres)
- thres = OptSizeThreshold;
+ OptSizeThreshold < Threshold)
+ Threshold = OptSizeThreshold;
- // Listen to the inlinehint attribute when it would increase the threshold
- // and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
- bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::InlineHint);
- if (InlineHint && HintThreshold > thres &&
- !Caller->hasFnAttribute(Attribute::MinSize))
- thres = HintThreshold;
+ if (!Callee || Callee->isDeclaration())
+ return Threshold;
+
+ // If profile information is available, use that to adjust threshold of hot
+ // and cold functions.
+ // FIXME: The heuristics used below for determining hotness and coldness are
+ // based on preliminary SPEC tuning and may not be optimal. Replace this with
+ // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
+ uint64_t FunctionCount = 0, MaxFunctionCount = 0;
+ bool HasPGOCounts = false;
+ if (Callee->getEntryCount() &&
+ Callee->getParent()->getMaximumFunctionCount()) {
+ HasPGOCounts = true;
+ FunctionCount = Callee->getEntryCount().getValue();
+ MaxFunctionCount =
+ Callee->getParent()->getMaximumFunctionCount().getValue();
+ }
- // Listen to the cold attribute when it would decrease the threshold.
- bool ColdCallee = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::Cold);
+ // Listen to the inlinehint attribute or profile based hotness information
+ // when it would increase the threshold and the caller does not need to
+ // minimize its size.
+ bool InlineHint =
+ Callee->hasFnAttribute(Attribute::InlineHint) ||
+ (HasPGOCounts &&
+ FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
+ if (InlineHint && HintThreshold > Threshold &&
+ !Caller->hasFnAttribute(Attribute::MinSize))
+ Threshold = HintThreshold;
+
+ // Listen to the cold attribute or profile based coldness information
+ // when it would decrease the threshold.
+ bool ColdCallee =
+ Callee->hasFnAttribute(Attribute::Cold) ||
+ (HasPGOCounts &&
+ FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
// Command line argument for InlineLimit will override the default
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
// do not use the default cold threshold even if it is smaller.
if ((InlineLimit.getNumOccurrences() == 0 ||
ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
- ColdThreshold < thres)
- thres = ColdThreshold;
+ ColdThreshold < Threshold)
+ Threshold = ColdThreshold;
- return thres;
+ return Threshold;
}
static void emitAnalysis(CallSite CS, const Twine &Msg) {
@@ -430,10 +441,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ ACT = &getAnalysis<AssumptionCacheTracker>();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
@@ -469,8 +478,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// If this is a direct call to an external function, we can never inline
// it. If it is an indirect call, inlining may resolve it to be a
// direct call, so we keep it.
- if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
- continue;
+ if (Function *Callee = CS.getCalledFunction())
+ if (Callee->isDeclaration())
+ continue;
CallSites.push_back(std::make_pair(CS, -1));
}
@@ -492,7 +502,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, AA, ACT);
+ InlineFunctionInfo InlineInfo(&CG, ACT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -513,7 +523,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
+ if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
DEBUG(dbgs() << " -> Deleting dead call: "
<< *CS.getInstruction() << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
@@ -550,7 +560,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
}
// Attempt to inline the function.
- if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
Twine(Callee->getName() +
@@ -647,8 +657,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
- for (auto I : CG) {
- CallGraphNode *CGN = I.second;
+ for (const auto &I : CG) {
+ CallGraphNode *CGN = I.second.get();
Function *F = CGN->getFunction();
if (!F || F->isDeclaration())
continue;
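The getInlineThreshold changes above amount to a small pure function over the caller/callee attributes and the new PGO entry counts. A hedged sketch, using integer arithmetic in place of the patch's double math; the 30%/1% cutoffs mirror the patch, ThresholdConfig is a made-up bundle of the three thresholds, and the cl::opt precedence rules are omitted:

#include <cstdint>

// Illustrative only: Default/Hint/Cold stand in for -inline-threshold,
// -inlinehint-threshold and -inlinecold-threshold. Assumes entry counts
// small enough that MaxFunctionCount / 10 * 3 does not lose precision.
struct ThresholdConfig {
  int Default;
  int Hint;
  int Cold;
};

int adjustedThreshold(const ThresholdConfig &C, bool CalleeHasInlineHint,
                      bool CalleeHasColdAttr, bool CallerMinSize,
                      bool HasPGOCounts, uint64_t FunctionCount,
                      uint64_t MaxFunctionCount) {
  int Threshold = C.Default;
  // Hot: inlinehint, or an entry count >= 30% of the module maximum.
  bool Hot = CalleeHasInlineHint ||
             (HasPGOCounts && FunctionCount >= MaxFunctionCount / 10 * 3);
  // Cold: the cold attribute, or an entry count <= 1% of the maximum.
  bool Cold = CalleeHasColdAttr ||
              (HasPGOCounts && FunctionCount <= MaxFunctionCount / 100);
  if (Hot && C.Hint > Threshold && !CallerMinSize)
    Threshold = C.Hint;
  if (Cold && C.Cold < Threshold)
    Threshold = C.Cold;
  return Threshold;
}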
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index 7950163f757d..21bb5d000bc7 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -60,6 +60,10 @@ namespace {
explicit InternalizePass();
explicit InternalizePass(ArrayRef<const char *> ExportList);
void LoadFile(const char *Filename);
+ bool maybeInternalize(GlobalValue &GV,
+ const std::set<const Comdat *> &ExternalComdats);
+ void checkComdatVisibility(GlobalValue &GV,
+ std::set<const Comdat *> &ExternalComdats);
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -105,40 +109,85 @@ void InternalizePass::LoadFile(const char *Filename) {
}
}
-static bool shouldInternalize(const GlobalValue &GV,
- const std::set<std::string> &ExternalNames) {
+static bool isExternallyVisible(const GlobalValue &GV,
+ const std::set<std::string> &ExternalNames) {
// Function must be defined here
if (GV.isDeclaration())
- return false;
+ return true;
// Available externally is really just a "declaration with a body".
if (GV.hasAvailableExternallyLinkage())
- return false;
+ return true;
// Assume that dllexported symbols are referenced elsewhere
if (GV.hasDLLExportStorageClass())
- return false;
-
- // Already has internal linkage
- if (GV.hasLocalLinkage())
- return false;
+ return true;
// Marked to keep external?
- if (ExternalNames.count(GV.getName()))
- return false;
+ if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName()))
+ return true;
+
+ return false;
+}
+// Internalize GV if it is possible to do so, i.e. it is not externally visible
+// and is not a member of an externally visible comdat.
+bool InternalizePass::maybeInternalize(
+ GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+ if (Comdat *C = GV.getComdat()) {
+ if (ExternalComdats.count(C))
+ return false;
+
+ // If a comdat is not externally visible we can drop it.
+ if (auto GO = dyn_cast<GlobalObject>(&GV))
+ GO->setComdat(nullptr);
+
+ if (GV.hasLocalLinkage())
+ return false;
+ } else {
+ if (GV.hasLocalLinkage())
+ return false;
+
+ if (isExternallyVisible(GV, ExternalNames))
+ return false;
+ }
+
+ GV.setVisibility(GlobalValue::DefaultVisibility);
+ GV.setLinkage(GlobalValue::InternalLinkage);
return true;
}
+// If GV is part of a comdat and is externally visible, keep track of its
+// comdat so that we don't internalize any of its members.
+void InternalizePass::checkComdatVisibility(
+ GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+ Comdat *C = GV.getComdat();
+ if (!C)
+ return;
+
+ if (isExternallyVisible(GV, ExternalNames))
+ ExternalComdats.insert(C);
+}
+
bool InternalizePass::runOnModule(Module &M) {
CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
- bool Changed = false;
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, false);
+ // Collect comdat visibility information for the module.
+ std::set<const Comdat *> ExternalComdats;
+ if (!M.getComdatSymbolTable().empty()) {
+ for (Function &F : M)
+ checkComdatVisibility(F, ExternalComdats);
+ for (GlobalVariable &GV : M.globals())
+ checkComdatVisibility(GV, ExternalComdats);
+ for (GlobalAlias &GA : M.aliases())
+ checkComdatVisibility(GA, ExternalComdats);
+ }
+
// We must assume that globals in llvm.used have a reference that not even
// the linker can see, so we don't internalize them.
// For llvm.compiler.used the situation is a bit fuzzy. The assembler and
@@ -153,20 +202,16 @@ bool InternalizePass::runOnModule(Module &M) {
}
// Mark all functions not in the api as internal.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ for (Function &I : M) {
+ if (!maybeInternalize(I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
-
if (ExternalNode)
// Remove a callgraph edge from the external node to this function.
- ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
- Changed = true;
++NumFunctions;
- DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
}
// Never internalize the llvm.used symbol. It is used to implement
@@ -191,12 +236,9 @@ bool InternalizePass::runOnModule(Module &M) {
// internal as well.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ if (!maybeInternalize(*I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
++NumGlobals;
DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
}
@@ -204,17 +246,20 @@ bool InternalizePass::runOnModule(Module &M) {
// Mark all aliases that are not in the api as internal as well.
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ if (!maybeInternalize(*I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
++NumAliases;
DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
}
- return Changed;
+ // We do not keep track of whether this pass changed the module because
+ // it adds unnecessary complexity:
+ // 1) This pass will generally be near the start of the pass pipeline, so
+ // there will be no analyses to invalidate.
+ // 2) This pass will most likely end up changing the module and it isn't worth
+ // worrying about optimizing the case where the module is unchanged.
+ return true;
}
ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
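The comdat handling introduced above is a two-pass scheme: first record every comdat that still has an externally visible member, then refuse to internalize anything belonging to such a comdat. A compilable sketch with toy types (GlobalDesc stands in for GlobalValue; the real pass also drops the comdat from members of non-external comdats):

#include <set>
#include <string>
#include <vector>

struct GlobalDesc {
  std::string Name;
  std::string Comdat;     // empty = not a comdat member
  bool ExternallyVisible; // declaration, dllexport, export list, ...
  bool Internalized = false;
};

void internalizeModule(std::vector<GlobalDesc> &Globals) {
  // Pass 1: a comdat is "external" if any member must remain visible.
  std::set<std::string> ExternalComdats;
  for (const GlobalDesc &G : Globals)
    if (!G.Comdat.empty() && G.ExternallyVisible)
      ExternalComdats.insert(G.Comdat);

  // Pass 2: internalize what remains. Members of an external comdat are
  // skipped even when they would otherwise be internalizable.
  for (GlobalDesc &G : Globals) {
    if (!G.Comdat.empty() && ExternalComdats.count(G.Comdat))
      continue;
    if (G.ExternallyVisible)
      continue;
    G.Internalized = true;
  }
}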
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 41334ca5b429..8e4ad642ddd5 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -43,12 +43,13 @@ namespace {
initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(BreakCriticalEdgesID);
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
}
};
}
@@ -79,7 +80,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
//
Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
-bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
@@ -92,6 +93,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
bool Changed = false;
// If there is more than one top-level loop in this function, extract all of
@@ -120,14 +122,14 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
}
if (ShouldExtractLoop) {
- // We must omit landing pads. Landing pads must accompany the invoke
+ // We must omit EH pads. EH pads must accompany the invoke
// instruction. But this would result in a loop in the extracted
// function. An infinite cycle occurs when it tries to extract that loop as
// well.
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i]->isLandingPad()) {
+ if (ExitBlocks[i]->isEHPad()) {
ShouldExtractLoop = false;
break;
}
@@ -141,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
- LPM.deleteLoopFromQueue(L);
+ LI.updateUnloop(L);
}
++NumExtracted;
}
@@ -259,7 +261,7 @@ bool BlockExtractorPass::runOnModule(Module &M) {
// Figure out which index the basic block is in its function.
Function::iterator BBI = MF->begin();
std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
- TranslatedBlocksToNotExtract.insert(BBI);
+ TranslatedBlocksToNotExtract.insert(&*BBI);
}
while (!BlocksToNotExtractByName.empty()) {
@@ -278,7 +280,7 @@ bool BlockExtractorPass::runOnModule(Module &M) {
BasicBlock &BB = *BI;
if (BB.getName() != BlockName) continue;
- TranslatedBlocksToNotExtract.insert(BI);
+ TranslatedBlocksToNotExtract.insert(&*BI);
}
}
@@ -291,8 +293,8 @@ bool BlockExtractorPass::runOnModule(Module &M) {
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
SplitLandingPadPreds(&*F);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (!TranslatedBlocksToNotExtract.count(BB))
- BlocksToExtract.push_back(BB);
+ if (!TranslatedBlocksToNotExtract.count(&*BB))
+ BlocksToExtract.push_back(&*BB);
}
for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
index c6795c623eff..7b515745c312 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -26,6 +28,8 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -59,9 +63,9 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
bool BitSetInfo::containsValue(
const DataLayout &DL,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
uint64_t COffset) const {
- if (auto GV = dyn_cast<GlobalVariable>(V)) {
+ if (auto GV = dyn_cast<GlobalObject>(V)) {
auto I = GlobalLayout.find(GV);
if (I == GlobalLayout.end())
return false;
@@ -90,6 +94,21 @@ bool BitSetInfo::containsValue(
return false;
}
+void BitSetInfo::print(raw_ostream &OS) const {
+ OS << "offset " << ByteOffset << " size " << BitSize << " align "
+ << (1 << AlignLog2);
+
+ if (isAllOnes()) {
+ OS << " all-ones\n";
+ return;
+ }
+
+ OS << " { ";
+ for (uint64_t B : Bits)
+ OS << B << ' ';
+ OS << "}\n";
+}
+
BitSetInfo BitSetBuilder::build() {
if (Min > Max)
Min = 0;
@@ -193,34 +212,48 @@ struct LowerBitSets : public ModulePass {
Module *M;
bool LinkerSubsectionsViaSymbols;
+ Triple::ArchType Arch;
+ Triple::ObjectFormatType ObjectFormat;
IntegerType *Int1Ty;
IntegerType *Int8Ty;
IntegerType *Int32Ty;
Type *Int32PtrTy;
IntegerType *Int64Ty;
- Type *IntPtrTy;
+ IntegerType *IntPtrTy;
// The llvm.bitsets named metadata.
NamedMDNode *BitSetNM;
- // Mapping from bitset mdstrings to the call sites that test them.
- DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+ // Mapping from bitset identifiers to the call sites that test them.
+ DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;
std::vector<ByteArrayInfo> ByteArrayInfos;
BitSetInfo
- buildBitSet(MDString *BitSet,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ buildBitSet(Metadata *BitSet,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Value *BitOffset);
+ void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
+ Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
Value *
lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- GlobalVariable *CombinedGlobal,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
- void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
- const std::vector<GlobalVariable *> &Globals);
+ Constant *CombinedGlobal,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
+ ArrayRef<GlobalVariable *> Globals);
+ unsigned getJumpTableEntrySize();
+ Type *getJumpTableEntryType();
+ Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
+ unsigned Distance);
+ void verifyBitSetMDNode(MDNode *Op);
+ void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ ArrayRef<Function *> Functions);
+ void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
+ ArrayRef<GlobalObject *> Globals);
bool buildBitSets();
bool eraseBitSetMetadata();
@@ -228,7 +261,7 @@ struct LowerBitSets : public ModulePass {
bool runOnModule(Module &M) override;
};
-} // namespace
+} // anonymous namespace
INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
"Lower bitset metadata", false, false)
@@ -244,6 +277,8 @@ bool LowerBitSets::doInitialization(Module &Mod) {
Triple TargetTriple(M->getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+ Arch = TargetTriple.getArch();
+ ObjectFormat = TargetTriple.getObjectFormat();
Int1Ty = Type::getInt1Ty(M->getContext());
Int8Ty = Type::getInt8Ty(M->getContext());
@@ -262,8 +297,8 @@ bool LowerBitSets::doInitialization(Module &Mod) {
/// Build a bit set for BitSet using the object layouts in
/// GlobalLayout.
BitSetInfo LowerBitSets::buildBitSet(
- MDString *BitSet,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Metadata *BitSet,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
BitSetBuilder BSB;
// Compute the byte offset of each element of this bitset.
@@ -271,8 +306,11 @@ BitSetInfo LowerBitSets::buildBitSet(
for (MDNode *Op : BitSetNM->operands()) {
if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
continue;
- auto OpGlobal = dyn_cast<GlobalVariable>(
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ Constant *OpConst =
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
+ if (auto GA = dyn_cast<GlobalAlias>(OpConst))
+ OpConst = GA->getAliasee();
+ auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
if (!OpGlobal)
continue;
uint64_t Offset =
@@ -360,9 +398,8 @@ void LowerBitSets::allocateByteArrays() {
if (LinkerSubsectionsViaSymbols) {
BAI->ByteArray->replaceAllUsesWith(GEP);
} else {
- GlobalAlias *Alias =
- GlobalAlias::create(PointerType::getUnqual(Int8Ty),
- GlobalValue::PrivateLinkage, "bits", GEP, M);
+ GlobalAlias *Alias = GlobalAlias::create(
+ Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
BAI->ByteArray->replaceAllUsesWith(Alias);
}
BAI->ByteArray->eraseFromParent();
@@ -404,7 +441,7 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
- ByteArray = GlobalAlias::create(BAI->ByteArray->getType(),
+ ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
GlobalValue::PrivateLinkage, "bits_use",
ByteArray, M);
}
@@ -421,17 +458,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(
CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- GlobalVariable *CombinedGlobal,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Constant *CombinedGlobalIntAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
Value *Ptr = CI->getArgOperand(0);
const DataLayout &DL = M->getDataLayout();
if (BSI.containsValue(DL, GlobalLayout, Ptr))
- return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());
+ return ConstantInt::getTrue(M->getContext());
- Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
- GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+ CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
BasicBlock *InitialBB = CI->getParent();
@@ -490,18 +526,19 @@ Value *LowerBitSets::lowerBitSetCall(
/// Given a disjoint set of bitsets and globals, layout the globals, build the
/// bit sets and lower the llvm.bitset.test calls.
-void LowerBitSets::buildBitSetsFromGlobals(
- const std::vector<MDString *> &BitSets,
- const std::vector<GlobalVariable *> &Globals) {
+void LowerBitSets::buildBitSetsFromGlobalVariables(
+ ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
// Build a new global with the combined contents of the referenced globals.
+ // This global is a struct whose even-indexed elements contain the original
+ // contents of the referenced globals and whose odd-indexed elements contain
+ // any padding required to align the next element to the next power of 2.
std::vector<Constant *> GlobalInits;
const DataLayout &DL = M->getDataLayout();
for (GlobalVariable *G : Globals) {
GlobalInits.push_back(G->getInitializer());
- uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());
+ uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());
- // Compute the amount of padding required to align the next element to the
- // next power of 2.
+ // Compute the amount of padding required.
uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
// Cap at 128 was found experimentally to have a good data/instruction
@@ -515,34 +552,20 @@ void LowerBitSets::buildBitSetsFromGlobals(
if (!GlobalInits.empty())
GlobalInits.pop_back();
Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
- auto CombinedGlobal =
+ auto *CombinedGlobal =
new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
GlobalValue::PrivateLinkage, NewInit);
- const StructLayout *CombinedGlobalLayout =
- DL.getStructLayout(cast<StructType>(NewInit->getType()));
+ StructType *NewTy = cast<StructType>(NewInit->getType());
+ const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
// Compute the offsets of the original globals within the new global.
- DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+ DenseMap<GlobalObject *, uint64_t> GlobalLayout;
for (unsigned I = 0; I != Globals.size(); ++I)
// Multiply by 2 to account for padding elements.
GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
- // For each bitset in this disjoint set...
- for (MDString *BS : BitSets) {
- // Build the bitset.
- BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
-
- ByteArrayInfo *BAI = 0;
-
- // Lower each call to llvm.bitset.test for this bitset.
- for (CallInst *CI : BitSetTestCallSites[BS]) {
- ++NumBitSetCallsLowered;
- Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
- CI->replaceAllUsesWith(Lowered);
- CI->eraseFromParent();
- }
- }
+ lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);
// Build aliases pointing to offsets into the combined global for each
// global from which we built the combined global, and replace references
@@ -556,9 +579,11 @@ void LowerBitSets::buildBitSetsFromGlobals(
if (LinkerSubsectionsViaSymbols) {
Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
} else {
- GlobalAlias *GAlias =
- GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(),
- "", CombinedGlobalElemPtr, M);
+ assert(Globals[I]->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
+ Globals[I]->getLinkage(), "",
+ CombinedGlobalElemPtr, M);
+ GAlias->setVisibility(Globals[I]->getVisibility());
GAlias->takeName(Globals[I]);
Globals[I]->replaceAllUsesWith(GAlias);
}
@@ -566,6 +591,331 @@ void LowerBitSets::buildBitSetsFromGlobals(
}
}
+void LowerBitSets::lowerBitSetCalls(
+ ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+ Constant *CombinedGlobalIntAddr =
+ ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
+
+ // For each bitset in this disjoint set...
+ for (Metadata *BS : BitSets) {
+ // Build the bitset.
+ BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+ DEBUG({
+ if (auto BSS = dyn_cast<MDString>(BS))
+ dbgs() << BSS->getString() << ": ";
+ else
+ dbgs() << "<unnamed>: ";
+ BSI.print(dbgs());
+ });
+
+ ByteArrayInfo *BAI = nullptr;
+
+ // Lower each call to llvm.bitset.test for this bitset.
+ for (CallInst *CI : BitSetTestCallSites[BS]) {
+ ++NumBitSetCallsLowered;
+ Value *Lowered =
+ lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
+ }
+}
+
+void LowerBitSets::verifyBitSetMDNode(MDNode *Op) {
+ if (Op->getNumOperands() != 3)
+ report_fatal_error(
+ "All operands of llvm.bitsets metadata must have 3 elements");
+ if (!Op->getOperand(1))
+ return;
+
+ auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+ if (!OpConstMD)
+ report_fatal_error("Bit set element must be a constant");
+ auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue());
+ if (!OpGlobal)
+ return;
+
+ if (OpGlobal->isThreadLocal())
+ report_fatal_error("Bit set element may not be thread-local");
+ if (OpGlobal->hasSection())
+ report_fatal_error("Bit set element may not have an explicit section");
+
+ if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker())
+ report_fatal_error("Bit set global var element must be a definition");
+
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ if (!OffsetConstMD)
+ report_fatal_error("Bit set element offset must be a constant");
+ auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+ if (!OffsetInt)
+ report_fatal_error("Bit set element offset must be an integer constant");
+}
+
+static const unsigned kX86JumpTableEntrySize = 8;
+
+unsigned LowerBitSets::getJumpTableEntrySize() {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ return kX86JumpTableEntrySize;
+}
+
+// Create a constant representing a jump table entry for the target. This
+// consists of an instruction sequence containing a relative branch to Dest. The
+// constant will be laid out at address Src+(Len*Distance) where Len is the
+// target-specific jump table entry size.
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest,
+ unsigned Distance) {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ const unsigned kJmpPCRel32Code = 0xe9;
+ const unsigned kInt3Code = 0xcc;
+
+ ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code);
+
+ // Build a constant representing the displacement between the constant's
+ // address and Dest. This will resolve to a PC32 relocation referring to Dest.
+ Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy);
+ Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy);
+ Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt);
+ ConstantInt *DispOffset =
+ ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5);
+ Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset);
+ OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty);
+
+ ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code);
+
+ Constant *Fields[] = {
+ Jmp, OffsetedDisp, Int3, Int3, Int3,
+ };
+ return ConstantStruct::getAnon(Fields, /*Packed=*/true);
+}
+
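The displacement arithmetic above is worth checking by hand: entry I starts at Src + 8*I, the jmp opcode takes 1 byte and the rel32 field 4, so the branch displacement is taken relative to Src + 8*I + 5. A standalone sketch of the same computation:

#include <cstdint>
#include <cstdio>

// Computes the rel32 field of the 8-byte x86 entry "e9 <rel32> cc cc cc"
// at index I, matching Disp - (Distance * kX86JumpTableEntrySize + 5).
int32_t jumpTableRel32(uint64_t Src, uint64_t Dest, unsigned I) {
  const uint64_t EntrySize = 8; // kX86JumpTableEntrySize
  uint64_t EndOfJmp = Src + I * EntrySize + 5;
  return static_cast<int32_t>(Dest - EndOfJmp);
}

int main() {
  // Entry 2 of a table at 0x1000 branching to 0x2000:
  // 0x2000 - (0x1000 + 16 + 5) = 0xfeb (4075).
  std::printf("%d\n", jumpTableRel32(0x1000, 0x2000, 2));
}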
+Type *LowerBitSets::getJumpTableEntryType() {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ return StructType::get(M->getContext(),
+ {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty},
+ /*Packed=*/true);
+}
+
+/// Given a disjoint set of bitsets and functions, build a jump table for the
+/// functions, build the bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ ArrayRef<Function *> Functions) {
+ // Unlike the global bitset builder, the function bitset builder cannot
+ // re-arrange functions in a particular order and base its calculations on the
+ // layout of the functions' entry points, as we have no idea how large a
+ // particular function will end up being (the size could even depend on what
+ // this pass does!). Instead, we build a jump table, which is a block of code
+ // consisting of one branch instruction for each of the functions in the bit
+ // set that branches to the target function, and redirect any taken function
+ // addresses to the corresponding jump table entry. In the object file's
+ // symbol table, the symbols for the target functions also refer to the jump
+ // table entries, so that addresses taken outside the module will pass any
+ // verification done inside the module.
+ //
+ // In more concrete terms, suppose we have three functions f, g, h which are
+ // members of a single bitset, and a function foo that returns their
+ // addresses:
+ //
+ // f:
+ // mov 0, %eax
+ // ret
+ //
+ // g:
+ // mov 1, %eax
+ // ret
+ //
+ // h:
+ // mov 2, %eax
+ // ret
+ //
+ // foo:
+ // mov f, %eax
+ // mov g, %edx
+ // mov h, %ecx
+ // ret
+ //
+ // To create a jump table for these functions, we instruct the LLVM code
+ // generator to output a jump table in the .text section. This is done by
+ // representing the instructions in the jump table as an LLVM constant and
+ // placing them in a global variable in the .text section. The end result will
+ // (conceptually) look like this:
+ //
+ // f:
+ // jmp .Ltmp0 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // g:
+ // jmp .Ltmp1 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // h:
+ // jmp .Ltmp2 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // .Ltmp0:
+ // mov 0, %eax
+ // ret
+ //
+ // .Ltmp1:
+ // mov 1, %eax
+ // ret
+ //
+ // .Ltmp2:
+ // mov 2, %eax
+ // ret
+ //
+ // foo:
+ // mov f, %eax
+ // mov g, %edx
+ // mov h, %ecx
+ // ret
+ //
+ // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
+ // normal case the check can be carried out using the same kind of simple
+ // arithmetic that we normally use for globals.
+
+ assert(!Functions.empty());
+
+ // Build a simple layout based on the regular layout of jump tables.
+ DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+ unsigned EntrySize = getJumpTableEntrySize();
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ GlobalLayout[Functions[I]] = I * EntrySize;
+
+ // Create a constant to hold the jump table.
+ ArrayType *JumpTableType =
+ ArrayType::get(getJumpTableEntryType(), Functions.size());
+ auto JumpTable = new GlobalVariable(*M, JumpTableType,
+ /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, nullptr);
+ JumpTable->setSection(ObjectFormat == Triple::MachO
+ ? "__TEXT,__text,regular,pure_instructions"
+ : ".text");
+ lowerBitSetCalls(BitSets, JumpTable, GlobalLayout);
+
+ // Build aliases pointing to offsets into the jump table, and replace
+ // references to the original functions with references to the aliases.
+ for (unsigned I = 0; I != Functions.size(); ++I) {
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
+ ConstantExpr::getGetElementPtr(
+ JumpTableType, JumpTable,
+ ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, I)}),
+ Functions[I]->getType());
+ if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) {
+ Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ assert(Functions[I]->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0,
+ Functions[I]->getLinkage(), "",
+ CombinedGlobalElemPtr, M);
+ GAlias->setVisibility(Functions[I]->getVisibility());
+ GAlias->takeName(Functions[I]);
+ Functions[I]->replaceAllUsesWith(GAlias);
+ }
+ if (!Functions[I]->isDeclarationForLinker())
+ Functions[I]->setLinkage(GlobalValue::PrivateLinkage);
+ }
+
+ // Build and set the jump table's initializer.
+ std::vector<Constant *> JumpTableEntries;
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ JumpTableEntries.push_back(
+ createJumpTableEntry(JumpTable, Functions[I], I));
+ JumpTable->setInitializer(
+ ConstantArray::get(JumpTableType, JumpTableEntries));
+}
+
+void LowerBitSets::buildBitSetsFromDisjointSet(
+ ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) {
+ llvm::DenseMap<Metadata *, uint64_t> BitSetIndices;
+ llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices;
+ for (unsigned I = 0; I != BitSets.size(); ++I)
+ BitSetIndices[BitSets[I]] = I;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ GlobalIndices[Globals[I]] = I;
+
+ // For each bitset, build a set of indices that refer to globals referenced by
+ // the bitset.
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ // Op = { bitset name, global, offset }
+ if (!Op->getOperand(1))
+ continue;
+ auto I = BitSetIndices.find(Op->getOperand(0));
+ if (I == BitSetIndices.end())
+ continue;
+
+ auto OpGlobal = dyn_cast<GlobalObject>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ if (!OpGlobal)
+ continue;
+ BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+ }
+ }
+
+ // Order the sets of indices by size. The GlobalLayoutBuilder works best
+ // when given small index sets first.
+ std::stable_sort(
+ BitSetMembers.begin(), BitSetMembers.end(),
+ [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
+
+ // Create a GlobalLayoutBuilder and provide it with index sets as layout
+ // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
+ // close together as possible.
+ GlobalLayoutBuilder GLB(Globals.size());
+ for (auto &&MemSet : BitSetMembers)
+ GLB.addFragment(MemSet);
+
+ // Build the bitsets from this disjoint set.
+ if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
+ // Build a vector of global variables with the computed layout.
+ std::vector<GlobalVariable *> OrderedGVs(Globals.size());
+ auto OGI = OrderedGVs.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
+ if (!GV)
+ report_fatal_error(
+ "Bit set may not contain both global variables and functions");
+ *OGI++ = GV;
+ }
+ }
+
+ buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
+ } else {
+ // Build a vector of functions with the computed layout.
+ std::vector<Function *> OrderedFns(Globals.size());
+ auto OFI = OrderedFns.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ auto Fn = dyn_cast<Function>(Globals[Offset]);
+ if (!Fn)
+ report_fatal_error(
+ "Bit set may not contain both global variables and functions");
+ *OFI++ = Fn;
+ }
+ }
+
+ buildBitSetsFromFunctions(BitSets, OrderedFns);
+ }
+}
+
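buildBitSetsFromDisjointSet feeds each bitset's member-index set to the GlobalLayoutBuilder, smallest sets first, so that tightly constrained bitsets end up contiguous. The real builder merges overlapping fragments; the toy below captures only the ordering idea (place each fragment's unplaced members in turn):

#include <algorithm>
#include <cstdint>
#include <set>
#include <vector>

std::vector<uint64_t> layoutGlobals(std::vector<std::set<uint64_t>> Members,
                                    uint64_t NumGlobals) {
  // Visit smaller index sets first, as the pass does.
  std::stable_sort(Members.begin(), Members.end(),
                   [](const std::set<uint64_t> &A,
                      const std::set<uint64_t> &B) {
                     return A.size() < B.size();
                   });
  std::vector<bool> Placed(NumGlobals, false);
  std::vector<uint64_t> Order;
  for (const std::set<uint64_t> &Frag : Members)
    for (uint64_t Idx : Frag)
      if (!Placed[Idx]) {
        Placed[Idx] = true;
        Order.push_back(Idx);
      }
  // Globals mentioned in no bitset keep their original relative order.
  for (uint64_t I = 0; I != NumGlobals; ++I)
    if (!Placed[I])
      Order.push_back(I);
  return Order;
}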
/// Lower all bit sets in this module.
bool LowerBitSets::buildBitSets() {
Function *BitSetTestFunc =
@@ -576,24 +926,36 @@ bool LowerBitSets::buildBitSets() {
// Equivalence class set containing bitsets and the globals they reference.
// This is used to partition the set of bitsets in the module into disjoint
// sets.
- typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+ typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
GlobalClassesTy;
GlobalClassesTy GlobalClasses;
+ // Verify the bitset metadata and build a mapping from bitset identifiers to
+ // their last observed index in BitSetNM. This will be used later to
+ // deterministically order the list of bitset identifiers.
+ llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
+ if (BitSetNM) {
+ for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
+ MDNode *Op = BitSetNM->getOperand(I);
+ verifyBitSetMDNode(Op);
+ BitSetIdIndices[Op->getOperand(0)] = I;
+ }
+ }
+
for (const Use &U : BitSetTestFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
- if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+ if (!BitSetMDVal)
report_fatal_error(
- "Second argument of llvm.bitset.test must be metadata string");
- auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+ "Second argument of llvm.bitset.test must be metadata");
+ auto BitSet = BitSetMDVal->getMetadata();
// Add the call site to the list of call sites for this bit set. We also use
// BitSetTestCallSites to keep track of whether we have seen this bit set
// before. If we have, we don't need to re-add the referenced globals to the
// equivalence class.
- std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+ std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator,
bool> Ins =
BitSetTestCallSites.insert(
std::make_pair(BitSet, std::vector<CallInst *>()));
@@ -608,31 +970,16 @@ bool LowerBitSets::buildBitSets() {
if (!BitSetNM)
continue;
- // Verify the bitset metadata and add the referenced globals to the bitset's
- // equivalence class.
+ // Add the referenced globals to the bitset's equivalence class.
for (MDNode *Op : BitSetNM->operands()) {
- if (Op->getNumOperands() != 3)
- report_fatal_error(
- "All operands of llvm.bitsets metadata must have 3 elements");
-
if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
continue;
- auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
- if (!OpConstMD)
- report_fatal_error("Bit set element must be a constant");
- auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+ auto OpGlobal = dyn_cast<GlobalObject>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
if (!OpGlobal)
continue;
- auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
- if (!OffsetConstMD)
- report_fatal_error("Bit set element offset must be a constant");
- auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
- if (!OffsetInt)
- report_fatal_error(
- "Bit set element offset must be an integer constant");
-
CurSet = GlobalClasses.unionSets(
CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
}
@@ -641,79 +988,51 @@ bool LowerBitSets::buildBitSets() {
if (GlobalClasses.empty())
return false;
- // For each disjoint set we found...
+ // Build a list of disjoint sets ordered by their maximum BitSetNM index
+ // for determinism.
+ std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
E = GlobalClasses.end();
I != E; ++I) {
if (!I->isLeader()) continue;
-
++NumBitSetDisjointSets;
- // Build the list of bitsets and referenced globals in this disjoint set.
- std::vector<MDString *> BitSets;
- std::vector<GlobalVariable *> Globals;
- llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
- llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
+ unsigned MaxIndex = 0;
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
MI != GlobalClasses.member_end(); ++MI) {
- if ((*MI).is<MDString *>()) {
- BitSetIndices[MI->get<MDString *>()] = BitSets.size();
- BitSets.push_back(MI->get<MDString *>());
- } else {
- GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
- Globals.push_back(MI->get<GlobalVariable *>());
- }
+ if ((*MI).is<Metadata *>())
+ MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]);
}
+ Sets.emplace_back(I, MaxIndex);
+ }
+ std::sort(Sets.begin(), Sets.end(),
+ [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
+ const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
+ return S1.second < S2.second;
+ });
- // For each bitset, build a set of indices that refer to globals referenced
- // by the bitset.
- std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
- if (BitSetNM) {
- for (MDNode *Op : BitSetNM->operands()) {
- // Op = { bitset name, global, offset }
- if (!Op->getOperand(1))
- continue;
- auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
- if (I == BitSetIndices.end())
- continue;
-
- auto OpGlobal = dyn_cast<GlobalVariable>(
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
- if (!OpGlobal)
- continue;
- BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
- }
+ // For each disjoint set we found...
+ for (const auto &S : Sets) {
+ // Build the list of bitsets in this disjoint set.
+ std::vector<Metadata *> BitSets;
+ std::vector<GlobalObject *> Globals;
+ for (GlobalClassesTy::member_iterator MI =
+ GlobalClasses.member_begin(S.first);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<Metadata *>())
+ BitSets.push_back(MI->get<Metadata *>());
+ else
+ Globals.push_back(MI->get<GlobalObject *>());
}
- // Order the sets of indices by size. The GlobalLayoutBuilder works best
- // when given small index sets first.
- std::stable_sort(
- BitSetMembers.begin(), BitSetMembers.end(),
- [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
- return O1.size() < O2.size();
- });
-
- // Create a GlobalLayoutBuilder and provide it with index sets as layout
- // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
- // as close together as possible.
- GlobalLayoutBuilder GLB(Globals.size());
- for (auto &&MemSet : BitSetMembers)
- GLB.addFragment(MemSet);
-
- // Build a vector of globals with the computed layout.
- std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
- auto OGI = OrderedGlobals.begin();
- for (auto &&F : GLB.Fragments)
- for (auto &&Offset : F)
- *OGI++ = Globals[Offset];
-
- // Order bitsets by name for determinism.
- std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
- return S1->getString() < S2->getString();
+ // Order bitsets by BitSetNM index for determinism. This ordering is stable
+ // as there is a one-to-one mapping between metadata and indices.
+ std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) {
+ return BitSetIdIndices[M1] < BitSetIdIndices[M2];
});
- // Build the bitsets from this disjoint set.
- buildBitSetsFromGlobals(BitSets, OrderedGlobals);
+ // Lower the bitsets in this disjoint set.
+ buildBitSetsFromDisjointSet(BitSets, Globals);
}
allocateByteArrays();
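The determinism fix above replaces name-based ordering with index-based ordering: each disjoint set is keyed by the largest BitSetNM operand index among its bitsets, and sets are processed in ascending key order. A minimal sketch of the keying, with toy identifiers in place of EquivalenceClasses iterators and Metadata pointers:

#include <algorithm>
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using MetaId = unsigned;

std::vector<unsigned>
orderSets(const std::vector<std::vector<MetaId>> &Sets,
          const std::map<MetaId, unsigned> &IdIndices) {
  // Pair each set with the maximum metadata index of its members.
  std::vector<std::pair<unsigned, unsigned>> Keyed;
  for (unsigned S = 0; S != Sets.size(); ++S) {
    unsigned MaxIndex = 0;
    for (MetaId M : Sets[S])
      MaxIndex = std::max(MaxIndex, IdIndices.at(M));
    Keyed.emplace_back(S, MaxIndex);
  }
  std::sort(Keyed.begin(), Keyed.end(),
            [](const std::pair<unsigned, unsigned> &A,
               const std::pair<unsigned, unsigned> &B) {
              return A.second < B.second;
            });
  std::vector<unsigned> Order;
  for (const auto &P : Keyed)
    Order.push_back(P.first);
  return Order;
}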
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 2e3519eac6a5..8a209a18c540 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -27,6 +27,14 @@
// -- We define Function* container class with custom "operator<" (FunctionPtr).
// -- "FunctionPtr" instances are stored in std::set collection, so every
// std::set::insert operation will give you result in log(N) time.
+//
+// As an optimization, a hash of the function structure is calculated first, and
+// two functions are only compared if they have the same hash. This hash is
+// cheap to compute, and has the property that if function F == G according to
+// the comparison function, then hash(F) == hash(G). This consistency property
+// is critical to ensuring all possible merging opportunities are exploited.
+// Collisions in the hash affect the speed of the pass but not the correctness
+// or determinism of the resulting transformation.
//
// When a match is found the functions are folded. If both functions are
// overridable, we move the functionality into a new internal function and
@@ -87,6 +95,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -97,12 +106,14 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mergefunc"
@@ -121,21 +132,64 @@ static cl::opt<unsigned> NumFunctionsForSanityCheck(
namespace {
+/// GlobalNumberState assigns an integer to each global value in the program,
+/// which is used by the comparison routine to order references to globals. This
+/// state must be preserved throughout the pass, because Functions and other
+/// globals need to maintain their relative order. Globals are assigned a number
+/// when they are first visited. This order is deterministic, and so the
+/// assigned numbers are as well. When two functions are merged, neither number
+/// is updated. If the symbols are weak, this would be incorrect. If they are
+/// strong, then one will be replaced at all references to the other, and so
+/// direct callsites will now see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+ struct Config : ValueMapConfig<GlobalValue*> {
+ enum { FollowRAUW = false };
+ };
+ // Each GlobalValue is mapped to an identifier. The Config ensures that when
+ // RAUW occurs, the mapping does not change. Tracking changes is unnecessary,
+ // and it is also problematic for weak symbols (which may be overwritten).
+ typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+ ValueNumberMap GlobalNumbers;
+ // The next unused serial number to assign to a global.
+ uint64_t NextNumber;
+ public:
+ GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
+ uint64_t getNumber(GlobalValue* Global) {
+ ValueNumberMap::iterator MapIter;
+ bool Inserted;
+ std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+ if (Inserted)
+ NextNumber++;
+ return MapIter->second;
+ }
+ void clear() {
+ GlobalNumbers.clear();
+ }
+};
+
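The first-visit numbering is easy to model. Two operands compare equal only when they name the same global; otherwise their numbers give a deterministic order that is independent of symbol names, which may be absent in stripped bitcode. A toy analogue (the real class additionally pins the mapping across RAUW via its ValueMap config):

#include <cstdint>
#include <unordered_map>

struct ToyGlobalNumbers {
  std::unordered_map<const void *, uint64_t> Numbers;
  uint64_t Next = 0;
  // A global gets the next serial number the first time it is seen and
  // keeps that number for the lifetime of the pass.
  uint64_t getNumber(const void *Global) {
    auto Ins = Numbers.emplace(Global, Next);
    if (Ins.second)
      ++Next;
    return Ins.first->second;
  }
};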
/// FunctionComparator - Compares two functions to determine whether or not
/// they will generate machine code with the same behaviour. DataLayout is
/// used if available. The comparator always fails conservatively (erring on the
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const Function *F1, const Function *F2)
- : FnL(F1), FnR(F2) {}
+ FunctionComparator(const Function *F1, const Function *F2,
+ GlobalNumberState* GN)
+ : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
/// Test whether the two functions have equivalent behaviour.
int compare();
+ /// Hash a function. Equivalent functions will have the same hash, and unequal
+ /// functions will have different hashes with high probability.
+ typedef uint64_t FunctionHash;
+ static FunctionHash functionHash(Function &);
private:
/// Test whether two basic blocks have equivalent behaviour.
- int compare(const BasicBlock *BBL, const BasicBlock *BBR);
+ int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);
/// Constants comparison.
/// It is analogous to lexicographical comparison between hypothetical numbers
@@ -241,6 +295,10 @@ private:
/// If these properties are equal - compare their contents.
int cmpConstants(const Constant *L, const Constant *R);
+ /// Compares two global values by number. Uses the GlobalNumberState to
+ /// identify the same globals across function calls.
+ int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+
/// Assign or look up previously assigned numbers for the two values, and
/// return whether the numbers are equal. Numbers are assigned in the order
/// visited.
@@ -320,8 +378,9 @@ private:
///
/// 1. If types are of different kind (different type IDs).
/// Return result of type IDs comparison, treating them as numbers.
- /// 2. If types are vectors or integers, compare Type* values as numbers.
- /// 3. Types has same ID, so check whether they belongs to the next group:
+ /// 2. If types are integers, check that they have the same width. If they
+ /// are vectors, check that they have the same count and subtype.
+ /// 3. Types have the same ID, so check whether they are one of:
/// * Void
/// * Float
/// * Double
@@ -330,8 +389,7 @@ private:
/// * PPC_FP128
/// * Label
/// * Metadata
- /// If so - return 0, yes - we can treat these types as equal only because
- /// their IDs are same.
+ /// We can treat these types as equal whenever their IDs are same.
/// 4. If Left and Right are pointers, return result of address space
/// comparison (numbers comparison). We can treat pointer types of same
/// address space as equal.
@@ -343,11 +401,13 @@ private:
int cmpTypes(Type *TyL, Type *TyR) const;
int cmpNumbers(uint64_t L, uint64_t R) const;
-
int cmpAPInts(const APInt &L, const APInt &R) const;
int cmpAPFloats(const APFloat &L, const APFloat &R) const;
- int cmpStrings(StringRef L, StringRef R) const;
+ int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
+ int cmpMem(StringRef L, StringRef R) const;
int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+ int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
+ int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
// The two functions undergoing comparison.
const Function *FnL, *FnR;
@@ -386,30 +446,30 @@ private:
/// could be operands from further BBs we didn't scan yet.
/// So it's impossible to use dominance properties in general.
DenseMap<const Value*, int> sn_mapL, sn_mapR;
+
+ // The global state we will use
+ GlobalNumberState* GlobalNumbers;
};
class FunctionNode {
mutable AssertingVH<Function> F;
-
+ FunctionComparator::FunctionHash Hash;
public:
- FunctionNode(Function *F) : F(F) {}
+ // Note that the hash is potentially recalculated multiple times, but it is cheap.
+ FunctionNode(Function *F)
+ : F(F), Hash(FunctionComparator::functionHash(*F)) {}
Function *getFunc() const { return F; }
+ FunctionComparator::FunctionHash getHash() const { return Hash; }
/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
void replaceBy(Function *G) const {
- assert(!(*this < FunctionNode(G)) && !(FunctionNode(G) < *this) &&
- "The two functions must be equal");
-
F = G;
}
- void release() { F = 0; }
- bool operator<(const FunctionNode &RHS) const {
- return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
- }
+ void release() { F = nullptr; }
};
-}
+} // end anonymous namespace
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
if (L < R) return -1;
@@ -426,13 +486,25 @@ int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
}
int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
- if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
- (uint64_t)&R.getSemantics()))
+ // Floats are ordered first by semantics (i.e. float, double, half, etc.),
+ // then by value interpreted as a bitstring (aka APInt).
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+ if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+ APFloat::semanticsPrecision(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+ APFloat::semanticsMaxExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+ APFloat::semanticsMinExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+ APFloat::semanticsSizeInBits(SR)))
return Res;
return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
-int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
// To avoid a heavy comparison, compare sizes first.
if (int Res = cmpNumbers(L.size(), R.size()))
return Res;
@@ -466,6 +538,59 @@ int FunctionComparator::cmpAttrs(const AttributeSet L,
return 0;
}
+int FunctionComparator::cmpRangeMetadata(const MDNode* L,
+ const MDNode* R) const {
+ if (L == R)
+ return 0;
+ if (!L)
+ return -1;
+ if (!R)
+ return 1;
+ // Range metadata is a sequence of numbers. Make sure they are the same
+ // sequence.
+ // TODO: Note that as this is metadata, it is possible to drop and/or merge
+ // this data when considering functions to merge. Thus this comparison would
+ // return 0 (i.e. equivalent), but merging would become more complicated
+ // because the ranges would need to be unioned. It is not likely that
+ // functions differ ONLY in this metadata if they are actually the same
+ // function semantically.
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+ for (size_t I = 0; I < L->getNumOperands(); ++I) {
+ ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+ ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+ if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+ return Res;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+ const Instruction *R) const {
+ ImmutableCallSite LCS(L);
+ ImmutableCallSite RCS(R);
+
+ assert(LCS && RCS && "Must be calls or invokes!");
+ assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+ if (int Res =
+ cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+ return Res;
+
+ for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+ auto OBL = LCS.getOperandBundleAt(i);
+ auto OBR = RCS.getOperandBundleAt(i);
+
+ if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+ return Res;
+
+ if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+ return Res;
+ }
+
+ return 0;
+}
+
/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
/// type.
@@ -500,9 +625,9 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
unsigned TyLWidth = 0;
unsigned TyRWidth = 0;
- if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+ if (auto *VecTyL = dyn_cast<VectorType>(TyL))
TyLWidth = VecTyL->getBitWidth();
- if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+ if (auto *VecTyR = dyn_cast<VectorType>(TyR))
TyRWidth = VecTyR->getBitWidth();
if (TyLWidth != TyRWidth)
@@ -538,11 +663,29 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
if (!L->isNullValue() && R->isNullValue())
return -1;
+ auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
+ auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+ if (GlobalValueL && GlobalValueR) {
+ return cmpGlobalValues(GlobalValueL, GlobalValueR);
+ }
+
if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
return Res;
+ if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+ const auto *SeqR = cast<ConstantDataSequential>(R);
+ // This handles ConstantDataArray and ConstantDataVector. Note that we
+ // compare the two raw data arrays, which might differ depending on the host
+ // endianness. This isn't a problem though, because the endianness of a module
+ // affects the order of the constants, and this order is the same
+ // for a given input module and host platform.
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+ }
+
switch (L->getValueID()) {
- case Value::UndefValueVal: return TypesRes;
+ case Value::UndefValueVal:
+ case Value::ConstantTokenNoneVal:
+ return TypesRes;
case Value::ConstantIntVal: {
const APInt &LInt = cast<ConstantInt>(L)->getValue();
const APInt &RInt = cast<ConstantInt>(R)->getValue();
@@ -609,19 +752,55 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
}
return 0;
}
- case Value::FunctionVal:
- case Value::GlobalVariableVal:
- case Value::GlobalAliasVal:
- default: // Unknown constant, cast L and R pointers to numbers and compare.
- return cmpNumbers((uint64_t)L, (uint64_t)R);
+ case Value::BlockAddressVal: {
+ const BlockAddress *LBA = cast<BlockAddress>(L);
+ const BlockAddress *RBA = cast<BlockAddress>(R);
+ if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+ return Res;
+ if (LBA->getFunction() == RBA->getFunction()) {
+ // They are BBs in the same function. Order by which comes first in the
+ // BB order of the function. This order is deterministic.
+ Function* F = LBA->getFunction();
+ BasicBlock *LBB = LBA->getBasicBlock();
+ BasicBlock *RBB = RBA->getBasicBlock();
+ if (LBB == RBB)
+ return 0;
+ for (BasicBlock &BB : F->getBasicBlockList()) {
+ if (&BB == LBB) {
+ assert(&BB != RBB);
+ return -1;
+ }
+ if (&BB == RBB)
+ return 1;
+ }
+ llvm_unreachable("Basic Block Address does not point to a basic block in "
+ "its function.");
+ return -1;
+ } else {
+ // cmpValues said the functions are the same. So because they aren't
+ // literally the same pointer, they must respectively be the left and
+ // right functions.
+ assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+ // cmpValues will tell us if these are equivalent BasicBlocks, in the
+ // context of their respective functions.
+ return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+ }
}
+ default: // Unknown constant, abort.
+ DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+ llvm_unreachable("Constant ValueID not recognized.");
+ return -1;
+ }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+ return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
}
/// cmpTypes - Compares two types and
/// defines a total ordering among the set of types.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
-
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
@@ -642,10 +821,15 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
llvm_unreachable("Unknown type!");
// Fall through in Release mode.
case Type::IntegerTyID:
- case Type::VectorTyID:
- // TyL == TyR would have returned true earlier.
- return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
-
+ return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+ cast<IntegerType>(TyR)->getBitWidth());
+ case Type::VectorTyID: {
+ VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR);
+ if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements()))
+ return Res;
+ return cmpTypes(VTyL->getElementType(), VTyR->getElementType());
+ }
+ // TyL == TyR would have returned true earlier, because types are uniqued.
case Type::VoidTyID:
case Type::FloatTyID:
case Type::DoubleTyID:
@@ -654,6 +838,7 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
case Type::PPC_FP128TyID:
case Type::LabelTyID:
case Type::MetadataTyID:
+ case Type::TokenTyID:
return 0;
case Type::PointerTyID: {
@@ -759,8 +944,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
return Res;
- return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+ return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+ cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
if (int Res =
@@ -783,20 +968,24 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
return Res;
- return cmpNumbers(
- (uint64_t)CI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
+ if (int Res = cmpOperandBundlesSchema(CI, R))
+ return Res;
+ return cmpRangeMetadata(
+ CI->getMetadata(LLVMContext::MD_range),
+ cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
}
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
- if (int Res = cmpNumbers(CI->getCallingConv(),
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
+ if (int Res = cmpNumbers(II->getCallingConv(),
cast<InvokeInst>(R)->getCallingConv()))
return Res;
if (int Res =
- cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(II, R))
return Res;
- return cmpNumbers(
- (uint64_t)CI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+ return cmpRangeMetadata(
+ II->getMetadata(LLVMContext::MD_range),
+ cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
@@ -876,9 +1065,8 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
GEPR->accumulateConstantOffset(DL, OffsetR))
return cmpAPInts(OffsetL, OffsetR);
-
- if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
- (uint64_t)GEPR->getPointerOperand()->getType()))
+ if (int Res = cmpTypes(GEPL->getSourceElementType(),
+ GEPR->getSourceElementType()))
return Res;
if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
@@ -892,6 +1080,28 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
return 0;
}
+int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
+ const InlineAsm *R) const {
+ // InlineAsm instances are uniqued. If they are the same pointer, they are
+ // trivially the same; otherwise compare the fields.
+ if (L == R)
+ return 0;
+ if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
+ return Res;
+ if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
+ return Res;
+ if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
+ return Res;
+ if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
+ return Res;
+ if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
+ return Res;
+ if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
+ return Res;
+ llvm_unreachable("InlineAsm blocks were not uniqued.");
+ return 0;
+}
+
/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
@@ -926,7 +1136,7 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) {
const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
if (InlineAsmL && InlineAsmR)
- return cmpNumbers((uint64_t)L, (uint64_t)R);
+ return cmpInlineAsm(InlineAsmL, InlineAsmR);
if (InlineAsmL)
return 1;
if (InlineAsmR)
@@ -938,12 +1148,13 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) {
return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
// Test whether two basic blocks have equivalent behaviour.
-int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
+int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
+ const BasicBlock *BBR) {
BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
do {
- if (int Res = cmpValues(InstL, InstR))
+ if (int Res = cmpValues(&*InstL, &*InstR))
return Res;
const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL);
@@ -961,7 +1172,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
if (int Res = cmpGEPs(GEPL, GEPR))
return Res;
} else {
- if (int Res = cmpOperations(InstL, InstR))
+ if (int Res = cmpOperations(&*InstL, &*InstR))
return Res;
assert(InstL->getNumOperands() == InstR->getNumOperands());
@@ -970,11 +1181,8 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
Value *OpR = InstR->getOperand(i);
if (int Res = cmpValues(OpL, OpR))
return Res;
- if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID()))
- return Res;
- // TODO: Already checked in cmpOperation
- if (int Res = cmpTypes(OpL->getType(), OpR->getType()))
- return Res;
+ // cmpValues should ensure this is true.
+ assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
}
}
@@ -990,7 +1198,6 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
// Test whether the two functions have equivalent behaviour.
int FunctionComparator::compare() {
-
sn_mapL.clear();
sn_mapR.clear();
@@ -1001,7 +1208,7 @@ int FunctionComparator::compare() {
return Res;
if (FnL->hasGC()) {
- if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC()))
+ if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
return Res;
}
@@ -1009,7 +1216,7 @@ int FunctionComparator::compare() {
return Res;
if (FnL->hasSection()) {
- if (int Res = cmpStrings(FnL->getSection(), FnR->getSection()))
+ if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
return Res;
}
@@ -1033,7 +1240,7 @@ int FunctionComparator::compare() {
ArgRI = FnR->arg_begin(),
ArgLE = FnL->arg_end();
ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
- if (cmpValues(ArgLI, ArgRI) != 0)
+ if (cmpValues(&*ArgLI, &*ArgRI) != 0)
llvm_unreachable("Arguments repeat!");
}
@@ -1055,7 +1262,7 @@ int FunctionComparator::compare() {
if (int Res = cmpValues(BBL, BBR))
return Res;
- if (int Res = compare(BBL, BBR))
+ if (int Res = cmpBasicBlocks(BBL, BBR))
return Res;
const TerminatorInst *TermL = BBL->getTerminator();
@@ -1074,6 +1281,68 @@ int FunctionComparator::compare() {
}
namespace {
+// Accumulate the hash of a sequence of 64-bit integers. This is similar to
+// hashing the whole sequence at once, but the entire input does not need to be
+// available at once. This interface is necessary for functionHash because it
+// needs to accumulate the hash as the structure of the function is traversed
+// without saving these values to an intermediate buffer. This form of hashing
+// is not often needed, as usually the object to hash is just read from a
+// buffer.
+class HashAccumulator64 {
+ uint64_t Hash;
+public:
+ // Initialize to random constant, so the state isn't zero.
+ HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+ void add(uint64_t V) {
+ Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
+ }
+ // No finishing is required, because the entire hash value is used.
+ uint64_t getHash() { return Hash; }
+};
+} // end anonymous namespace
+
+// A function hash is calculated by considering only the number of arguments and
+// whether a function is varargs, the order of basic blocks (given by the
+// successors of each basic block in depth first order), and the order of
+// opcodes of each instruction within each of these basic blocks. This mirrors
+// the strategy compare() uses to compare functions by walking the BBs in depth
+// first order and comparing each instruction in sequence. Because this hash
+// does not look at the operands, it is insensitive to things such as the
+// target of calls and the constants used in the function, which makes it
+// useful for merging functions that are identical modulo constants and call
+// targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+ HashAccumulator64 H;
+ H.add(F.isVarArg());
+ H.add(F.arg_size());
+
+ SmallVector<const BasicBlock *, 8> BBs;
+ SmallSet<const BasicBlock *, 16> VisitedBBs;
+
+ // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
+ // accumulating the hash of the function "structure" (BB and opcode sequence).
+ BBs.push_back(&F.getEntryBlock());
+ VisitedBBs.insert(BBs[0]);
+ while (!BBs.empty()) {
+ const BasicBlock *BB = BBs.pop_back_val();
+ // This random value acts as a block header; otherwise the partition of
+ // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
+ H.add(45798);
+ for (auto &Inst : *BB) {
+ H.add(Inst.getOpcode());
+ }
+ const TerminatorInst *Term = BB->getTerminator();
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+ continue;
+ BBs.push_back(Term->getSuccessor(i));
+ }
+ }
+ return H.getHash();
+}
+
+
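A standalone sketch of the structural hash just described, under the simplifying assumption that a function is a list of blocks of (opcode, operand) pairs: operands never feed the accumulator, so two toy functions that differ only in their constants hash identically.

#include <cstdint>
#include <iostream>
#include <vector>

struct Inst { int Opcode; int Operand; }; // Operand is deliberately ignored
using Block = std::vector<Inst>;

static uint64_t structuralHash(const std::vector<Block> &F) {
  uint64_t H = 0x6acaa36bef8325c5ULL;
  auto Mix = [&H](uint64_t V) { H = (H ^ V) * 1099511628211ULL; };
  for (const Block &B : F) {
    Mix(45798);              // block header marker, as in functionHash()
    for (const Inst &I : B)
      Mix(I.Opcode);         // opcode sequence only, never the operands
  }
  return H;
}

int main() {
  std::vector<Block> F = {{{13, 10}, {17, 20}}}; // same opcodes...
  std::vector<Block> G = {{{13, 99}, {17, -7}}}; // ...different constants
  std::cout << (structuralHash(F) == structuralHash(G)) << "\n"; // prints 1
}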
+namespace {
/// MergeFunctions finds functions which will generate identical machine code,
/// by considering all pointer types to be equivalent. Once identified,
@@ -1084,14 +1353,31 @@ class MergeFunctions : public ModulePass {
public:
static char ID;
MergeFunctions()
- : ModulePass(ID), HasGlobalAliases(false) {
+ : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
+ HasGlobalAliases(false) {
initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override;
private:
- typedef std::set<FunctionNode> FnTreeType;
+ // The function comparison operator is provided here so that FunctionNodes do
+ // not need to become larger with another pointer.
+ class FunctionNodeCmp {
+ GlobalNumberState* GlobalNumbers;
+ public:
+ FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
+ bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
+ // Order first by hashes, then full function comparison.
+ if (LHS.getHash() != RHS.getHash())
+ return LHS.getHash() < RHS.getHash();
+ FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
+ return FCmp.compare() == -1;
+ }
+ };
+ typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
+
+ GlobalNumberState GlobalNumbers;
/// A work queue of functions that may have been modified and should be
/// analyzed again.
@@ -1133,17 +1419,23 @@ private:
void writeAlias(Function *F, Function *G);
/// Replace function F with function G in the function tree.
- void replaceFunctionInTree(FnTreeType::iterator &IterToF, Function *G);
+ void replaceFunctionInTree(const FunctionNode &FN, Function *G);
/// The set of all distinct functions. Use the insert() and remove() methods
- /// to modify it.
+ /// to modify it. The map below allows efficient lookup and deferral of Functions.
FnTreeType FnTree;
+ // Map functions to the iterators of the FunctionNode which contains them
+ // in the FnTree. This must be updated carefully whenever the FnTree is
+ // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
+ // dangling iterators into FnTree. The invariant that preserves this is that
+ // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
+ ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
};
-} // end anonymous namespace
+} // end anonymous namespace
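The FNodesInTree scheme above depends on std::set iterators remaining valid across unrelated mutations; a minimal standalone illustration (the container contents are made up):

#include <cassert>
#include <map>
#include <set>
#include <string>

int main() {
  std::set<int> Tree = {10, 20, 30};
  std::map<std::string, std::set<int>::iterator> Index;
  Index["f"] = Tree.find(20);
  // std::set iterators stay valid across unrelated insertions and erasures.
  Tree.insert(25);
  Tree.erase(10);
  assert(*Index["f"] == 20);
  // Erasing the pointed-to element invalidates the stored iterator, so the
  // side-map entry must be dropped as well, mirroring remove() above.
  Tree.erase(Index["f"]);
  Index.erase("f");
}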
char MergeFunctions::ID = 0;
INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
@@ -1166,8 +1458,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
- int Res1 = FunctionComparator(F1, F2).compare();
- int Res2 = FunctionComparator(F2, F1).compare();
+ int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
+ int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
@@ -1188,8 +1480,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
Function *F3 = cast<Function>(*K);
- int Res3 = FunctionComparator(F1, F3).compare();
- int Res4 = FunctionComparator(F2, F3).compare();
+ int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
+ int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
bool Transitive = true;
@@ -1227,11 +1519,33 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
- Deferred.push_back(WeakVH(I));
+ // All functions in the module, ordered by hash. Functions with a unique
+ // hash value are dropped without any pairwise comparison.
+ std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
+ HashedFuncs;
+ for (Function &Func : M) {
+ if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+ HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
+ }
}
+ std::stable_sort(
+ HashedFuncs.begin(), HashedFuncs.end(),
+ [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
+ const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
+ return a.first < b.first;
+ });
+
+ auto S = HashedFuncs.begin();
+ for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
+ // If the hash value matches the previous value or the next one, we must
+ // consider merging it. Otherwise it is dropped and never considered again.
+ if ((I != S && std::prev(I)->first == I->first) ||
+ (std::next(I) != IE && std::next(I)->first == I->first)) {
+ Deferred.push_back(WeakVH(I->second));
+ }
+ }
+
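A standalone sketch of this filtering step (hashes and names are made up): after the stable sort, only entries sharing a hash with an adjacent entry can possibly have an equal partner; everything else is dropped up front.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

int main() {
  typedef std::pair<uint64_t, std::string> HashedFn;
  std::vector<HashedFn> HashedFuncs = {{7, "f"}, {3, "g"}, {7, "h"}, {9, "i"}};
  std::stable_sort(HashedFuncs.begin(), HashedFuncs.end(),
                   [](const HashedFn &A, const HashedFn &B) {
                     return A.first < B.first;
                   });
  for (auto I = HashedFuncs.begin(), E = HashedFuncs.end(); I != E; ++I) {
    bool PrevEq = I != HashedFuncs.begin() && std::prev(I)->first == I->first;
    bool NextEq = std::next(I) != E && std::next(I)->first == I->first;
    if (PrevEq || NextEq) // only "f" and "h" survive for deep comparison
      std::cout << I->second << " deferred\n";
  }
}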
do {
std::vector<WeakVH> Worklist;
Deferred.swap(Worklist);
@@ -1270,6 +1584,7 @@ bool MergeFunctions::runOnModule(Module &M) {
} while (!Deferred.empty());
FnTree.clear();
+ GlobalNumbers.clear();
return Changed;
}
@@ -1282,6 +1597,32 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
++UI;
CallSite CS(U->getUser());
if (CS && CS.isCallee(U)) {
+ // Transfer the called function's attributes to the call site. Due to the
+ // bitcast we will 'lose' ABI changing attributes because the 'called
+ // function' is no longer a Function* but the bitcast. Code that looks up
+ // the attributes from the called function will fail.
+
+ // FIXME: This is not actually true, at least not anymore. The callsite
+ // will always have the same ABI affecting attributes as the callee,
+ // because otherwise the original input has UB. Note that Old and New
+ // always have matching ABI, so no attributes need to be changed.
+ // Transferring other attributes may help other optimizations, but that
+ // should be done uniformly and not in this ad-hoc way.
+ auto &Context = New->getContext();
+ auto NewFuncAttrs = New->getAttributes();
+ auto CallSiteAttrs = CS.getAttributes();
+
+ CallSiteAttrs = CallSiteAttrs.addAttributes(
+ Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
+
+ for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
+ AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
+ if (Attrs.getNumSlots())
+ CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
+ }
+
+ CS.setAttributes(CallSiteAttrs);
+
remove(CS.getInstruction()->getParent()->getParent());
U->set(BitcastNew);
}
@@ -1352,15 +1693,15 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
SmallVector<Value *, 16> Args;
unsigned i = 0;
FunctionType *FFTy = F->getFunctionType();
- for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
- AI != AE; ++AI) {
- Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i)));
+ for (Argument &AI : NewG->args()) {
+ Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
++i;
}
CallInst *CI = Builder.CreateCall(F, Args);
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
+ CI->setAttributes(F->getAttributes());
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
@@ -1379,8 +1720,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
- PointerType *PTy = G->getType();
- auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F);
+ auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
@@ -1425,19 +1765,24 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
++NumFunctionsMerged;
}
-/// Replace function F for function G in the map.
-void MergeFunctions::replaceFunctionInTree(FnTreeType::iterator &IterToF,
+/// Replace function F by function G.
+void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
Function *G) {
- Function *F = IterToF->getFunc();
-
- // A total order is already guaranteed otherwise because we process strong
- // functions before weak functions.
- assert(((F->mayBeOverridden() && G->mayBeOverridden()) ||
- (!F->mayBeOverridden() && !G->mayBeOverridden())) &&
- "Only change functions if both are strong or both are weak");
- (void)F;
-
- IterToF->replaceBy(G);
+ Function *F = FN.getFunc();
+ assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
+ "The two functions must be equal");
+
+ auto I = FNodesInTree.find(F);
+ assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
+ assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
+
+ FnTreeType::iterator IterToFNInFnTree = I->second;
+ assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
+ // Remove F -> FN and insert G -> FN
+ FNodesInTree.erase(I);
+ FNodesInTree.insert({G, IterToFNInFnTree});
+ // Replace F with G in FN, which is stored inside the FnTree.
+ FN.replaceBy(G);
}
// Insert a FunctionNode into the FnTree, or merge it away if equal to one
@@ -1447,6 +1792,8 @@ bool MergeFunctions::insert(Function *NewFunction) {
FnTree.insert(FunctionNode(NewFunction));
if (Result.second) {
+ assert(FNodesInTree.count(NewFunction) == 0);
+ FNodesInTree.insert({NewFunction, Result.first});
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
return false;
}
@@ -1476,7 +1823,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
if (OldF.getFunc()->getName() > NewFunction->getName()) {
// Swap the two functions.
Function *F = OldF.getFunc();
- replaceFunctionInTree(Result.first, NewFunction);
+ replaceFunctionInTree(*Result.first, NewFunction);
NewFunction = F;
assert(OldF.getFunc() != F && "Must have swapped the functions.");
}
@@ -1495,18 +1842,13 @@ bool MergeFunctions::insert(Function *NewFunction) {
// Remove a function from FnTree. If it was already in FnTree, add
// it to Deferred so that we'll look at it in the next round.
void MergeFunctions::remove(Function *F) {
- // We need to make sure we remove F, not a function "equal" to F per the
- // function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionNode(F));
- size_t Erased = 0;
- if (found != FnTree.end() && found->getFunc() == F) {
- Erased = 1;
- FnTree.erase(found);
- }
-
- if (Erased) {
- DEBUG(dbgs() << "Removed " << F->getName()
- << " from set and deferred it.\n");
+ auto I = FNodesInTree.find(F);
+ if (I != FNodesInTree.end()) {
+ DEBUG(dbgs() << "Deferred " << F->getName() << ".\n");
+ FnTree.erase(I->second);
+ // I->second has been invalidated, remove it from the FNodesInTree map to
+ // preserve the invariant.
+ FNodesInTree.erase(I);
Deferred.emplace_back(F);
}
}
@@ -1516,6 +1858,8 @@ void MergeFunctions::remove(Function *F) {
void MergeFunctions::removeUsers(Value *V) {
std::vector<Value *> Worklist;
Worklist.push_back(V);
+ SmallSet<Value*, 8> Visited;
+ Visited.insert(V);
while (!Worklist.empty()) {
Value *V = Worklist.back();
Worklist.pop_back();
@@ -1526,8 +1870,10 @@ void MergeFunctions::removeUsers(Value *V) {
} else if (isa<GlobalValue>(U)) {
// do nothing
} else if (Constant *C = dyn_cast<Constant>(U)) {
- for (User *UU : C->users())
- Worklist.push_back(UU);
+ for (User *UU : C->users()) {
+ // Only queue users that have not been visited yet, so that cycles of
+ // constant expressions terminate and each user is processed once.
+ if (Visited.insert(UU).second)
+ Worklist.push_back(UU);
+ }
}
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 4a7cb7ba7d12..0c5c84bbccab 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -50,7 +50,7 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
Function* PartialInliner::unswitchFunction(Function* F) {
// First, verify that this function is an unswitching candidate...
- BasicBlock* entryBlock = F->begin();
+ BasicBlock *entryBlock = &F->front();
BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
if (!BR || BR->isUnconditional())
return nullptr;
@@ -89,18 +89,18 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// of which will go outside.
BasicBlock* preReturn = newReturnBlock;
newReturnBlock = newReturnBlock->splitBasicBlock(
- newReturnBlock->getFirstNonPHI());
+ newReturnBlock->getFirstNonPHI()->getIterator());
BasicBlock::iterator I = preReturn->begin();
- BasicBlock::iterator Ins = newReturnBlock->begin();
+ Instruction *Ins = &newReturnBlock->front();
while (I != preReturn->end()) {
PHINode* OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi) break;
-
- PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+
+ PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
OldPhi->replaceAllUsesWith(retPhi);
Ins = newReturnBlock->getFirstNonPHI();
-
- retPhi->addIncoming(I, preReturn);
+
+ retPhi->addIncoming(&*I, preReturn);
retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
newEntryBlock);
OldPhi->removeIncomingValue(newEntryBlock);
@@ -116,8 +116,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
FE = duplicateFunction->end(); FI != FE; ++FI)
if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
&*FI != newNonReturnBlock)
- toExtract.push_back(FI);
-
+ toExtract.push_back(&*FI);
+
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
DT.recalculate(*duplicateFunction);
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 909baae92548..9876efa7b235 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -12,19 +12,26 @@
//
//===----------------------------------------------------------------------===//
-
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/IR/FunctionInfo.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -89,11 +96,21 @@ static cl::opt<bool> EnableLoopDistribute(
"enable-loop-distribute", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopDistribution Pass"));
+static cl::opt<bool> EnableNonLTOGlobalsModRef(
+ "enable-non-lto-gmr", cl::init(true), cl::Hidden,
+ cl::desc(
+ "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline."));
+
+static cl::opt<bool> EnableLoopLoadElim(
+ "enable-loop-load-elim", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopLoadElimination Pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
LibraryInfo = nullptr;
Inliner = nullptr;
+ FunctionIndex = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
@@ -143,10 +160,9 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses(
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
if (UseCFLAA)
- PM.add(createCFLAliasAnalysisPass());
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createScopedNoAliasAAPass());
- PM.add(createBasicAliasAnalysisPass());
+ PM.add(createCFLAAWrapperPass());
+ PM.add(createTypeBasedAAWrapperPass());
+ PM.add(createScopedNoAliasAAWrapperPass());
}
void PassManagerBuilder::populateFunctionPassManager(
@@ -172,6 +188,9 @@ void PassManagerBuilder::populateFunctionPassManager(
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
+ // Allow forcing function attributes as a debugging and tuning aid.
+ MPM.add(createForceFunctionAttrsLegacyPass());
+
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
@@ -201,10 +220,15 @@ void PassManagerBuilder::populateModulePassManager(
addInitialAliasAnalysisPasses(MPM);
if (!DisableUnitAtATime) {
+ // Infer attributes about declarations if possible.
+ MPM.add(createInferFunctionAttrsLegacyPass());
+
addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+ // Promote any localized global vars
+ MPM.add(createPromoteMemoryToRegisterPass());
MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
@@ -213,6 +237,12 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
+ if (EnableNonLTOGlobalsModRef)
+ // We add a module alias analysis pass here. In part due to bugs in the
+ // analysis infrastructure, this "works" in that the analysis stays alive
+ // for the entire SCC pass run below.
+ MPM.add(createGlobalsAAWrapperPass());
+
// Start of CallGraph SCC passes.
if (!DisableUnitAtATime)
MPM.add(createPruneEHPass()); // Remove dead EH info
@@ -245,6 +275,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
@@ -315,9 +346,42 @@ void PassManagerBuilder::populateModulePassManager(
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
+ // Remove avail extern fns and globals definitions if we aren't
+ // compiling an object file for later LTO. For LTO we want to preserve
+ // these so they are eligible for inlining at link-time. Note if they
+ // are unreferenced they will be removed by GlobalDCE later, so
+ // this only impacts referenced available externally globals.
+ // Eventually they will be suppressed during codegen, but eliminating
+ // here enables more opportunity for GlobalDCE as it may make
+ // globals referenced by available external functions dead
+ // and saves running remaining passes on the eliminated functions.
+ MPM.add(createEliminateAvailableExternallyPass());
+ }
+
+ if (EnableNonLTOGlobalsModRef)
+ // We add a fresh GlobalsModRef run at this point. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ //
+ // Note that this relies on a bug in the pass manager which preserves
+ // a module analysis into a function pass pipeline (and throughout it) so
+ // long as the first function pass doesn't invalidate the module analysis.
+ // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
+ // this to work. Fortunately, it is trivial to preserve AliasAnalysis
+ // (doing nothing preserves it as it is required to be conservatively
+ // correct in the face of IR changes).
+ MPM.add(createGlobalsAAWrapperPass());
+
if (RunFloat2Int)
MPM.add(createFloat2IntPass());
+ addExtensionsToPM(EP_VectorizerStart, MPM);
+
// Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form. Disable header duplication at -Oz.
@@ -329,6 +393,12 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopDistributePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ if (EnableLoopLoadElim)
+ MPM.add(createLoopLoadEliminationPass());
+
// FIXME: Because of #pragma vectorize enable, the passes below are always
// inserted in the pipeline, even when the vectorizer doesn't run (ex. when
// on -O1 and no #pragma is found). Would be good to have these two passes
@@ -402,17 +472,6 @@ void PassManagerBuilder::populateModulePassManager(
// GlobalOpt already deletes dead functions and globals, at -O2 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
if (OptLevel > 1) {
- if (!PrepareForLTO) {
- // Remove avail extern fns and globals definitions if we aren't
- // compiling an object file for later LTO. For LTO we want to preserve
- // these so they are eligible for inlining at link-time. Note if they
- // are unreferenced they will be removed by GlobalDCE below, so
- // this only impacts referenced available externally globals.
- // Eventually they will be suppressed during codegen, but eliminating
- // here enables more opportunity for GlobalDCE as it may make
- // globals referenced by available external functions dead.
- MPM.add(createEliminateAvailableExternallyPass());
- }
MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
MPM.add(createConstantMergePass()); // Merge dup global constants
}
@@ -428,13 +487,25 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
+ if (FunctionIndex)
+ PM.add(createFunctionImportPass(FunctionIndex));
+
+ // Allow forcing function attributes as a debugging and tuning aid.
+ PM.add(createForceFunctionAttrsLegacyPass());
+
+ // Infer attributes about declarations if possible.
+ PM.add(createInferFunctionAttrsLegacyPass());
+
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
PM.add(createIPSCCPPass());
// Now that we internalized some globals, see if we can hack on them!
+ PM.add(createFunctionAttrsPass()); // Add norecurse if possible.
PM.add(createGlobalOptimizerPass());
+ // Promote any localized global vars.
+ PM.add(createPromoteMemoryToRegisterPass());
// Linking modules together can lead to duplicated global constants, only
// keep one copy of each constant.
@@ -481,7 +552,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createFunctionAttrsPass()); // Add nocapture.
- PM.add(createGlobalsModRefPass()); // IP alias analysis.
+ PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
if (EnableMLSM)
@@ -500,6 +571,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createLoopVectorizePass(true, LoopVectorize));
+ // Now that we've optimized loops (in particular loop induction variables),
+ // we may have exposed more scalar opportunities. Run parts of the scalar
+ // optimizer again at this point.
+ PM.add(createInstructionCombiningPass()); // Initial cleanup
+ PM.add(createCFGSimplificationPass()); // if-convert
+ PM.add(createSCCPPass()); // Propagate exposed constants
+ PM.add(createInstructionCombiningPass()); // Clean up again
+ PM.add(createBitTrackingDCEPass());
+
// More scalar chains could be vectorized due to more alias information
if (RunSLPAfterLoopVectorization)
if (SLPVectorize)
@@ -524,6 +604,9 @@ void PassManagerBuilder::addLateLTOOptimizationPasses(
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
+ // Drop bodies of available externally objects to improve GlobalDCE.
+ PM.add(createEliminateAvailableExternallyPass());
+
// Now that we have optimized the program, discard unreachable functions.
PM.add(createGlobalDCEPass());
@@ -543,6 +626,10 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (OptLevel > 1)
addLTOOptimizationPasses(PM);
+ // Create a function that performs CFI checks for cross-DSO calls with targets
+ // in the current module.
+ PM.add(createCrossDSOCFIPass());
+
// Lower bit sets to globals. This pass supports Clang's control flow
// integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
// is enabled. The pass does nothing if CFI is disabled.
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index b2f1010c9a07..3af4afb903fe 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -21,7 +21,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -153,21 +153,16 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- AttrBuilder NewAttributes;
-
- if (!SCCMightUnwind)
- NewAttributes.addAttribute(Attribute::NoUnwind);
- if (!SCCMightReturn)
- NewAttributes.addAttribute(Attribute::NoReturn);
-
Function *F = (*I)->getFunction();
- const AttributeSet &PAL = F->getAttributes().getFnAttributes();
- const AttributeSet &NPAL = AttributeSet::get(
- F->getContext(), AttributeSet::FunctionIndex, NewAttributes);
- if (PAL != NPAL) {
+ if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
+ F->addFnAttr(Attribute::NoUnwind);
+ MadeChange = true;
+ }
+
+ if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) {
+ F->addFnAttr(Attribute::NoReturn);
MadeChange = true;
- F->addAttributes(AttributeSet::FunctionIndex, NPAL);
}
}
@@ -191,9 +186,13 @@ bool PruneEH::SimplifyFunction(Function *F) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+
// Insert a call instruction before the invoke.
- CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+ "", II);
Call->takeName(II);
Call->setCallingConv(II->getCallingConv());
Call->setAttributes(II->getAttributes());
@@ -233,7 +232,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
// Remove the uncond branch and add an unreachable.
BB->getInstList().pop_back();
- new UnreachableInst(BB->getContext(), BB);
+ new UnreachableInst(BB->getContext(), &*BB);
DeleteBasicBlock(New); // Delete the new BB.
MadeChange = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index c8dfa54a4aa0..928d92ef9d12 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -22,7 +22,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -44,7 +43,11 @@
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include <cctype>
using namespace llvm;
@@ -61,27 +64,51 @@ static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
"sample-profile-max-propagate-iterations", cl::init(100),
cl::desc("Maximum number of iterations to go through when propagating "
"sample block/edge weights through the CFG."));
+static cl::opt<unsigned> SampleProfileRecordCoverage(
+ "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of records in the input profile "
+ "are matched to the IR."));
+static cl::opt<unsigned> SampleProfileSampleCoverage(
+ "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of samples in the input profile "
+ "are matched to the IR."));
+static cl::opt<double> SampleProfileHotThreshold(
+ "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
+ cl::desc("Inlined functions that account for more than N% of all samples "
+ "collected in the parent function, will be inlined again."));
+static cl::opt<double> SampleProfileGlobalHotThreshold(
+ "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
+ cl::desc("Top-level functions that account for more than N% of all samples "
+ "collected in the profile, will be marked as hot for the inliner "
+ "to consider."));
+static cl::opt<double> SampleProfileGlobalColdThreshold(
+ "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
+ cl::desc("Top-level functions that account for less than N% of all samples "
+ "collected in the profile, will be marked as cold for the inliner "
+ "to consider."));
namespace {
-typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
-typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
-typedef std::pair<BasicBlock *, BasicBlock *> Edge;
-typedef DenseMap<Edge, unsigned> EdgeWeightMap;
-typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
+typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
+typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
+typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
+typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
+typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
+ BlockEdgeMap;
/// \brief Sample profile pass.
///
/// This pass reads profile data from the file specified by
/// -sample-profile-file and annotates every affected function with the
/// profile information found in that file.
-class SampleProfileLoader : public FunctionPass {
+class SampleProfileLoader : public ModulePass {
public:
// Class identification, replacement for typeinfo
static char ID;
SampleProfileLoader(StringRef Name = SampleProfileFile)
- : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr),
- Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) {
+ : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
+ Samples(nullptr), Filename(Name), ProfileIsValid(false),
+ TotalCollectedSamples(0) {
initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
}
@@ -91,36 +118,37 @@ public:
const char *getPassName() const override { return "Sample profile pass"; }
- bool runOnFunction(Function &F) override;
+ bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTree>();
}
protected:
+ bool runOnFunction(Function &F);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
- unsigned getInstWeight(Instruction &I);
- unsigned getBlockWeight(BasicBlock *BB);
+ ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
+ ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
+ const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
+ const FunctionSamples *findFunctionSamples(const Instruction &I) const;
+ bool inlineHotFunctions(Function &F);
+ bool emitInlineHints(Function &F);
void printEdgeWeight(raw_ostream &OS, Edge E);
- void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
- void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
+ void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
+ void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
bool computeBlockWeights(Function &F);
void findEquivalenceClasses(Function &F);
void findEquivalencesFor(BasicBlock *BB1,
SmallVector<BasicBlock *, 8> Descendants,
DominatorTreeBase<BasicBlock> *DomTree);
void propagateWeights(Function &F);
- unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
+ uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F);
-
- /// \brief Line number for the function header. Used to compute absolute
- /// line numbers from the relative line numbers found in the profile.
- unsigned HeaderLineno;
+ void computeDominanceAndLoopInfo(Function &F);
+ unsigned getOffset(unsigned L, unsigned H) const;
+ void clearFunctionData();
/// \brief Map basic blocks to their computed weights.
///
@@ -135,7 +163,7 @@ protected:
EdgeWeightMap EdgeWeights;
/// \brief Set of visited blocks during propagation.
- SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
+ SmallPtrSet<const BasicBlock *, 128> VisitedBlocks;
/// \brief Set of visited edges during propagation.
SmallSet<Edge, 128> VisitedEdges;
@@ -149,9 +177,9 @@ protected:
EquivalenceClassMap EquivalenceClass;
/// \brief Dominance, post-dominance and loop information.
- DominatorTree *DT;
- PostDominatorTree *PDT;
- LoopInfo *LI;
+ std::unique_ptr<DominatorTree> DT;
+ std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
+ std::unique_ptr<LoopInfo> LI;
/// \brief Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -159,9 +187,6 @@ protected:
/// \brief Successors for each basic block in the CFG.
BlockEdgeMap Successors;
- /// \brief LLVM context holding the debug data we need.
- LLVMContext *Ctx;
-
/// \brief Profile reader object.
std::unique_ptr<SampleProfileReader> Reader;
@@ -173,7 +198,207 @@ protected:
/// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
+
+ /// \brief Total number of samples collected in this profile.
+ ///
+ /// This is the sum of all the samples collected in all the functions executed
+ /// at runtime.
+ uint64_t TotalCollectedSamples;
};
+
+class SampleCoverageTracker {
+public:
+ SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+
+ bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
+ uint32_t Discriminator, uint64_t Samples);
+ unsigned computeCoverage(unsigned Used, unsigned Total) const;
+ unsigned countUsedRecords(const FunctionSamples *FS) const;
+ unsigned countBodyRecords(const FunctionSamples *FS) const;
+ uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
+ uint64_t countBodySamples(const FunctionSamples *FS) const;
+ void clear() {
+ SampleCoverage.clear();
+ TotalUsedSamples = 0;
+ }
+
+private:
+ typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
+ typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
+ FunctionSamplesCoverageMap;
+
+ /// Coverage map for sampling records.
+ ///
+ /// This map keeps a record of sampling records that have been matched to
+ /// an IR instruction. This is used to detect some form of staleness in
+ /// profiles (see flag -sample-profile-check-coverage).
+ ///
+ /// Each entry in the map corresponds to a FunctionSamples instance. This is
+ /// another map that counts how many times the sample record at the
+ /// given location has been used.
+ FunctionSamplesCoverageMap SampleCoverage;
+
+ /// Number of samples used from the profile.
+ ///
+ /// When a sampling record is used for the first time, the samples from
+ /// that record are added to this accumulator. Coverage is later computed
+ /// based on the total number of samples available in this function and
+ /// its callsites.
+ ///
+ /// Note that this accumulator tracks samples used from a single function
+ /// and all the inlined callsites. Strictly, we should have a map of counters
+ /// keyed by FunctionSamples pointers, but these stats are cleared after
+ /// every function, so we just need to keep a single counter.
+ uint64_t TotalUsedSamples;
+};
+
+SampleCoverageTracker CoverageTracker;
+
+/// Return true if the given callsite is hot with respect to its caller.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compute the fraction
+/// of samples used by the callsite with respect to the total number of samples
+/// collected in the caller.
+///
+/// If that fraction is larger than the threshold given by
+/// SampleProfileHotThreshold, the callsite will be inlined again.
+bool callsiteIsHot(const FunctionSamples *CallerFS,
+ const FunctionSamples *CallsiteFS) {
+ if (!CallsiteFS)
+ return false; // The callsite was not inlined in the original binary.
+
+ uint64_t ParentTotalSamples = CallerFS->getTotalSamples();
+ if (ParentTotalSamples == 0)
+ return false; // Avoid division by zero.
+
+ uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
+ if (CallsiteTotalSamples == 0)
+ return false; // Callsite is trivially cold.
+
+ double PercentSamples =
+ (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
+ return PercentSamples >= SampleProfileHotThreshold;
+}
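+
+// Worked example (hypothetical numbers): if the caller accumulated 10000
+// total samples and the inlined callsite accounts for 800 of them, the
+// callsite consumed 800 / 10000 * 100 = 8% of the caller's samples. With a
+// SampleProfileHotThreshold of, say, 5, callsiteIsHot() returns true and the
+// original inline decision is replayed.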
+
+} // end anonymous namespace
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+ uint32_t LineOffset,
+ uint32_t Discriminator,
+ uint64_t Samples) {
+ LineLocation Loc(LineOffset, Discriminator);
+ unsigned &Count = SampleCoverage[FS][Loc];
+ bool FirstTime = (++Count == 1);
+ if (FirstTime)
+ TotalUsedSamples += Samples;
+ return FirstTime;
+}
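+
+// Usage sketch (illustrative): marking the same record twice only counts its
+// samples toward TotalUsedSamples once.
+//   Tracker.markSamplesUsed(FS, /*LineOffset=*/3, /*Discriminator=*/0, 120);
+//     -> returns true, TotalUsedSamples += 120
+//   Tracker.markSamplesUsed(FS, 3, 0, 120);
+//     -> returns false, TotalUsedSamples unchanged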
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+ auto I = SampleCoverage.find(FS);
+
+ // The size of the coverage map for FS represents the number of records
+ // that were marked used at least once.
+ unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+ // If there are inlined callsites in this function, count the samples found
+ // in the respective bodies. However, do not bother counting callees with 0
+ // total samples; these are callees that were never invoked at runtime.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countUsedRecords(CalleeSamples);
+ }
+
+ return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+ unsigned Count = FS->getBodySamples().size();
+
+ // Only count records in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countBodyRecords(CalleeSamples);
+ }
+
+ return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+ uint64_t Total = 0;
+ for (const auto &I : FS->getBodySamples())
+ Total += I.second.getSamples();
+
+ // Only count samples in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Total += countBodySamples(CalleeSamples);
+ }
+
+ return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+ unsigned Total) const {
+ assert(Used <= Total &&
+ "number of used records cannot exceed the total number of records");
+ return Total > 0 ? Used * 100 / Total : 100;
+}
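+
+// Worked example (hypothetical counts): with Used = 45 of Total = 60 records,
+// coverage is 45 * 100 / 60 = 75 (integer division). A profile with no
+// records at all (Total == 0) is trivially considered fully covered (100).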
+
+/// Clear all the per-function data used to load samples and propagate weights.
+void SampleProfileLoader::clearFunctionData() {
+ BlockWeights.clear();
+ EdgeWeights.clear();
+ VisitedBlocks.clear();
+ VisitedEdges.clear();
+ EquivalenceClass.clear();
+ DT = nullptr;
+ PDT = nullptr;
+ LI = nullptr;
+ Predecessors.clear();
+ Successors.clear();
+ CoverageTracker.clear();
+}
+
+/// \brief Returns the offset of lineno \p L from head_lineno \p H.
+///
+/// \param L Lineno
+/// \param H Header lineno of the function
+///
+/// \returns the offset from the header lineno. 16 bits are used to represent
+/// the offset. We assume that a single function will not exceed 65535 LOC.
+unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
+ return (L - H) & 0xffff;
}
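
// Worked example (hypothetical line numbers): an instruction at line L = 1107
// in a function whose header is at H = 1100 maps to profile offset
// (1107 - 1100) & 0xffff = 7. The mask truncates the difference to the 16
// bits the profile format reserves for offsets.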
/// \brief Print the weight of edge \p E on stream \p OS.
@@ -190,8 +415,8 @@ void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
/// \param OS Stream to emit the output to.
/// \param BB Block to print.
void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
- BasicBlock *BB) {
- BasicBlock *Equiv = EquivalenceClass[BB];
+ const BasicBlock *BB) {
+ const BasicBlock *Equiv = EquivalenceClass[BB];
OS << "equivalence[" << BB->getName()
<< "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
}
@@ -200,8 +425,11 @@ void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
///
/// \param OS Stream to emit the output to.
/// \param BB Block to print.
-void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
- OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
+void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
+ const BasicBlock *BB) const {
+ const auto &I = BlockWeights.find(BB);
+ uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
+ OS << "weight[" << BB->getName() << "]: " << W << "\n";
}
/// \brief Get the weight for an instruction.
@@ -214,51 +442,67 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
///
/// \param Inst Instruction to query.
///
-/// \returns The profiled weight of I.
-unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
+/// \returns the weight of \p Inst.
+ErrorOr<uint64_t>
+SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
DebugLoc DLoc = Inst.getDebugLoc();
if (!DLoc)
- return 0;
+ return std::error_code();
- unsigned Lineno = DLoc.getLine();
- if (Lineno < HeaderLineno)
- return 0;
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (!FS)
+ return std::error_code();
const DILocation *DIL = DLoc;
- int LOffset = Lineno - HeaderLineno;
- unsigned Discriminator = DIL->getDiscriminator();
- unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
- DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
- << " (line offset: " << LOffset << "." << Discriminator
- << " - weight: " << Weight << ")\n");
- return Weight;
+ unsigned Lineno = DLoc.getLine();
+ unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
+
+ uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
+ uint32_t Discriminator = DIL->getDiscriminator();
+ ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
+ if (R) {
+ bool FirstMark =
+ CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
+ if (FirstMark) {
+ const Function *F = Inst.getParent()->getParent();
+ LLVMContext &Ctx = F->getContext();
+ emitOptimizationRemark(
+ Ctx, DEBUG_TYPE, *F, DLoc,
+ Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
+ Twine(LineOffset) +
+ ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
+ }
+ DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
+ << Inst << " (line offset: " << Lineno - HeaderLineno << "."
+ << DIL->getDiscriminator() << " - weight: " << R.get()
+ << ")\n");
+ }
+ return R;
}
/// \brief Compute the weight of a basic block.
///
/// The weight of basic block \p BB is the maximum weight of all the
-/// instructions in BB. The weight of \p BB is computed and cached in
-/// the BlockWeights map.
+/// instructions in BB.
///
/// \param BB The basic block to query.
///
-/// \returns The computed weight of BB.
-unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
- // If we've computed BB's weight before, return it.
- std::pair<BlockWeightMap::iterator, bool> Entry =
- BlockWeights.insert(std::make_pair(BB, 0));
- if (!Entry.second)
- return Entry.first->second;
-
- // Otherwise, compute and cache BB's weight.
- unsigned Weight = 0;
+/// \returns the weight for \p BB.
+ErrorOr<uint64_t>
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
+ bool Found = false;
+ uint64_t Weight = 0;
for (auto &I : BB->getInstList()) {
- unsigned InstWeight = getInstWeight(I);
- if (InstWeight > Weight)
- Weight = InstWeight;
+ const ErrorOr<uint64_t> &R = getInstWeight(I);
+ if (R && R.get() >= Weight) {
+ Weight = R.get();
+ Found = true;
+ }
}
- Entry.first->second = Weight;
- return Weight;
+ if (Found)
+ return Weight;
+ else
+ return std::error_code();
}
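
// Illustrative example: for a block whose instructions map to sample counts
// {12, 97, 54} (hypothetical), the block weight is max(12, 97, 54) = 97;
// instructions with no matching profile record simply do not participate.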
/// \brief Compute and store the weights of every basic block.
@@ -270,15 +514,199 @@ unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
bool SampleProfileLoader::computeBlockWeights(Function &F) {
bool Changed = false;
DEBUG(dbgs() << "Block weights\n");
- for (auto &BB : F) {
- unsigned Weight = getBlockWeight(&BB);
- Changed |= (Weight > 0);
+ for (const auto &BB : F) {
+ ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
+ if (Weight) {
+ BlockWeights[&BB] = Weight.get();
+ VisitedBlocks.insert(&BB);
+ Changed = true;
+ }
DEBUG(printBlockWeight(dbgs(), &BB));
}
return Changed;
}
+/// \brief Get the FunctionSamples for a call instruction.
+///
+/// The FunctionSamples of a call instruction \p Inst is the inlined
+/// instance that the call instruction calls into. It contains
+/// all samples that reside in the inlined instance. We first find the
+/// inlined instance that the call instruction comes from, then we
+/// traverse its children to find the callsite with the matching
+/// location and callee function name.
+///
+/// \param Inst Call instruction to query.
+///
+/// \returns The FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+ const DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL) {
+ return nullptr;
+ }
+ DISubprogram *SP = DIL->getScope()->getSubprogram();
+ if (!SP)
+ return nullptr;
+
+ Function *CalleeFunc = Inst.getCalledFunction();
+ if (!CalleeFunc) {
+ return nullptr;
+ }
+
+ StringRef CalleeName = CalleeFunc->getName();
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (FS == nullptr)
+ return nullptr;
+
+ return FS->findFunctionSamplesAt(
+ CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator(), CalleeName));
+}
+
+/// \brief Get the FunctionSamples for an instruction.
+///
+/// The FunctionSamples of an instruction \p Inst is the inlined instance
+/// that the instruction comes from. We traverse the inline stack of the
+/// instruction and match it against the tree nodes in the profile.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
+ SmallVector<CallsiteLocation, 10> S;
+ const DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL) {
+ return Samples;
+ }
+ StringRef CalleeName;
+ for (const DILocation *DIL = Inst.getDebugLoc(); DIL;
+ DIL = DIL->getInlinedAt()) {
+ DISubprogram *SP = DIL->getScope()->getSubprogram();
+ if (!SP)
+ return nullptr;
+ if (!CalleeName.empty()) {
+ S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator(), CalleeName));
+ }
+ CalleeName = SP->getLinkageName();
+ }
+ if (S.size() == 0)
+ return Samples;
+ const FunctionSamples *FS = Samples;
+ for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
+ FS = FS->findFunctionSamplesAt(S[i]);
+ }
+ return FS;
+}
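+
+// Illustrative walk (hypothetical inline stack): for an instruction inlined
+// through main() -> foo() -> bar(), the loop above collects the callsite
+// locations of bar (in foo) and foo (in main) from the innermost DILocation
+// outward, then descends from the outermost FunctionSamples (main's) through
+// findFunctionSamplesAt() to reach bar's inlined instance.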
+
+/// \brief Emit an inline hint if \p F is globally hot or cold.
+///
+/// If \p F consumes a significant fraction of samples (indicated by
+/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute so that
+/// the inliner considers the function hot.
+///
+/// If \p F consumes a small fraction of samples (indicated by
+/// SampleProfileGlobalColdThreshold), apply the Cold attribute so that the
+/// inliner considers the function cold.
+///
+/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
+/// function globally hot or cold, we should be annotating individual callsites.
+/// This is not currently possible, but work on the inliner will eventually
+/// provide this ability. See http://reviews.llvm.org/D15003 for details and
+/// discussion.
+///
+/// \returns True if either attribute was applied to \p F.
+bool SampleProfileLoader::emitInlineHints(Function &F) {
+ if (TotalCollectedSamples == 0)
+ return false;
+
+ uint64_t FunctionSamples = Samples->getTotalSamples();
+ double SamplesPercent =
+ (double)FunctionSamples / (double)TotalCollectedSamples * 100.0;
+
+ // If the function collected more samples than the hot threshold, mark
+ // it globally hot.
+ if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
+ F.addFnAttr(llvm::Attribute::InlineHint);
+ std::string Msg;
+ raw_string_ostream S(Msg);
+ S << "Applied inline hint to globally hot function '" << F.getName()
+ << "' with " << format("%.2f", SamplesPercent)
+ << "% of samples (threshold: "
+ << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)";
+ S.flush();
+ emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+ return true;
+ }
+
+ // If the function collected fewer samples than the cold threshold, mark
+ // it globally cold.
+ if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
+ F.addFnAttr(llvm::Attribute::Cold);
+ std::string Msg;
+ raw_string_ostream S(Msg);
+ S << "Applied cold hint to globally cold function '" << F.getName()
+ << "' with " << format("%.2f", SamplesPercent)
+ << "% of samples (threshold: "
+ << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)";
+ S.flush();
+ emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+ return true;
+ }
+
+ return false;
+}
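+
+// Worked example (hypothetical thresholds): a function with 30000 of 100000
+// total collected samples accounts for 30% of the profile; with a
+// SampleProfileGlobalHotThreshold of 25 it receives InlineHint. A function
+// with 0.3% of the samples and a SampleProfileGlobalColdThreshold of 0.5
+// would receive the Cold attribute instead.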
+
+/// \brief Iteratively inline hot callsites of a function.
+///
+/// Iteratively traverse all callsites of the function \p F, checking whether
+/// the corresponding inlined instance exists and is hot in the profile. If
+/// it is hot enough, inline the callsite and add the new callsites of the
+/// callee into the caller.
+///
+/// TODO: investigate the possibility of not invoking InlineFunction directly.
+///
+/// \param F function on which to perform iterative inlining.
+///
+/// \returns True if any inlining happened.
+bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+ bool Changed = false;
+ LLVMContext &Ctx = F.getContext();
+ while (true) {
+ bool LocalChanged = false;
+ SmallVector<CallInst *, 10> CIS;
+ for (auto &BB : F) {
+ for (auto &I : BB.getInstList()) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
+ CIS.push_back(CI);
+ }
+ }
+ for (auto CI : CIS) {
+ InlineFunctionInfo IFI;
+ Function *CalledFunction = CI->getCalledFunction();
+ DebugLoc DLoc = CI->getDebugLoc();
+ uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
+ if (InlineFunction(CI, IFI)) {
+ LocalChanged = true;
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
+ Twine("inlined hot callee '") +
+ CalledFunction->getName() + "' with " +
+ Twine(NumSamples) + " samples into '" +
+ F.getName() + "'");
+ }
+ }
+ if (LocalChanged) {
+ Changed = true;
+ } else {
+ break;
+ }
+ }
+ return Changed;
+}
+
/// \brief Find equivalence classes for the given block.
///
/// This finds all the blocks that are guaranteed to execute the same
@@ -305,12 +733,13 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
void SampleProfileLoader::findEquivalencesFor(
BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
DominatorTreeBase<BasicBlock> *DomTree) {
- for (auto *BB2 : Descendants) {
+ const BasicBlock *EC = EquivalenceClass[BB1];
+ uint64_t Weight = BlockWeights[EC];
+ for (const auto *BB2 : Descendants) {
bool IsDomParent = DomTree->dominates(BB2, BB1);
bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
- if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
- IsInSameLoop) {
- EquivalenceClass[BB2] = BB1;
+ if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
+ EquivalenceClass[BB2] = EC;
// If BB2 is heavier than BB1, make BB2 have the same weight
// as BB1.
@@ -320,11 +749,10 @@ void SampleProfileLoader::findEquivalencesFor(
// during the propagation phase. Right now, we just want to
// make sure that the equivalence class representative has the largest
// weight of all the members of its equivalence set.
- unsigned &BB1Weight = BlockWeights[BB1];
- unsigned &BB2Weight = BlockWeights[BB2];
- BB1Weight = std::max(BB1Weight, BB2Weight);
+ Weight = std::max(Weight, BlockWeights[BB2]);
}
}
+ BlockWeights[EC] = Weight;
}
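
// Illustrative example: if BB1 dominates BB2, BB2 post-dominates BB1, and
// both belong to the same loop, the two blocks must execute the same number
// of times, so BB2 joins BB1's equivalence class and the class weight becomes
// max(weight(EC), weight(BB2)).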
/// \brief Find equivalence classes.
@@ -364,19 +792,7 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
// class by making BB2's equivalence class be BB1.
DominatedBBs.clear();
DT->getDescendants(BB1, DominatedBBs);
- findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
-
- // Repeat the same logic for all the blocks post-dominated by BB1.
- // We are looking for every basic block BB2 such that:
- //
- // 1- BB1 post-dominates BB2.
- // 2- BB2 dominates BB1.
- // 3- BB1 and BB2 are in the same loop nest.
- //
- // If all those conditions hold, BB2's equivalence class is BB1.
- DominatedBBs.clear();
- PDT->getDescendants(BB1, DominatedBBs);
- findEquivalencesFor(BB1, DominatedBBs, DT);
+ findEquivalencesFor(BB1, DominatedBBs, PDT.get());
DEBUG(printBlockEquivalence(dbgs(), BB1));
}
@@ -389,8 +805,8 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
// to all the blocks in that equivalence class.
DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
for (auto &BI : F) {
- BasicBlock *BB = &BI;
- BasicBlock *EquivBB = EquivalenceClass[BB];
+ const BasicBlock *BB = &BI;
+ const BasicBlock *EquivBB = EquivalenceClass[BB];
if (BB != EquivBB)
BlockWeights[BB] = BlockWeights[EquivBB];
DEBUG(printBlockWeight(dbgs(), BB));
@@ -407,7 +823,7 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
/// \param UnknownEdge Set if E has not been visited before.
///
/// \returns E's weight, if known. Otherwise, return 0.
-unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
+uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
Edge *UnknownEdge) {
if (!VisitedEdges.count(E)) {
(*NumUnknownEdges)++;
@@ -432,8 +848,9 @@ unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
bool SampleProfileLoader::propagateThroughEdges(Function &F) {
bool Changed = false;
DEBUG(dbgs() << "\nPropagation through edges\n");
- for (auto &BI : F) {
- BasicBlock *BB = &BI;
+ for (const auto &BI : F) {
+ const BasicBlock *BB = &BI;
+ const BasicBlock *EC = EquivalenceClass[BB];
// Visit all the predecessor and successor edges to determine
// which ones have a weight assigned already. Note that it doesn't
@@ -441,7 +858,7 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
// only case we are interested in handling is when only a single
// edge is unknown (see setEdgeOrBlockWeight).
for (unsigned i = 0; i < 2; i++) {
- unsigned TotalWeight = 0;
+ uint64_t TotalWeight = 0;
unsigned NumUnknownEdges = 0;
Edge UnknownEdge, SelfReferentialEdge;
@@ -485,7 +902,7 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
// all edges will get a weight, or iteration will stop when
// it reaches SampleProfileMaxPropagateIterations.
if (NumUnknownEdges <= 1) {
- unsigned &BBWeight = BlockWeights[BB];
+ uint64_t &BBWeight = BlockWeights[EC];
if (NumUnknownEdges == 0) {
// If we already know the weight of all edges, the weight of the
// basic block can be computed. It should be no larger than the sum
@@ -497,9 +914,9 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
<< " known. Set weight for block: ";
printBlockWeight(dbgs(), BB););
}
- if (VisitedBlocks.insert(BB).second)
+ if (VisitedBlocks.insert(EC).second)
Changed = true;
- } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
+ } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
// If there is a single unknown edge and the block has been
// visited, then we can compute E's weight.
if (BBWeight >= TotalWeight)
@@ -511,8 +928,8 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
DEBUG(dbgs() << "Set weight for edge: ";
printEdgeWeight(dbgs(), UnknownEdge));
}
- } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
- unsigned &BBWeight = BlockWeights[BB];
+ } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
+ uint64_t &BBWeight = BlockWeights[BB];
// We have a self-referential edge and the weight of BB is known.
if (BBWeight >= TotalWeight)
EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
@@ -578,7 +995,7 @@ void SampleProfileLoader::buildEdges(Function &F) {
/// known).
void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
- unsigned i = 0;
+ unsigned I = 0;
// Add an entry count to the function using the samples gathered
// at the function entry.
@@ -592,14 +1009,15 @@ void SampleProfileLoader::propagateWeights(Function &F) {
buildEdges(F);
// Propagate until we converge or we go past the iteration limit.
- while (Changed && i++ < SampleProfileMaxPropagateIterations) {
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
Changed = propagateThroughEdges(F);
}
// Generate MD_prof metadata for every branch instruction using the
// edge weights computed during propagation.
DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
- MDBuilder MDB(F.getContext());
+ LLVMContext &Ctx = F.getContext();
+ MDBuilder MDB(Ctx);
for (auto &BI : F) {
BasicBlock *BB = &BI;
TerminatorInst *TI = BB->getTerminator();
@@ -610,24 +1028,44 @@ void SampleProfileLoader::propagateWeights(Function &F) {
DEBUG(dbgs() << "\nGetting weights for branch at line "
<< TI->getDebugLoc().getLine() << ".\n");
- SmallVector<unsigned, 4> Weights;
- bool AllWeightsZero = true;
+ SmallVector<uint32_t, 4> Weights;
+ uint32_t MaxWeight = 0;
+ DebugLoc MaxDestLoc;
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
Edge E = std::make_pair(BB, Succ);
- unsigned Weight = EdgeWeights[E];
+ uint64_t Weight = EdgeWeights[E];
DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
- Weights.push_back(Weight);
- if (Weight != 0)
- AllWeightsZero = false;
+ // Use uint32_t saturated arithmetic to adjust the incoming weights,
+ // if needed. Sample counts in profiles are 64-bit unsigned values,
+ // but internally branch weights are expressed as 32-bit values.
+ if (Weight > std::numeric_limits<uint32_t>::max()) {
+ DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+ Weight = std::numeric_limits<uint32_t>::max();
+ }
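+ // E.g. (hypothetical count): a 64-bit edge weight of 6000000000 exceeds
+ // UINT32_MAX (4294967295) and is clamped here, since MD_prof branch
+ // weights are 32-bit.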
+ Weights.push_back(static_cast<uint32_t>(Weight));
+ if (Weight != 0) {
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+ }
+ }
}
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
- if (!AllWeightsZero) {
+ if (MaxWeight > 0) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
+ DebugLoc BranchLoc = TI->getDebugLoc();
+ emitOptimizationRemark(
+ Ctx, DEBUG_TYPE, F, MaxDestLoc,
+ Twine("most popular destination for conditional branches at ") +
+ ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
+ Twine(BranchLoc.getLine()) + ":" +
+ Twine(BranchLoc.getCol()))
+ : Twine("<UNKNOWN LOCATION>")));
} else {
DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
}
@@ -649,7 +1087,7 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
if (DISubprogram *S = getDISubprogram(&F))
return S->getLine();
- // If could not find the start of \p F, emit a diagnostic to inform the user
+ // If the start of \p F is missing, emit a diagnostic to inform the user
// about the missed opportunity.
F.getContext().diagnose(DiagnosticInfoSampleProfile(
"No debug information found in function " + F.getName() +
@@ -658,6 +1096,17 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
return 0;
}
+void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
+ DT.reset(new DominatorTree);
+ DT->recalculate(F);
+
+ PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+ PDT->recalculate(F);
+
+ LI.reset(new LoopInfo);
+ LI->analyze(*DT);
+}
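+
+// Note (sketch of the API contract): constructing DominatorTreeBase with
+// "true" selects the post-dominator variant of the tree, and LoopInfo is
+// derived from the freshly recalculated dominator tree.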
+
/// \brief Generate branch weight metadata for all branches in \p F.
///
/// Branch weights are computed out of instruction samples using a
@@ -710,18 +1159,23 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
bool SampleProfileLoader::emitAnnotations(Function &F) {
bool Changed = false;
- // Initialize invariants used during computation and propagation.
- HeaderLineno = getFunctionLoc(F);
- if (HeaderLineno == 0)
+ if (getFunctionLoc(F) == 0)
return false;
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
- << ": " << HeaderLineno << "\n");
+ << ": " << getFunctionLoc(F) << "\n");
+
+ Changed |= emitInlineHints(F);
+
+ Changed |= inlineHotFunctions(F);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
+ // Compute dominance and loop info needed for propagation.
+ computeDominanceAndLoopInfo(F);
+
// Find equivalence classes.
findEquivalenceClasses(F);
@@ -729,24 +1183,48 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
propagateWeights(F);
}
+ // If coverage checking was requested, compute it now.
+ if (SampleProfileRecordCoverage) {
+ unsigned Used = CoverageTracker.countUsedRecords(Samples);
+ unsigned Total = CoverageTracker.countBodyRecords(Samples);
+ unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+ if (Coverage < SampleProfileRecordCoverage) {
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ Twine(Used) + " of " + Twine(Total) + " available profile records (" +
+ Twine(Coverage) + "%) were applied",
+ DS_Warning));
+ }
+ }
+
+ if (SampleProfileSampleCoverage) {
+ uint64_t Used = CoverageTracker.getTotalUsedSamples();
+ uint64_t Total = CoverageTracker.countBodySamples(Samples);
+ unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+ if (Coverage < SampleProfileSampleCoverage) {
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
+ Twine(Coverage) + "%) were applied",
+ DS_Warning));
+ }
+ }
return Changed;
}
char SampleProfileLoader::ID = 0;
INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
bool SampleProfileLoader::doInitialization(Module &M) {
- auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
+ auto &Ctx = M.getContext();
+ auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
- M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
return false;
}
Reader = std::move(ReaderOrErr.get());
@@ -754,22 +1232,32 @@ bool SampleProfileLoader::doInitialization(Module &M) {
return true;
}
-FunctionPass *llvm::createSampleProfileLoaderPass() {
+ModulePass *llvm::createSampleProfileLoaderPass() {
return new SampleProfileLoader(SampleProfileFile);
}
-FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
return new SampleProfileLoader(Name);
}
-bool SampleProfileLoader::runOnFunction(Function &F) {
+bool SampleProfileLoader::runOnModule(Module &M) {
if (!ProfileIsValid)
return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- PDT = &getAnalysis<PostDominatorTree>();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- Ctx = &F.getParent()->getContext();
+ // Compute the total number of samples collected in this profile.
+ for (const auto &I : Reader->getProfiles())
+ TotalCollectedSamples += I.second.getTotalSamples();
+
+ bool retval = false;
+ for (auto &F : M)
+ if (!F.isDeclaration()) {
+ clearFunctionData();
+ retval |= runOnFunction(F);
+ }
+ return retval;
+}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
Samples = Reader->getSamplesFor(F);
if (!Samples->empty())
return emitAnnotations(F);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 956991ad1f95..c94cc7c74a89 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -7,47 +7,31 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass loops over all of the functions in the input module, looking for
+// This pass loops over all of the functions in the input module, looking for
// dead declarations and removing them. Dead declarations are declarations of
// functions for which no implementation is available (i.e., declarations for
// unused library functions).
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
using namespace llvm;
#define DEBUG_TYPE "strip-dead-prototypes"
STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
-namespace {
-
-/// @brief Pass to remove unused function declarations.
-class StripDeadPrototypesPass : public ModulePass {
-public:
- static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesPass() : ModulePass(ID) {
- initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnModule(Module &M) override;
-};
-
-} // end anonymous namespace
-
-char StripDeadPrototypesPass::ID = 0;
-INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
- "Strip Unused Function Prototypes", false, false)
-
-bool StripDeadPrototypesPass::runOnModule(Module &M) {
+static bool stripDeadPrototypes(Module &M) {
bool MadeChange = false;
-
+
// Erase dead function prototypes.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function *F = I++;
+ Function *F = &*I++;
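+ // &*I materializes a plain Function* from the iterator before the
+ // increment; ilist iterators no longer convert to pointers implicitly.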
// Function must be a prototype and unused.
if (F->isDeclaration() && F->use_empty()) {
F->eraseFromParent();
@@ -59,16 +43,42 @@ bool StripDeadPrototypesPass::runOnModule(Module &M) {
// Erase dead global var prototypes.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ) {
- GlobalVariable *GV = I++;
+ GlobalVariable *GV = &*I++;
// Global must be a prototype and unused.
if (GV->isDeclaration() && GV->use_empty())
GV->eraseFromParent();
}
-
+
// Return an indication of whether we changed anything or not.
return MadeChange;
}
+PreservedAnalyses StripDeadPrototypesPass::run(Module &M) {
+ if (stripDeadPrototypes(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
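+
+// Usage sketch (illustrative, new pass manager; exact PM plumbing may vary):
+//   ModulePassManager MPM;
+//   MPM.addPass(StripDeadPrototypesPass());
+//   MPM.run(M);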
+
+namespace {
+
+class StripDeadPrototypesLegacyPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesLegacyPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override {
+ return stripDeadPrototypes(M);
+ }
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesLegacyPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false)
+
ModulePass *llvm::createStripDeadPrototypesPass() {
- return new StripDeadPrototypesPass();
+ return new StripDeadPrototypesLegacyPass();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index a4f30c58f936..46f352f7f9f1 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -211,13 +211,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
}
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
@@ -305,6 +305,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
SmallVector<Metadata *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
+ std::set<DISubprogram *> LiveSPs;
+ for (Function &F : M) {
+ if (DISubprogram *SP = F.getSubprogram())
+ LiveSPs.insert(SP);
+ }
+
for (DICompileUnit *DIC : F.compile_units()) {
// Create our live subprogram list.
bool SubprogramChange = false;
@@ -314,7 +320,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
continue;
// If the function referenced by DISP is not null, the function is live.
- if (DISP->getFunction())
+ if (LiveSPs.count(DISP))
LiveSubprograms.push_back(DISP);
else
SubprogramChange = true;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 2d2c109f3243..6f49399f57bf 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1,4 +1,4 @@
-//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/PatternMatch.h"
+
using namespace llvm;
using namespace PatternMatch;
@@ -67,17 +68,17 @@ namespace {
private:
bool insaneIntVal(int V) { return V > 4 || V < -4; }
- APFloat *getFpValPtr(void)
+ APFloat *getFpValPtr()
{ return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
- const APFloat *getFpValPtr(void) const
+ const APFloat *getFpValPtr() const
{ return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
- const APFloat &getFpVal(void) const {
+ const APFloat &getFpVal() const {
assert(IsFp && BufHasFpVal && "Incorrect state");
return *getFpValPtr();
}
- APFloat &getFpVal(void) {
+ APFloat &getFpVal() {
assert(IsFp && BufHasFpVal && "Incorrect state");
return *getFpValPtr();
}
@@ -92,8 +93,8 @@ namespace {
// TODO: We should get rid of this function when APFloat can be constructed
// from an *SIGNED* integer.
APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
- private:
+ private:
bool IsFp;
// True iff FpValBuf contains an instance of APFloat.
@@ -114,10 +115,10 @@ namespace {
///
class FAddend {
public:
- FAddend() { Val = nullptr; }
+ FAddend() : Val(nullptr) {}
- Value *getSymVal (void) const { return Val; }
- const FAddendCoef &getCoef(void) const { return Coeff; }
+ Value *getSymVal() const { return Val; }
+ const FAddendCoef &getCoef() const { return Coeff; }
bool isConstant() const { return Val == nullptr; }
bool isZero() const { return Coeff.isZero(); }
@@ -182,7 +183,6 @@ namespace {
InstCombiner::BuilderTy *Builder;
Instruction *Instr;
- private:
// Debugging stuff are clustered here.
#ifndef NDEBUG
unsigned CreateInstrNum;
@@ -193,7 +193,8 @@ namespace {
void incCreateInstNum() {}
#endif
};
-}
+
+} // anonymous namespace
//===----------------------------------------------------------------------===//
//
@@ -602,7 +603,6 @@ Value *FAddCombine::simplify(Instruction *I) {
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
-
unsigned AddendNum = Addends.size();
assert(AddendNum <= 4 && "Too many addends");
@@ -886,7 +886,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
return Op0ZeroPosition >= Op1OnePosition;
}
-/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// Return true if we can prove that:
/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
@@ -1118,8 +1118,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
// transform them into (X + (signbit ^ C))
if (XorRHS->getValue().isSignBit())
- return BinaryOperator::CreateAdd(XorLHS,
- ConstantExpr::getXor(XorRHS, CI));
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
}
}
@@ -1421,7 +1421,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-
/// Optimize pointer differences into the same array into a size. Consider:
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
@@ -1589,7 +1588,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
}
}
-
{
Value *Y;
// X-(X+Y) == -Y X-(Y+X) == -Y
@@ -1611,32 +1609,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAnd(A, B);
}
- // (sub (select (a, c, b)), (select (a, d, b))) -> (select (a, (sub c, d), 0))
- // (sub (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (sub c, d)))
- if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
- if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
- if (SI0->getCondition() == SI1->getCondition()) {
- if (Value *V = SimplifySubInst(
- SI0->getFalseValue(), SI1->getFalseValue(), I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return SelectInst::Create(
- SI0->getCondition(),
- Builder->CreateSub(SI0->getTrueValue(), SI1->getTrueValue(), "",
- /*HasNUW=*/I.hasNoUnsignedWrap(),
- /*HasNSW=*/I.hasNoSignedWrap()),
- V);
- if (Value *V = SimplifySubInst(SI0->getTrueValue(), SI1->getTrueValue(),
- I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return SelectInst::Create(
- SI0->getCondition(), V,
- Builder->CreateSub(SI0->getFalseValue(), SI1->getFalseValue(), "",
- /*HasNUW=*/I.hasNoUnsignedWrap(),
- /*HasNSW=*/I.hasNoSignedWrap()));
- }
- }
- }
-
if (Op0->hasOneUse()) {
Value *Y = nullptr;
// ((X | Y) - X) --> (~X & Y)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 15e0889b51b7..95c50d32c820 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -37,9 +37,9 @@ static inline Value *dyn_castNotVal(Value *V) {
return nullptr;
}
-/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
-/// predicate into a three bit mask. It also returns whether it is an ordered
-/// predicate by reference.
+/// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into
+/// a three bit mask. It also returns whether it is an ordered predicate by
+/// reference.
static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
isOrdered = false;
switch (CC) {
@@ -64,10 +64,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
}
}
-/// getNewICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand
-/// new ICmp instruction. The sign is passed in to determine which kind
-/// of predicate to use in the new icmp instruction.
+/// This is the complement of getICmpCode, which turns an opcode and two
+/// operands into either a constant true or false, or a brand new ICmp
+/// instruction. The sign is passed in to determine which kind of predicate to
+/// use in the new icmp instruction.
static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
InstCombiner::BuilderTy *Builder) {
ICmpInst::Predicate NewPred;
@@ -76,9 +76,9 @@ static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
return Builder->CreateICmp(NewPred, LHS, RHS);
}
-/// getFCmpValue - This is the complement of getFCmpCode, which turns an
-/// opcode and two operands into either a FCmp instruction. isordered is passed
-/// in to determine which kind of predicate to use in the new fcmp instruction.
+/// This is the complement of getFCmpCode, which turns an opcode and two
+/// operands into either a FCmp instruction. isordered is passed in to determine
+/// which kind of predicate to use in the new fcmp instruction.
static Value *getFCmpValue(bool isordered, unsigned code,
Value *LHS, Value *RHS,
InstCombiner::BuilderTy *Builder) {
@@ -150,14 +150,13 @@ Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
else //if (Op == Instruction::Xor)
BinOp = Builder->CreateXor(NewLHS, NewRHS);
- Module *M = I.getParent()->getParent()->getParent();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+ Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy);
return Builder->CreateCall(F, BinOp);
}
-// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where
-// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
-// guaranteed to be a binary operator.
+/// This handles expressions of the form ((val OP C1) & C2). Where
+/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+/// guaranteed to be a binary operator.
Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *OpRHS,
ConstantInt *AndRHS,
@@ -341,10 +340,10 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
return Builder->CreateICmpUGT(Add, LowerBound);
}
-// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
-// any number of 0s on either side. The 1s are allowed to wrap from LSB to
-// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
-// not, since all 1s are not contiguous.
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side. The 1s are allowed to wrap from LSB to MSB,
+/// so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+/// not, since all 1s are not contiguous.
static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
const APInt& V = Val->getValue();
uint32_t BitWidth = Val->getType()->getBitWidth();
@@ -357,9 +356,8 @@ static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
return true;
}
-/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
-/// where isSub determines whether the operator is a sub. If we can fold one of
-/// the following xforms:
+/// This is part of an expression (LHS +/- RHS) & Mask, where isSub determines
+/// whether the operator is a sub. If we can fold one of the following xforms:
///
/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
@@ -449,8 +447,8 @@ enum MaskedICmpType {
FoldMskICmp_BMask_NotMixed = 512
};
-/// return the set of pattern classes (from MaskedICmpType)
-/// that (icmp SCC (A & B), C) satisfies
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies.
static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
ICmpInst::Predicate SCC)
{
@@ -538,8 +536,8 @@ static unsigned conjugateICmpMask(unsigned Mask) {
return NewMask;
}
-/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
-/// if possible. The returned predicate is either == or !=. Returns false if
+/// Decompose an icmp into the form ((X & Y) pred Z) if possible.
+/// The returned predicate is either == or !=. Returns false if
/// decomposition fails.
static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
Value *&X, Value *&Y, Value *&Z) {
@@ -585,10 +583,9 @@ static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
return true;
}
-/// foldLogOpOfMaskedICmpsHelper:
-/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// return the set of pattern classes (from MaskedICmpType)
-/// that both LHS and RHS satisfy
+/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy.
static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value*& B, Value*& C,
Value*& D, Value*& E,
@@ -700,9 +697,9 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
return left_type & right_type;
}
-/// foldLogOpOfMaskedICmps:
-/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// into a single (icmp(A & X) ==/!= Y)
+
+/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y).
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy *Builder) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
@@ -879,7 +876,7 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
return Builder->CreateICmp(NewPred, Input, RangeEnd);
}
-/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+/// Fold (icmp)&(icmp) if possible.
Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -1123,9 +1120,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return nullptr;
}
-/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
-/// instcombine, this returns a Value which should already be inserted into the
-/// function.
+/// Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
@@ -1203,6 +1199,54 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return nullptr;
}
+/// Match De Morgan's Laws:
+/// (~A & ~B) == (~(A | B))
+/// (~A | ~B) == (~(A & B))
+static Instruction *matchDeMorgansLaws(BinaryOperator &I,
+ InstCombiner::BuilderTy *Builder) {
+ auto Opcode = I.getOpcode();
+ assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ "Trying to match De Morgan's Laws with something other than and/or");
+ // Flip the logic operation.
+ if (Opcode == Instruction::And)
+ Opcode = Instruction::Or;
+ else
+ Opcode = Instruction::And;
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ // TODO: Use pattern matchers instead of dyn_cast.
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *LogicOp = Builder->CreateBinOp(Opcode, Op0NotVal, Op1NotVal,
+ I.getName() + ".demorgan");
+ return BinaryOperator::CreateNot(LogicOp);
+ }
+
+ // De Morgan's Law in disguise:
+ // (zext(bool A) ^ 1) & (zext(bool B) ^ 1) -> zext(~(A | B))
+ // (zext(bool A) ^ 1) | (zext(bool B) ^ 1) -> zext(~(A & B))
+ Value *A = nullptr;
+ Value *B = nullptr;
+ ConstantInt *C1 = nullptr;
+ if (match(Op0, m_OneUse(m_Xor(m_ZExt(m_Value(A)), m_ConstantInt(C1)))) &&
+ match(Op1, m_OneUse(m_Xor(m_ZExt(m_Value(B)), m_Specific(C1))))) {
+ // TODO: This check could be loosened to handle different type sizes.
+ // Alternatively, we could fix the definition of m_Not to recognize a not
+ // operation hidden by a zext?
+ if (A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1) &&
+ C1->isOne()) {
+ Value *LogicOp = Builder->CreateBinOp(Opcode, A, B,
+ I.getName() + ".demorgan");
+ Value *Not = Builder->CreateNot(LogicOp);
+ return CastInst::CreateZExtOrBitCast(Not, I.getType());
+ }
+ }
+
+ return nullptr;
+}
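+
+// Illustrative IR: given %na = xor i32 %a, -1 and %nb = xor i32 %b, -1
+// (each with one use), "and i32 %na, %nb" is rebuilt as "%or = or i32 %a, %b"
+// followed by "xor i32 %or, -1", i.e. ~(a | b).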
+
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -1273,6 +1317,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
return BinaryOperator::CreateAnd(V, AndRHS);
+ // -x & 1 -> x & 1
+ if (AndRHSMask == 1 && match(Op0LHS, m_Zero()))
+ return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
+
// (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
// has 1's for all bits that the subtraction with A might affect.
if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
@@ -1329,15 +1377,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return NV;
}
-
- // (~A & ~B) == (~(A | B)) - De Morgan's Law
- if (Value *Op0NotVal = dyn_castNotVal(Op0))
- if (Value *Op1NotVal = dyn_castNotVal(Op1))
- if (Op0->hasOneUse() && Op1->hasOneUse()) {
- Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(Or);
- }
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+ return DeMorgan;
{
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
@@ -1446,14 +1487,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Res);
- // fold (and (cast A), (cast B)) -> (cast (and A, B))
- if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ Value *Op0COp = Op0C->getOperand(0);
+ Type *SrcTy = Op0COp->getType();
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
- Type *SrcTy = Op0C->getOperand(0)->getType();
if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVectorTy()) {
- Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+ Value *Op1COp = Op1C->getOperand(0);
// Only do this if the casts both really cause code to be generated.
if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
@@ -1478,6 +1520,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
+ // If we are masking off the sign bit of a floating-point value, convert
+ // this to the canonical fabs intrinsic call and cast back to integer.
+ // The backend should know how to optimize fabs().
+ // TODO: This transform should also apply to vectors.
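+ // E.g. (illustrative IR): "and i32 (bitcast float %f to i32), 2147483647"
+ // clears the sign bit of %f and becomes a bitcast of
+ // "call float @llvm.fabs.f32(float %f)" back to i32.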
+ ConstantInt *CI;
+ if (isa<BitCastInst>(Op0C) && SrcTy->isFloatingPointTy() &&
+ match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) {
+ Module *M = I.getModule();
+ Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy);
+ Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs");
+ return CastInst::CreateBitOrPointerCast(Call, I.getType());
+ }
+ }
+
{
Value *X = nullptr;
bool OpsSwapped = false;
@@ -1509,163 +1565,195 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-/// CollectBSwapParts - Analyze the specified subexpression and see if it is
-/// capable of providing pieces of a bswap. The subexpression provides pieces
-/// of a bswap if it is proven that each of the non-zero bytes in the output of
-/// the expression came from the corresponding "byte swapped" byte in some other
-/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
-/// we know that the expression deposits the low byte of %X into the high byte
-/// of the bswap result and that all other bytes are zero. This expression is
-/// accepted, the high byte of ByteValues is set to X to indicate a correct
-/// match.
+
+/// Analyze the specified subexpression and see if it is capable of providing
+/// pieces of a bswap or bitreverse. The subexpression provides a potential
+/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// the output of the expression came from a corresponding bit in some other
+/// value. This function is recursive, and the end result is a mapping of
+/// (value, bitnumber) to bitnumber. It is the caller's responsibility to
+/// validate that all `value`s are identical and that the bitnumber to bitnumber
+/// mapping is correct for a bswap or bitreverse.
+///
+/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know
+/// that the expression deposits the low byte of %X into the high byte of the
+/// result and that all other bits are zero. This expression is accepted,
+/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7].
///
/// This function returns true if the match was unsuccessful and false otherwise.
/// On entry to the function the "OverallLeftShift" is a signed integer value
-/// indicating the number of bytes that the subexpression is later shifted. For
+/// indicating the number of bits that the subexpression is later shifted. For
/// example, if the expression is later right shifted by 16 bits, the
-/// OverallLeftShift value would be -2 on entry. This is used to specify which
-/// byte of ByteValues is actually being set.
+/// OverallLeftShift value would be -16 on entry. This is used to specify which
+/// bits of BitValues are actually being set.
///
-/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
-/// byte is masked to zero by a user. For example, in (X & 255), X will be
-/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
-/// this function to working on up to 32-byte (256 bit) values. ByteMask is
-/// always in the local (OverallLeftShift) coordinate space.
+/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding
+/// bit is masked to zero by a user. For example, in (X & 255), X will be
+/// processed with a bitmask of 255. BitMask is always in the local
+/// (OverallLeftShift) coordinate space.
///
-static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
- SmallVectorImpl<Value *> &ByteValues) {
+static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask,
+ SmallVectorImpl<Value *> &BitValues,
+ SmallVectorImpl<int> &BitProvenance) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If this is an or instruction, it may be an inner node of the bswap.
- if (I->getOpcode() == Instruction::Or) {
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues) ||
- CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
- ByteValues);
- }
-
- // If this is a logical shift by a constant multiple of 8, recurse with
- // OverallLeftShift and ByteMask adjusted.
+ if (I->getOpcode() == Instruction::Or)
+ return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+ BitValues, BitProvenance) ||
+ CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask,
+ BitValues, BitProvenance);
+
+ // If this is a logical shift by a constant, recurse with OverallLeftShift
+ // and BitMask adjusted.
if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
unsigned ShAmt =
- cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
- // Ensure the shift amount is defined and of a byte value.
- if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined.
+ if (ShAmt > BitValues.size())
return true;
- unsigned ByteShift = ShAmt >> 3;
+ unsigned BitShift = ShAmt;
if (I->getOpcode() == Instruction::Shl) {
- // X << 2 -> collect(X, +2)
- OverallLeftShift += ByteShift;
- ByteMask >>= ByteShift;
+ // X << C -> collect(X, +C)
+ OverallLeftShift += BitShift;
+ BitMask = BitMask.lshr(BitShift);
} else {
- // X >>u 2 -> collect(X, -2)
- OverallLeftShift -= ByteShift;
- ByteMask <<= ByteShift;
- ByteMask &= (~0U >> (32-ByteValues.size()));
+ // X >>u C -> collect(X, -C)
+ OverallLeftShift -= BitShift;
+ BitMask = BitMask.shl(BitShift);
}
- if (OverallLeftShift >= (int)ByteValues.size()) return true;
- if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+ if (OverallLeftShift >= (int)BitValues.size())
+ return true;
+ if (OverallLeftShift <= -(int)BitValues.size())
+ return true;
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues);
+ return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+ BitValues, BitProvenance);
}
- // If this is a logical 'and' with a mask that clears bytes, clear the
- // corresponding bytes in ByteMask.
+ // If this is a logical 'and' with a mask that clears bits, clear the
+ // corresponding bits in BitMask.
if (I->getOpcode() == Instruction::And &&
isa<ConstantInt>(I->getOperand(1))) {
- // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
- unsigned NumBytes = ByteValues.size();
- APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+ unsigned NumBits = BitValues.size();
+ APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
- for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
- // If this byte is masked out by a later operation, we don't care what
+ for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) {
+ // If this bit is masked out by a later operation, we don't care what
// the and mask is.
- if ((ByteMask & (1 << i)) == 0)
+ if (BitMask[i] == 0)
continue;
- // If the AndMask is all zeros for this byte, clear the bit.
- APInt MaskB = AndMask & Byte;
+ // If the AndMask is zero for this bit, clear the bit.
+ APInt MaskB = AndMask & Bit;
if (MaskB == 0) {
- ByteMask &= ~(1U << i);
+ BitMask.clearBit(i);
continue;
}
- // If the AndMask is not all ones for this byte, it's not a bytezap.
- if (MaskB != Byte)
- return true;
-
- // Otherwise, this byte is kept.
+ // Otherwise, this bit is kept.
}
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues);
+ return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+ BitValues, BitProvenance);
}
}
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
- // the input value to the bswap. Some observations: 1) if more than one byte
- // is demanded from this input, then it could not be successfully assembled
- // into a byteswap. At least one of the two bytes would not be aligned with
- // their ultimate destination.
- if (!isPowerOf2_32(ByteMask)) return true;
- unsigned InputByteNo = countTrailingZeros(ByteMask);
-
- // 2) The input and ultimate destinations must line up: if byte 3 of an i32
- // is demanded, it needs to go into byte 0 of the result. This means that the
- // byte needs to be shifted until it lands in the right byte bucket. The
- // shift amount depends on the position: if the byte is coming from the high
- // part of the value (e.g. byte 3) then it must be shifted right. If from the
- // low part, it must be shifted left.
- unsigned DestByteNo = InputByteNo + OverallLeftShift;
- if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ // the input value to the bswap/bitreverse. To be part of a bswap or
+ // bitreverse we must be demanding a contiguous range of bits from it.
+ unsigned InputBitLen = BitMask.countPopulation();
+ unsigned InputBitNo = BitMask.countTrailingZeros();
+ if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo !=
+ InputBitLen)
+    // Not a contiguous range of set bits!
return true;
- // If the destination byte value is already defined, the values are or'd
- // together, which isn't a bswap (unless it's an or of the same bits).
- if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
+ // We know we're moving a contiguous range of bits from the input to the
+ // output. Record which bits in the output came from which bits in the input.
+ unsigned DestBitNo = InputBitNo + OverallLeftShift;
+ for (unsigned I = 0; I < InputBitLen; ++I)
+ BitProvenance[DestBitNo + I] = InputBitNo + I;
+
+ // If the destination bit value is already defined, the values are or'd
+ // together, which isn't a bswap/bitreverse (unless it's an or of the same
+ // bits).
+ if (BitValues[DestBitNo] && BitValues[DestBitNo] != V)
return true;
- ByteValues[DestByteNo] = V;
+ for (unsigned I = 0; I < InputBitLen; ++I)
+ BitValues[DestBitNo + I] = V;
+
return false;
}
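As a sketch of what CollectBitParts decomposes, consider the classic byte-swap idiom below (hypothetical IR, not from the patch). Each or-operand deposits one contiguous byte of %x, so every BitValues entry ends up as %x and BitProvenance records the byte reversal:

  define i32 @bswap_idiom(i32 %x) {
    %b0 = shl i32 %x, 24             ; byte 0 -> byte 3
    %t1 = shl i32 %x, 8
    %b1 = and i32 %t1, 16711680      ; 0x00ff0000: byte 1 -> byte 2
    %t2 = lshr i32 %x, 8
    %b2 = and i32 %t2, 65280         ; 0x0000ff00: byte 2 -> byte 1
    %b3 = lshr i32 %x, 24            ; byte 3 -> byte 0
    %o1 = or i32 %b0, %b1
    %o2 = or i32 %o1, %b2
    %r  = or i32 %o2, %b3            ; recognized as @llvm.bswap.i32(%x)
    ret i32 %r
  }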
-/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
-/// If so, insert the new bswap intrinsic and return it.
-Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
- IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
- if (!ITy || ITy->getBitWidth() % 16 ||
- // ByteMask only allows up to 32-byte values.
- ITy->getBitWidth() > 32*8)
- return nullptr; // Can only bswap pairs of bytes. Can't do vectors.
+static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
+ unsigned BitWidth) {
+ if (From % 8 != To % 8)
+ return false;
+ // Convert from bit indices to byte indices and check for a byte reversal.
+ From >>= 3;
+ To >>= 3;
+ BitWidth >>= 3;
+ return From == BitWidth - To - 1;
+}
- /// ByteValues - For each byte of the result, we keep track of which value
- /// defines each byte.
- SmallVector<Value*, 8> ByteValues;
- ByteValues.resize(ITy->getBitWidth()/8);
+static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
+ unsigned BitWidth) {
+ return From == BitWidth - To - 1;
+}
+/// Given an OR instruction, check to see if this is a bswap or bitreverse
+/// idiom. If so, insert the new intrinsic and return it.
+Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) {
+ IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ if (!ITy)
+ return nullptr; // Can't do vectors.
+ unsigned BW = ITy->getBitWidth();
+
+ /// We keep track of which bit (BitProvenance) inside which value (BitValues)
+ /// defines each bit in the result.
+ SmallVector<Value *, 8> BitValues(BW, nullptr);
+ SmallVector<int, 8> BitProvenance(BW, -1);
+
// Try to find all the pieces corresponding to the bswap.
- uint32_t ByteMask = ~0U >> (32-ByteValues.size());
- if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+ APInt BitMask = APInt::getAllOnesValue(BitValues.size());
+ if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance))
return nullptr;
- // Check to see if all of the bytes come from the same value.
- Value *V = ByteValues[0];
- if (!V) return nullptr; // Didn't find a byte? Must be zero.
+ // Check to see if all of the bits come from the same value.
+ Value *V = BitValues[0];
+ if (!V) return nullptr; // Didn't find a bit? Must be zero.
- // Check to make sure that all of the bytes come from the same value.
- for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
- if (ByteValues[i] != V)
- return nullptr;
- Module *M = I.getParent()->getParent()->getParent();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+ if (!std::all_of(BitValues.begin(), BitValues.end(),
+ [&](const Value *X) { return X == V; }))
+ return nullptr;
+
+ // Now, is the bit permutation correct for a bswap or a bitreverse? We can
+ // only byteswap values with an even number of bytes.
+  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;
+ for (unsigned i = 0, e = BitValues.size(); i != e; ++i) {
+ OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
+ OKForBitReverse &=
+ bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
+ }
+
+ Intrinsic::ID Intrin;
+ if (OKForBSwap)
+ Intrin = Intrinsic::bswap;
+ else if (OKForBitReverse)
+ Intrin = Intrinsic::bitreverse;
+ else
+ return nullptr;
+
+ Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy);
return CallInst::Create(F, V);
}
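A worked check of the two predicates above, for bit 0 of an i32 result whose provenance is bit 24: the bswap test passes (24 % 8 == 0 % 8, and byte 3 == 4 - 0 - 1) while the bitreverse test fails (it would require bit 31). A minimal idiom that matches the bitreverse side instead, sketched on a hypothetical i4 value:

  define i4 @rev4(i4 %x) {
    %b3 = shl i4 %x, 3               ; bit 0 -> bit 3
    %t2 = shl i4 %x, 1
    %b2 = and i4 %t2, 4              ; bit 1 -> bit 2
    %t1 = lshr i4 %x, 1
    %b1 = and i4 %t1, 2              ; bit 2 -> bit 1
    %b0 = lshr i4 %x, 3              ; bit 3 -> bit 0
    %o1 = or i4 %b3, %b2
    %o2 = or i4 %o1, %b1
    %r  = or i4 %o2, %b0             ; recognized as @llvm.bitreverse.i4(%x)
    ret i4 %r
  }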
-/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
-/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then
-/// we can simplify this expression to "cond ? C : D or B".
+/// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0)
+/// and either B or D is ~(cond?-1,0) or (cond?0,-1), then we can simplify this
+/// expression to "cond ? C : D or B".
static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
Value *C, Value *D) {
// If A is not a select of -1/0, this cannot match.
@@ -1688,7 +1776,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return nullptr;
}
-/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+/// Fold (icmp)|(icmp) if possible.
Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction *CxtI) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -1905,14 +1993,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
case ICmpInst::ICMP_EQ:
if (LHS->getOperand(0) == RHS->getOperand(0)) {
// if LHSCst and RHSCst differ only by one bit:
- // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
+ // (A == C1 || A == C2) -> (A | (C1 ^ C2)) == C2
      assert(LHSCst->getValue().ule(RHSCst->getValue()));
APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
if (Xor.isPowerOf2()) {
- Value *NegCst = Builder->getInt(~Xor);
- Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst);
- return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst);
+ Value *Cst = Builder->getInt(Xor);
+ Value *Or = Builder->CreateOr(LHS->getOperand(0), Cst);
+ return Builder->CreateICmp(ICmpInst::ICMP_EQ, Or, RHSCst);
}
}
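A worked instance of the rewritten fold, with constants chosen for illustration: for C1 = 4 and C2 = 6, C1 ^ C2 = 2 is a power of two, so the pair of equality tests collapses to a single or-and-compare:

  define i1 @eq_pair(i32 %a) {
    %c1 = icmp eq i32 %a, 4
    %c2 = icmp eq i32 %a, 6
    %r  = or i1 %c1, %c2             ; folds to: (icmp eq (or %a, 2), 6)
    ret i1 %r
  }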
@@ -2020,9 +2108,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
}
-/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
-/// instcombine, this returns a Value which should already be inserted into the
-/// function.
+/// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
RHS->getPredicate() == FCmpInst::FCMP_UNO &&
@@ -2080,7 +2167,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return nullptr;
}
-/// FoldOrWithConstants - This helper function folds:
+/// This helper function folds:
///
/// ((A | B) & C1) | (B & C2)
///
@@ -2199,14 +2286,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
ConstantInt *C1 = nullptr, *C2 = nullptr;
// (A | B) | C and A | (B | C) -> bswap if possible.
+ bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value()));
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
- if (match(Op0, m_Or(m_Value(), m_Value())) ||
- match(Op1, m_Or(m_Value(), m_Value())) ||
- (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
- match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
- if (Instruction *BSwap = MatchBSwap(I))
+ bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value()));
+ // (A & B) | (C & D) -> bswap if possible.
+ bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
+ match(Op1, m_And(m_Value(), m_Value()));
+
+ if (OrOfOrs || OrOfShifts || OrOfAnds)
+ if (Instruction *BSwap = MatchBSwapOrBitReverse(I))
return BSwap;
- }
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
if (Op0->hasOneUse() &&
@@ -2360,14 +2451,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C));
- // (~A | ~B) == (~(A & B)) - De Morgan's Law
- if (Value *Op0NotVal = dyn_castNotVal(Op0))
- if (Value *Op1NotVal = dyn_castNotVal(Op1))
- if (Op0->hasOneUse() && Op1->hasOneUse()) {
- Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(And);
- }
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+ return DeMorgan;
// Canonicalize xor to the RHS.
bool SwappedForXor = false;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6de380bcad67..e3634f269cf5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -67,8 +67,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned CopyAlign = MI->getAlignment();
if (CopyAlign < MinAlign) {
- MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
- MinAlign, false));
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
return MI;
}
@@ -198,12 +197,140 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+static Value *SimplifyX86immshift(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ bool LogicalShift = false;
+ bool ShiftLeft = false;
+
+ switch (II.getIntrinsicID()) {
+ default:
+ return nullptr;
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ LogicalShift = false; ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ LogicalShift = true; ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ LogicalShift = true; ShiftLeft = true;
+ break;
+ }
+ assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
+ // Simplify if count is constant.
+ auto Arg1 = II.getArgOperand(1);
+ auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
+ auto CDV = dyn_cast<ConstantDataVector>(Arg1);
+ auto CInt = dyn_cast<ConstantInt>(Arg1);
+ if (!CAZ && !CDV && !CInt)
+ return nullptr;
+
+ APInt Count(64, 0);
+ if (CDV) {
+ // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ auto VT = cast<VectorType>(CDV->getType());
+ unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
+ assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
+ unsigned NumSubElts = 64 / BitWidth;
+
+ // Concatenate the sub-elements to create the 64-bit value.
+ for (unsigned i = 0; i != NumSubElts; ++i) {
+ unsigned SubEltIdx = (NumSubElts - 1) - i;
+ auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
+ Count = Count.shl(BitWidth);
+ Count |= SubElt->getValue().zextOrTrunc(64);
+ }
+  } else if (CInt)
+ Count = CInt->getValue();
+
+ auto Vec = II.getArgOperand(0);
+ auto VT = cast<VectorType>(Vec->getType());
+ auto SVT = VT->getElementType();
+ unsigned VWidth = VT->getNumElements();
+ unsigned BitWidth = SVT->getPrimitiveSizeInBits();
+
+ // If shift-by-zero then just return the original value.
+ if (Count == 0)
+ return Vec;
+
+ // Handle cases when Shift >= BitWidth.
+ if (Count.uge(BitWidth)) {
+ // If LogicalShift - just return zero.
+ if (LogicalShift)
+ return ConstantAggregateZero::get(VT);
+
+ // If ArithmeticShift - clamp Shift to (BitWidth - 1).
+ Count = APInt(64, BitWidth - 1);
+ }
+
+ // Get a constant vector of the same type as the first operand.
+ auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
+ auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
+
+ if (ShiftLeft)
+ return Builder.CreateShl(Vec, ShiftVec);
+
+ if (LogicalShift)
+ return Builder.CreateLShr(Vec, ShiftVec);
+
+ return Builder.CreateAShr(Vec, ShiftVec);
+}
+
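A sketch of what SimplifyX86immshift above produces for a constant count (one of the handled intrinsics shown; names hypothetical). A logical shift by a count >= the element width folds to zeroinitializer instead, and an arithmetic shift clamps the count to BitWidth - 1:

  define <4 x i32> @ashr_demo(<4 x i32> %v) {
    %s = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 3)
    ; becomes: ashr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
    ret <4 x i32> %s
  }
  declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)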
+static Value *SimplifyX86extend(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder,
+ bool SignExtend) {
+ VectorType *SrcTy = cast<VectorType>(II.getArgOperand(0)->getType());
+ VectorType *DstTy = cast<VectorType>(II.getType());
+ unsigned NumDstElts = DstTy->getNumElements();
+
+ // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
+ SmallVector<int, 8> ShuffleMask;
+ for (int i = 0; i != (int)NumDstElts; ++i)
+ ShuffleMask.push_back(i);
+
+ Value *SV = Builder.CreateShuffleVector(II.getArgOperand(0),
+ UndefValue::get(SrcTy), ShuffleMask);
+ return SignExtend ? Builder.CreateSExt(SV, DstTy)
+ : Builder.CreateZExt(SV, DstTy);
+}
+
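SimplifyX86extend above lowers the pmov{s|z}x intrinsics to a subvector shuffle plus an ordinary sext/zext. A hypothetical sketch for the sign-extending word case:

  define <4 x i32> @sext_demo(<8 x i16> %v) {
    %e = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %v)
    ; becomes:
    ;   %lo = shufflevector <8 x i16> %v, <8 x i16> undef,
    ;                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ;   %e  = sext <4 x i16> %lo to <4 x i32>
    ret <4 x i32> %e
  }
  declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>)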
static Value *SimplifyX86insertps(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
VectorType *VecTy = cast<VectorType>(II.getType());
assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
-
+
// The immediate permute control byte looks like this:
// [3:0] - zero mask for each 32-bit lane
// [5:4] - select one 32-bit destination lane
@@ -248,12 +375,202 @@ static Value *SimplifyX86insertps(const IntrinsicInst &II,
// Replace the selected destination lane with the selected source lane.
ShuffleMask[DestLane] = SourceLane + 4;
}
-
+
return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
}
return nullptr;
}
+/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
+/// or conversion to a shuffle vector.
+static Value *SimplifyX86extrq(IntrinsicInst &II, Value *Op0,
+ ConstantInt *CILength, ConstantInt *CIIndex,
+ InstCombiner::BuilderTy &Builder) {
+ auto LowConstantHighUndef = [&](uint64_t Val) {
+ Type *IntTy64 = Type::getInt64Ty(II.getContext());
+ Constant *Args[] = {ConstantInt::get(IntTy64, Val),
+ UndefValue::get(IntTy64)};
+ return ConstantVector::get(Args);
+ };
+
+ // See if we're dealing with constant values.
+ Constant *C0 = dyn_cast<Constant>(Op0);
+ ConstantInt *CI0 =
+ C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ : nullptr;
+
+ // Attempt to constant fold.
+ if (CILength && CIIndex) {
+ // From AMD documentation: "The bit index and field length are each six
+ // bits in length other bits of the field are ignored."
+ APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
+ APInt APLength = CILength->getValue().zextOrTrunc(6);
+
+ unsigned Index = APIndex.getZExtValue();
+
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if (End > 64)
+ return UndefValue::get(II.getType());
+
+ // If we are inserting whole bytes, we can convert this to a shuffle.
+ // Lowering can recognize EXTRQI shuffle masks.
+ if ((Length % 8) == 0 && (Index % 8) == 0) {
+ // Convert bit indices to byte indices.
+ Length /= 8;
+ Index /= 8;
+
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Type *IntTy32 = Type::getInt32Ty(II.getContext());
+ VectorType *ShufTy = VectorType::get(IntTy8, 16);
+
+ SmallVector<Constant *, 16> ShuffleMask;
+ for (int i = 0; i != (int)Length; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
+ for (int i = Length; i != 8; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(UndefValue::get(IntTy32));
+
+ Value *SV = Builder.CreateShuffleVector(
+ Builder.CreateBitCast(Op0, ShufTy),
+ ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
+ return Builder.CreateBitCast(SV, II.getType());
+ }
+
+ // Constant Fold - shift Index'th bit to lowest position and mask off
+ // Length bits.
+ if (CI0) {
+ APInt Elt = CI0->getValue();
+ Elt = Elt.lshr(Index).zextOrTrunc(Length);
+ return LowConstantHighUndef(Elt.getZExtValue());
+ }
+
+ // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
+ Value *Args[] = {Op0, CILength, CIIndex};
+ Module *M = II.getModule();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
+ return Builder.CreateCall(F, Args);
+ }
+ }
+
+ // Constant Fold - extraction from zero is always {zero, undef}.
+ if (CI0 && CI0->equalsInt(0))
+ return LowConstantHighUndef(0);
+
+ return nullptr;
+}
+
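A sketch of the byte-aligned EXTRQI path above (hypothetical constants): with Length = 16 and Index = 8, whole bytes are extracted, so the call becomes a <16 x i8> shufflevector that moves bytes 1-2 of %v into bytes 0-1 of the result, zeroes result bytes 2-7, and leaves the upper 64 bits undef:

  define <2 x i64> @extrqi_demo(<2 x i64> %v) {
    %r = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %v, i8 16, i8 8)
    ret <2 x i64> %r
  }
  declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8)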
+/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
+/// folding or conversion to a shuffle vector.
+static Value *SimplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
+ APInt APLength, APInt APIndex,
+ InstCombiner::BuilderTy &Builder) {
+
+ // From AMD documentation: "The bit index and field length are each six bits
+ // in length other bits of the field are ignored."
+ APIndex = APIndex.zextOrTrunc(6);
+ APLength = APLength.zextOrTrunc(6);
+
+ // Attempt to constant fold.
+ unsigned Index = APIndex.getZExtValue();
+
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if (End > 64)
+ return UndefValue::get(II.getType());
+
+ // If we are inserting whole bytes, we can convert this to a shuffle.
+ // Lowering can recognize INSERTQI shuffle masks.
+ if ((Length % 8) == 0 && (Index % 8) == 0) {
+ // Convert bit indices to byte indices.
+ Length /= 8;
+ Index /= 8;
+
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Type *IntTy32 = Type::getInt32Ty(II.getContext());
+ VectorType *ShufTy = VectorType::get(IntTy8, 16);
+
+ SmallVector<Constant *, 16> ShuffleMask;
+ for (int i = 0; i != (int)Index; ++i)
+ ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
+ for (int i = 0; i != (int)Length; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
+ for (int i = Index + Length; i != 8; ++i)
+ ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(UndefValue::get(IntTy32));
+
+ Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
+ Builder.CreateBitCast(Op1, ShufTy),
+ ConstantVector::get(ShuffleMask));
+ return Builder.CreateBitCast(SV, II.getType());
+ }
+
+ // See if we're dealing with constant values.
+ Constant *C0 = dyn_cast<Constant>(Op0);
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CI00 =
+ C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ : nullptr;
+ ConstantInt *CI10 =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ : nullptr;
+
+ // Constant Fold - insert bottom Length bits starting at the Index'th bit.
+ if (CI00 && CI10) {
+ APInt V00 = CI00->getValue();
+ APInt V10 = CI10->getValue();
+ APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
+ V00 = V00 & ~Mask;
+ V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
+ APInt Val = V00 | V10;
+ Type *IntTy64 = Type::getInt64Ty(II.getContext());
+ Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
+ UndefValue::get(IntTy64)};
+ return ConstantVector::get(Args);
+ }
+
+ // If we were an INSERTQ call, we'll save demanded elements if we convert to
+ // INSERTQI.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Constant *CILength = ConstantInt::get(IntTy8, Length, false);
+ Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
+
+ Value *Args[] = {Op0, Op1, CILength, CIIndex};
+ Module *M = II.getModule();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+ return Builder.CreateCall(F, Args);
+ }
+
+ return nullptr;
+}
+
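The byte-aligned INSERTQI path, sketched with hypothetical constants: with Length = 8 and Index = 0, the call becomes a <16 x i8> shufflevector that takes byte 0 from %b and bytes 1-7 from %a (upper 64 bits undef):

  define <2 x i64> @insertqi_demo(<2 x i64> %a, <2 x i64> %b) {
    %r = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a,
                                                      <2 x i64> %b, i8 8, i8 0)
    ret <2 x i64> %r
  }
  declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8)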
/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
/// source vectors, unless a zero bit is set. If a zero bit is set,
/// then ignore that half of the mask and clear that half of the vector.
@@ -289,7 +606,7 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
// The high bit of the selection field chooses the 1st or 2nd operand.
bool LowInputSelect = Imm & 0x02;
bool HighInputSelect = Imm & 0x20;
-
+
// The low bit of the selection field chooses the low or high half
// of the selected operand.
bool LowHalfSelect = Imm & 0x01;
@@ -298,11 +615,11 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
// Determine which operand(s) are actually in use for this instruction.
Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
-
+
// If needed, replace operands based on zero mask.
V0 = LowHalfZero ? ZeroVector : V0;
V1 = HighHalfZero ? ZeroVector : V1;
-
+
// Permute low half of result.
unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
for (unsigned i = 0; i < HalfSize; ++i)
@@ -319,6 +636,43 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
return nullptr;
}
+/// Decode XOP integer vector comparison intrinsics.
+static Value *SimplifyX86vpcom(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder, bool IsSigned) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ uint64_t Imm = CInt->getZExtValue() & 0x7;
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+
+ switch (Imm) {
+ case 0x0:
+ Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ break;
+ case 0x1:
+ Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ break;
+ case 0x2:
+ Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ break;
+ case 0x3:
+ Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ break;
+ case 0x4:
+ Pred = ICmpInst::ICMP_EQ; break;
+ case 0x5:
+ Pred = ICmpInst::ICMP_NE; break;
+ case 0x6:
+ return ConstantInt::getSigned(VecTy, 0); // FALSE
+ case 0x7:
+ return ConstantInt::getSigned(VecTy, -1); // TRUE
+ }
+
+    if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
+                                        II.getArgOperand(1)))
+ return Builder.CreateSExtOrTrunc(Cmp, VecTy);
+ }
+ return nullptr;
+}
+
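A sketch of the vpcom decoding above (hypothetical): immediate 0 selects LT, so the signed byte compare becomes a generic icmp plus a sign extension of the mask:

  define <16 x i8> @vpcom_demo(<16 x i8> %a, <16 x i8> %b) {
    %r = tail call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a,
                                                  <16 x i8> %b, i8 0)
    ; becomes:
    ;   %c = icmp slt <16 x i8> %a, %b
    ;   %r = sext <16 x i1> %c to <16 x i8>
    ret <16 x i8> %r
  }
  declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8)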
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
@@ -371,7 +725,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
if (GVSrc->isConstant()) {
- Module *M = CI.getParent()->getParent()->getParent();
+ Module *M = CI.getModule();
Intrinsic::ID MemCpyID = Intrinsic::memcpy;
Type *Tys[3] = { CI.getArgOperand(0)->getType(),
CI.getArgOperand(1)->getType(),
@@ -400,6 +754,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
+  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
+                                              unsigned DemandedWidth) {
+ APInt UndefElts(Width, 0);
+ APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
+ return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
+ };
+
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::objectsize: {
@@ -427,6 +788,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::bitreverse: {
+ Value *IIOperand = II->getArgOperand(0);
+ Value *X = nullptr;
+
+ // bitreverse(bitreverse(x)) -> x
+ if (match(IIOperand, m_Intrinsic<Intrinsic::bitreverse>(m_Value(X))))
+ return ReplaceInstUsesWith(CI, X);
+ break;
+ }
+
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// powi(x, 0) -> 1.0
@@ -669,6 +1040,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return new StoreInst(II->getArgOperand(0), Ptr);
}
break;
+
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
@@ -682,6 +1054,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
+ case Intrinsic::x86_vcvtph2ps_128:
+ case Intrinsic::x86_vcvtph2ps_256: {
+ auto Arg = II->getArgOperand(0);
+ auto ArgType = cast<VectorType>(Arg->getType());
+ auto RetType = cast<VectorType>(II->getType());
+ unsigned ArgWidth = ArgType->getNumElements();
+ unsigned RetWidth = RetType->getNumElements();
+ assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
+ assert(ArgType->isIntOrIntVectorTy() &&
+ ArgType->getScalarSizeInBits() == 16 &&
+ "CVTPH2PS input type should be 16-bit integer vector");
+ assert(RetType->getScalarType()->isFloatTy() &&
+ "CVTPH2PS output type should be 32-bit float vector");
+
+ // Constant folding: Convert to generic half to single conversion.
+ if (isa<ConstantAggregateZero>(Arg))
+ return ReplaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
+
+ if (isa<ConstantDataVector>(Arg)) {
+ auto VectorHalfAsShorts = Arg;
+ if (RetWidth < ArgWidth) {
+ SmallVector<int, 8> SubVecMask;
+ for (unsigned i = 0; i != RetWidth; ++i)
+ SubVecMask.push_back((int)i);
+ VectorHalfAsShorts = Builder->CreateShuffleVector(
+ Arg, UndefValue::get(ArgType), SubVecMask);
+ }
+
+ auto VectorHalfType =
+ VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
+ auto VectorHalfs =
+ Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType);
+ auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType);
+ return ReplaceInstUsesWith(*II, VectorFloats);
+ }
+
+ // We only use the lowest lanes of the argument.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
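For reference, the shape of the CVTPH2PS case above (hypothetical names): only the low RetWidth lanes of the argument are demanded, and a constant argument is folded via a generic half-to-single conversion:

  define <4 x float> @cvtph_demo(<8 x i16> %h) {
    ; Only lanes 0-3 of %h are demanded; a constant %h would be folded by
    ; bitcasting the low lanes to <4 x half> and fpext'ing to <4 x float>.
    %r = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %h)
    ret <4 x float> %r
  }
  declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)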
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse_cvttss2si:
@@ -692,194 +1108,229 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_cvttsd2si64: {
// These intrinsics only demand the 0th element of their input vectors. If
// we can simplify the input based on that, do so now.
- unsigned VWidth =
- cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
- APInt DemandedElts(VWidth, 1);
- APInt UndefElts(VWidth, 0);
- if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
- DemandedElts, UndefElts)) {
+ Value *Arg = II->getArgOperand(0);
+ unsigned VWidth = Arg->getType()->getVectorNumElements();
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
II->setArgOperand(0, V);
return II;
}
break;
}
- // Constant fold <A x Bi> << Ci.
- // FIXME: We don't handle _dq because it's a shift of an i128, but is
- // represented in the IR as <2 x i64>. A per element shift is wrong.
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_sse2_psll_w:
+ // Constant fold ashr( <A x Bi>, Ci ).
+ // Constant fold lshr( <A x Bi>, Ci ).
+ // Constant fold shl( <A x Bi>, Ci ).
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
+ if (Value *V = SimplifyX86immshift(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w: {
- // Simplify if count is constant. To 0 if >= BitWidth,
- // otherwise to shl/lshr.
- auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
- auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
- if (!CDV && !CInt)
- break;
- ConstantInt *Count;
- if (CDV)
- Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
- else
- Count = CInt;
-
- auto Vec = II->getArgOperand(0);
- auto VT = cast<VectorType>(Vec->getType());
- if (Count->getZExtValue() >
- VT->getElementType()->getPrimitiveSizeInBits() - 1)
- return ReplaceInstUsesWith(
- CI, ConstantAggregateZero::get(Vec->getType()));
-
- bool isPackedShiftLeft = true;
- switch (II->getIntrinsicID()) {
- default : break;
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
- }
-
- unsigned VWidth = VT->getNumElements();
- // Get a constant vector of the same type as the first operand.
- auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
- if (isPackedShiftLeft)
- return BinaryOperator::CreateShl(Vec,
- Builder->CreateVectorSplat(VWidth, VTCI));
-
- return BinaryOperator::CreateLShr(Vec,
- Builder->CreateVectorSplat(VWidth, VTCI));
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w: {
+ if (Value *V = SimplifyX86immshift(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+
+ // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ Value *Arg1 = II->getArgOperand(1);
+ assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
+ "Unexpected packed shift size");
+ unsigned VWidth = Arg1->getType()->getVectorNumElements();
+
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
+ II->setArgOperand(1, V);
+ return II;
+ }
+ break;
}
- case Intrinsic::x86_sse41_pmovsxbw:
- case Intrinsic::x86_sse41_pmovsxwd:
- case Intrinsic::x86_sse41_pmovsxdq:
+ case Intrinsic::x86_avx2_pmovsxbd:
+ case Intrinsic::x86_avx2_pmovsxbq:
+ case Intrinsic::x86_avx2_pmovsxbw:
+ case Intrinsic::x86_avx2_pmovsxdq:
+ case Intrinsic::x86_avx2_pmovsxwd:
+ case Intrinsic::x86_avx2_pmovsxwq:
+ if (Value *V = SimplifyX86extend(*II, *Builder, true))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse41_pmovzxbd:
+ case Intrinsic::x86_sse41_pmovzxbq:
case Intrinsic::x86_sse41_pmovzxbw:
+ case Intrinsic::x86_sse41_pmovzxdq:
case Intrinsic::x86_sse41_pmovzxwd:
- case Intrinsic::x86_sse41_pmovzxdq: {
- // pmov{s|z}x ignores the upper half of their input vectors.
- unsigned VWidth =
- cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
- unsigned LowHalfElts = VWidth / 2;
- APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
- APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(
- II->getArgOperand(0), InputDemandedElts, UndefElts)) {
- II->setArgOperand(0, TmpV);
+ case Intrinsic::x86_sse41_pmovzxwq:
+ case Intrinsic::x86_avx2_pmovzxbd:
+ case Intrinsic::x86_avx2_pmovzxbq:
+ case Intrinsic::x86_avx2_pmovzxbw:
+ case Intrinsic::x86_avx2_pmovzxdq:
+ case Intrinsic::x86_avx2_pmovzxwd:
+ case Intrinsic::x86_avx2_pmovzxwq:
+ if (Value *V = SimplifyX86extend(*II, *Builder, false))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = SimplifyX86insertps(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse4a_extrq: {
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth0 = Op0->getType()->getVectorNumElements();
+ unsigned VWidth1 = Op1->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
+ VWidth1 == 16 && "Unexpected operand sizes");
+
+ // See if we're dealing with constant values.
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CILength =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ : nullptr;
+ ConstantInt *CIIndex =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ : nullptr;
+
+ // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
+ if (Value *V = SimplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+
+ // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
+ // operands and the lowest 16-bits of the second.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
+ II->setArgOperand(1, V);
return II;
}
break;
}
- case Intrinsic::x86_sse41_insertps:
- if (Value *V = SimplifyX86insertps(*II, *Builder))
+
+ case Intrinsic::x86_sse4a_extrqi: {
+ // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
+ // bits of the lower 64-bits. The upper 64-bits are undefined.
+ Value *Op0 = II->getArgOperand(0);
+ unsigned VWidth = Op0->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
+ "Unexpected operand size");
+
+ // See if we're dealing with constant values.
+ ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
+ ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
+
+ // Attempt to simplify to a constant or shuffle vector.
+ if (Value *V = SimplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
return ReplaceInstUsesWith(*II, V);
+
+ // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
+ // operand.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse4a_insertq: {
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth = Op0->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
+ Op1->getType()->getVectorNumElements() == 2 &&
+ "Unexpected operand size");
+
+ // See if we're dealing with constant values.
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CI11 =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ : nullptr;
+
+ // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
+ if (CI11) {
+ APInt V11 = CI11->getValue();
+ APInt Len = V11.zextOrTrunc(6);
+ APInt Idx = V11.lshr(8).zextOrTrunc(6);
+ if (Value *V = SimplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ }
+
+ // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
+ // operand.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
break;
-
+ }
+
case Intrinsic::x86_sse4a_insertqi: {
- // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
- // ones undef
- // TODO: eventually we should lower this intrinsic to IR
- if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
- unsigned Index = CIStart->getZExtValue();
- // From AMD documentation: "a value of zero in the field length is
- // defined as length of 64".
- unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue();
-
- // From AMD documentation: "If the sum of the bit index + length field
- // is greater than 64, the results are undefined".
-
- // Note that both field index and field length are 8-bit quantities.
- // Since variables 'Index' and 'Length' are unsigned values
- // obtained from zero-extending field index and field length
- // respectively, their sum should never wrap around.
- if ((Index + Length) > 64)
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (Length == 64 && Index == 0) {
- Value *Vec = II->getArgOperand(1);
- Value *Undef = UndefValue::get(Vec->getType());
- const uint32_t Mask[] = { 0, 2 };
- return ReplaceInstUsesWith(
- CI,
- Builder->CreateShuffleVector(
- Vec, Undef, ConstantDataVector::get(
- II->getContext(), makeArrayRef(Mask))));
-
- } else if (auto Source =
- dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
- if (Source->hasOneUse() &&
- Source->getArgOperand(1) == II->getArgOperand(1)) {
- // If the source of the insert has only one use and it's another
- // insert (and they're both inserting from the same vector), try to
- // bundle both together.
- auto CISourceWidth =
- dyn_cast<ConstantInt>(Source->getArgOperand(2));
- auto CISourceStart =
- dyn_cast<ConstantInt>(Source->getArgOperand(3));
- if (CISourceStart && CISourceWidth) {
- unsigned Start = CIStart->getZExtValue();
- unsigned Width = CIWidth->getZExtValue();
- unsigned End = Start + Width;
- unsigned SourceStart = CISourceStart->getZExtValue();
- unsigned SourceWidth = CISourceWidth->getZExtValue();
- unsigned SourceEnd = SourceStart + SourceWidth;
- unsigned NewStart, NewWidth;
- bool ShouldReplace = false;
- if (Start <= SourceStart && SourceStart <= End) {
- NewStart = Start;
- NewWidth = std::max(End, SourceEnd) - NewStart;
- ShouldReplace = true;
- } else if (SourceStart <= Start && Start <= SourceEnd) {
- NewStart = SourceStart;
- NewWidth = std::max(SourceEnd, End) - NewStart;
- ShouldReplace = true;
- }
-
- if (ShouldReplace) {
- Constant *ConstantWidth = ConstantInt::get(
- II->getArgOperand(2)->getType(), NewWidth, false);
- Constant *ConstantStart = ConstantInt::get(
- II->getArgOperand(3)->getType(), NewStart, false);
- Value *Args[4] = { Source->getArgOperand(0),
- II->getArgOperand(1), ConstantWidth,
- ConstantStart };
- Module *M = CI.getParent()->getParent()->getParent();
- Value *F =
- Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
- return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
- }
- }
- }
- }
- }
+ // INSERTQI: Extract lowest Length bits from lower half of second source and
+ // insert over first source starting at Index bit. The upper 64-bits are
+ // undefined.
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth0 = Op0->getType()->getVectorNumElements();
+ unsigned VWidth1 = Op1->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
+ VWidth1 == 2 && "Unexpected operand sizes");
+
+ // See if we're dealing with constant values.
+ ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
+ ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
+
+ // Attempt to simplify to a constant or shuffle vector.
+ if (CILength && CIIndex) {
+ APInt Len = CILength->getValue().zextOrTrunc(6);
+ APInt Idx = CIIndex->getValue().zextOrTrunc(6);
+ if (Value *V = SimplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ }
+
+ // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
+ // operands.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
+ II->setArgOperand(1, V);
+ return II;
}
break;
}
@@ -894,7 +1345,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// This optimization is convoluted because the intrinsic is defined as
// getting a vector of floats or doubles for the ps and pd versions.
// FIXME: That should be changed.
+
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
Value *Mask = II->getArgOperand(2);
+
+ // fold (blend A, A, Mask) -> A
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Zero Mask - select 1st argument.
+ if (isa<ConstantAggregateZero>(Mask))
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
auto Tyi1 = Builder->getInt1Ty();
auto SelectorType = cast<VectorType>(Mask->getType());
@@ -917,11 +1381,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
}
auto NewSelector = ConstantVector::get(Selectors);
- return SelectInst::Create(NewSelector, II->getArgOperand(1),
- II->getArgOperand(0), "blendv");
- } else {
- break;
+ return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
}
+ break;
+ }
+
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b: {
+ // Turn pshufb(V1,mask) -> shuffle(V1,Zero,mask) if mask is a constant.
+ auto *V = II->getArgOperand(1);
+ auto *VTy = cast<VectorType>(V->getType());
+ unsigned NumElts = VTy->getNumElements();
+ assert((NumElts == 16 || NumElts == 32) &&
+ "Unexpected number of elements in shuffle mask!");
+ // Initialize the resulting shuffle mask to all zeroes.
+ uint32_t Indexes[32] = {0};
+
+ if (auto *Mask = dyn_cast<ConstantDataVector>(V)) {
+ // Each byte in the shuffle control mask forms an index to permute the
+ // corresponding byte in the destination operand.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ int8_t Index = Mask->getElementAsInteger(I);
+ // If the most significant bit (bit[7]) of each byte of the shuffle
+ // control mask is set, then zero is written in the result byte.
+ // The zero vector is in the right-hand side of the resulting
+ // shufflevector.
+
+ // The value of each index is the least significant 4 bits of the
+ // shuffle control byte.
+ Indexes[I] = (Index < 0) ? NumElts : Index & 0xF;
+ }
+ } else if (!isa<ConstantAggregateZero>(V))
+ break;
+
+ // The value of each index for the high 128-bit lane is the least
+ // significant 4 bits of the respective shuffle control byte.
+ for (unsigned I = 16; I < NumElts; ++I)
+ Indexes[I] += I & 0xF0;
+
+ auto NewC = ConstantDataVector::get(V->getContext(),
+ makeArrayRef(Indexes, NumElts));
+ auto V1 = II->getArgOperand(0);
+ auto V2 = Constant::getNullValue(II->getType());
+ auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
+ return ReplaceInstUsesWith(CI, Shuffle);
}
case Intrinsic::x86_avx_vpermilvar_ps:
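Illustrating the pshufb fold added in the hunk above (hypothetical IR): a constant control mask becomes a shufflevector against a zero vector, with any byte whose control has bit 7 set drawing from the zero operand:

  define <16 x i8> @pshufb_demo(<16 x i8> %v) {
    %r = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %v,
             <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7,
                        i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 -1>)
    ; becomes a shufflevector of %v and zeroinitializer with mask
    ; <0, 1, ..., 14, 16>, where index 16 selects a zero byte.
    ret <16 x i8> %r
  }
  declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)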
@@ -972,6 +1475,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(*II, V);
break;
+ case Intrinsic::x86_xop_vpcomb:
+ case Intrinsic::x86_xop_vpcomd:
+ case Intrinsic::x86_xop_vpcomq:
+ case Intrinsic::x86_xop_vpcomw:
+ if (Value *V = SimplifyX86vpcom(*II, *Builder, true))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_xop_vpcomub:
+ case Intrinsic::x86_xop_vpcomud:
+ case Intrinsic::x86_xop_vpcomuq:
+ case Intrinsic::x86_xop_vpcomuw:
+ if (Value *V = SimplifyX86vpcom(*II, *Builder, false))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -1115,15 +1634,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// happen when variable allocas are DCE'd.
if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
if (SS->getIntrinsicID() == Intrinsic::stacksave) {
- BasicBlock::iterator BI = SS;
- if (&*++BI == II)
+ if (&*++SS->getIterator() == II)
return EraseInstFromFunction(CI);
}
}
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
- BasicBlock::iterator BI = II;
+ BasicBlock::iterator BI(II);
TerminatorInst *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
@@ -1153,6 +1671,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return EraseInstFromFunction(CI);
break;
}
+ case Intrinsic::lifetime_start: {
+ // Remove trivially empty lifetime_start/end ranges, i.e. a start
+ // immediately followed by an end (ignoring debuginfo or other
+ // lifetime markers in between).
+ BasicBlock::iterator BI = II->getIterator(), BE = II->getParent()->end();
+ for (++BI; BI != BE; ++BI) {
+ if (IntrinsicInst *LTE = dyn_cast<IntrinsicInst>(BI)) {
+ if (isa<DbgInfoIntrinsic>(LTE) ||
+ LTE->getIntrinsicID() == Intrinsic::lifetime_start)
+ continue;
+ if (LTE->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II->getOperand(0) == LTE->getOperand(0) &&
+ II->getOperand(1) == LTE->getOperand(1)) {
+ EraseInstFromFunction(*LTE);
+ return EraseInstFromFunction(*II);
+ }
+ continue;
+ }
+ }
+ break;
+ }
+ break;
+ }
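The trivially-empty range removal above, sketched (hypothetical IR): the start and end markers cover the same size and pointer with nothing but other lifetime starts or debug intrinsics between them, so both calls are erased:

  define void @lifetime_demo(i8* %p) {
    call void @llvm.lifetime.start(i64 16, i8* %p)
    call void @llvm.lifetime.end(i64 16, i8* %p)
    ret void
  }
  declare void @llvm.lifetime.start(i64, i8*)
  declare void @llvm.lifetime.end(i64, i8*)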
case Intrinsic::assume: {
// Canonicalize assume(a && b) -> assume(a); assume(b);
// Note: New assumption intrinsics created here are registered by
@@ -1233,7 +1774,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
// isKnownNonNull -> nonnull attribute
- if (isKnownNonNull(DerivedPtr))
+ if (isKnownNonNullAt(DerivedPtr, II, DT, TLI))
II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
// isDereferenceablePointer -> deref attribute
@@ -1355,9 +1896,10 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
Value *TrampMem) {
// Visit all the previous instructions in the basic block, and try to find a
// init.trampoline which has a direct path to the adjust.trampoline.
- for (BasicBlock::iterator I = AdjustTramp,
- E = AdjustTramp->getParent()->begin(); I != E; ) {
- Instruction *Inst = --I;
+ for (BasicBlock::iterator I = AdjustTramp->getIterator(),
+ E = AdjustTramp->getParent()->begin();
+ I != E;) {
+ Instruction *Inst = &*--I;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
II->getOperand(0) == TrampMem)
@@ -1400,20 +1942,27 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// Mark any parameters that are known to be non-null with the nonnull
// attribute. This is helpful for inlining calls to functions with null
// checks on their arguments.
+ SmallVector<unsigned, 4> Indices;
unsigned ArgNo = 0;
+
for (Value *V : CS.args()) {
- if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) &&
- isKnownNonNull(V)) {
- AttributeSet AS = CS.getAttributes();
- AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1,
- Attribute::NonNull);
- CS.setAttributes(AS);
- Changed = true;
- }
+    if (V->getType()->isPointerTy() &&
+        !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+        isKnownNonNullAt(V, CS.getInstruction(), DT, TLI))
+ Indices.push_back(ArgNo + 1);
ArgNo++;
}
+
assert(ArgNo == CS.arg_size() && "sanity check");
+ if (!Indices.empty()) {
+ AttributeSet AS = CS.getAttributes();
+ LLVMContext &Ctx = CS.getInstruction()->getContext();
+ AS = AS.addAttribute(Ctx, Indices,
+ Attribute::get(Ctx, Attribute::NonNull));
+ CS.setAttributes(AS);
+ Changed = true;
+ }
+
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
@@ -1725,16 +2274,19 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
attrVec);
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
+
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
- II->getUnwindDest(), Args);
+ NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles);
NC->takeName(II);
cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
} else {
CallInst *CI = cast<CallInst>(Caller);
- NC = Builder->CreateCall(Callee, Args);
+ NC = Builder->CreateCall(Callee, Args, OpBundles);
NC->takeName(CI);
if (CI->isTailCall())
cast<CallInst>(NC)->setTailCall();
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 48ab0eb2c1b9..da835a192322 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -21,11 +21,11 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
-/// expression. If so, decompose it, returning some value X, such that Val is
+/// Analyze 'Val', seeing if it is a simple linear expression.
+/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
-static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
uint64_t &Offset) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
Offset = CI->getZExtValue();
@@ -62,7 +62,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
// where C1 is divisible by C2.
unsigned SubScale;
Value *SubVal =
- DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
Offset += RHS->getZExtValue();
Scale = SubScale;
return SubVal;
@@ -76,14 +76,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
return Val;
}
-/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
-/// try to eliminate the cast by moving the type information into the alloc.
+/// If we find a cast of an allocation instruction, try to eliminate the cast by
+/// moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
- AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+ AllocaBuilder.SetInsertPoint(&AI);
// Get the type really allocated and the type casted to.
Type *AllocElTy = AI.getAllocatedType();
@@ -114,7 +114,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
unsigned ArraySizeScale;
uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
- DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+ decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
// If we can now satisfy the modulus, by using a non-1 scale, we really can
// do the xform.
@@ -154,9 +154,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-/// EvaluateInDifferentType - Given an expression that
-/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
-/// insert the code to evaluate the expression.
+/// Given an expression that canEvaluateTruncated or canEvaluateSExtd returns
+/// true for, actually insert the code to evaluate the expression.
Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
@@ -261,9 +260,9 @@ isEliminableCastPair(const CastInst *CI, ///< First cast instruction
return Instruction::CastOps(Res);
}
-/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
-/// results in any code being generated and is interesting to optimize out. If
-/// the cast can be eliminated by some other simple transformation, we prefer
+/// Return true if the cast from "V to Ty" actually results in any code being
+/// generated and is interesting to optimize out.
+/// If the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
Type *Ty) {
@@ -318,9 +317,9 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return nullptr;
}
-/// CanEvaluateTruncated - Return true if we can evaluate the specified
-/// expression tree as type Ty instead of its larger type, and arrive with the
-/// same value. This is used by code that tries to eliminate truncates.
+/// Return true if we can evaluate the specified expression tree as type Ty
+/// instead of its larger type, and arrive with the same value.
+/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
@@ -329,7 +328,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
+static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
Instruction *CxtI) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
@@ -359,8 +358,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
case Instruction::Or:
case Instruction::Xor:
// These operators can all arbitrarily be extended or truncated.
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
case Instruction::UDiv:
case Instruction::URem: {
@@ -371,8 +370,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
}
}
break;
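A brute-force confirmation (plain C++) of this rule for the i32 -> i8 case: when the bits that truncation would drop are already zero in both operands (values below 256 here), dividing in the narrow type matches dividing wide and then truncating:

#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t A = 0; A < 256; ++A)        // high 24 bits known zero
    for (uint32_t B = 1; B < 256; ++B) {    // likewise, and nonzero
      uint8_t Wide = (uint8_t)(A / B);      // udiv in i32, then trunc
      uint8_t Narrow = (uint8_t)((uint8_t)A / (uint8_t)B); // udiv in i8
      if (Wide != Narrow) {
        std::printf("mismatch at %u / %u\n", A, B);
        return 1;
      }
    }
  std::puts("udiv can be evaluated in the narrow type for masked inputs");
}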
@@ -383,7 +382,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint32_t BitWidth = Ty->getScalarSizeInBits();
if (CI->getLimitedValue(BitWidth) < BitWidth)
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
}
break;
case Instruction::LShr:
@@ -396,7 +395,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
if (IC.MaskedValueIsZero(I->getOperand(0),
APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) &&
CI->getLimitedValue(BitWidth) < BitWidth) {
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
}
}
break;
@@ -410,8 +409,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
- return CanEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
- CanEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
+ return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
+ canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
}
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -419,7 +418,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI))
+ if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
return false;
return true;
}
@@ -431,6 +430,50 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
return false;
}
+/// Given a vector that is bitcast to an integer, optionally logically
+/// right-shifted, and truncated, convert it to an extractelement.
+/// Example (big endian):
+/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
+/// --->
+/// extractelement <4 x i32> %X, 1
+static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
+ const DataLayout &DL) {
+ Value *TruncOp = Trunc.getOperand(0);
+ Type *DestType = Trunc.getType();
+ if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
+ return nullptr;
+
+ Value *VecInput = nullptr;
+ ConstantInt *ShiftVal = nullptr;
+ if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
+ m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShiftVal)))) ||
+ !isa<VectorType>(VecInput->getType()))
+ return nullptr;
+
+ VectorType *VecType = cast<VectorType>(VecInput->getType());
+ unsigned VecWidth = VecType->getPrimitiveSizeInBits();
+ unsigned DestWidth = DestType->getPrimitiveSizeInBits();
+ unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
+
+ if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
+ return nullptr;
+
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to a vector type that we can extract from.
+ unsigned NumVecElts = VecWidth / DestWidth;
+ if (VecType->getElementType() != DestType) {
+ VecType = VectorType::get(DestType, NumVecElts);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+ }
+
+ unsigned Elt = ShiftAmount / DestWidth;
+ if (DL.isBigEndian())
+ Elt = NumVecElts - 1 - Elt;
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
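A standalone sketch (plain C++) of just the index arithmetic above: the element selected by trunc(lshr(bitcast ...)) is ShiftAmount / DestWidth counting from the low end of the wide integer, mirrored on big-endian targets:

#include <cstdio>

int main() {
  const unsigned DestWidth = 32, NumElts = 4;   // the <4 x i32> -> i128 case
  for (unsigned ShiftAmount = 0; ShiftAmount < 128; ShiftAmount += 32) {
    unsigned Elt = ShiftAmount / DestWidth;     // index on little-endian
    unsigned BigEndianElt = NumElts - 1 - Elt;  // mirrored on big-endian
    std::printf("lshr %3u -> extractelement index %u (LE) / %u (BE)\n",
                ShiftAmount, Elt, BigEndianElt);
  }
}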
+
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
@@ -441,7 +484,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// min/max.
Value *LHS, *RHS;
if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
- if (matchSelectPattern(SI, LHS, RHS) != SPF_UNKNOWN)
+ if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
// See if we can simplify any instructions used by the input whose sole
@@ -457,7 +500,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateTruncated(Src, DestTy, *this, &CI)) {
+ canEvaluateTruncated(Src, DestTy, *this, &CI)) {
// If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win.
@@ -470,7 +513,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
if (DestTy->getScalarSizeInBits() == 1) {
- Constant *One = ConstantInt::get(Src->getType(), 1);
+ Constant *One = ConstantInt::get(SrcTy, 1);
Src = Builder->CreateAnd(Src, One);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
@@ -489,31 +532,54 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// If the shift amount is larger than the size of A, then the result is
// known to be zero because all the input bits got shifted out.
if (Cst->getZExtValue() >= ASize)
- return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(DestTy));
// Since we're doing an lshr and a zero extend, and know that the shift
// amount is smaller than ASize, it is always safe to do the shift in A's
// type, then zero extend or truncate to the result.
Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
Shift->takeName(Src);
- return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ return CastInst::CreateIntegerCast(Shift, DestTy, false);
+ }
+
+ // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
+ // conversion.
+ // It works because bits coming from sign extension have the same value as
+ // the sign bit of the original value; performing ashr instead of lshr
+ // generates bits of the same value as the sign bit.
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+ const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+ // This optimization can be only performed when zero bits generated by
+ // the original lshr aren't pulled into the value after truncation, so we
+ // can only shift by values smaller than the size of destination type (in
+ // bits).
+ if (Cst->getValue().ult(ASize)) {
+ Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+ }
}
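The sext/lshr/trunc-to-ashr identity can be confirmed exhaustively at a small width; a plain-C++ check for i8 widened to i32 and every shift below the destination width (assumes arithmetic right shift on signed values, which mainstream compilers provide):

#include <cstdint>
#include <cstdio>

int main() {
  for (int A = -128; A <= 127; ++A)
    for (unsigned C = 0; C < 8; ++C) {           // shift below dest width
      uint32_t Wide = (uint32_t)(int32_t)A;      // sext A to i32
      int8_t ViaLshr = (int8_t)(Wide >> C);      // lshr, then trunc to i8
      int8_t ViaAshr = (int8_t)((int8_t)A >> C); // ashr directly on i8
      if (ViaLshr != ViaAshr) {
        std::printf("mismatch: A=%d C=%u\n", A, C);
        return 1;
      }
    }
  std::puts("trunc(lshr(sext A), C) == ashr(A, C) for all i8 A, C < 8");
}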
// Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
// type isn't non-native.
- if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
- ShouldChangeType(Src->getType(), CI.getType()) &&
+ if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
+ ShouldChangeType(SrcTy, DestTy) &&
match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
- Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+ Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr");
return BinaryOperator::CreateAnd(NewTrunc,
- ConstantExpr::getTrunc(Cst, CI.getType()));
+ ConstantExpr::getTrunc(Cst, DestTy));
}
+ if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
+ return I;
+
return nullptr;
}
-/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
-/// in order to eliminate the icmp.
+/// Transform (zext icmp) to bitwise / integer operations in order to eliminate
+/// the icmp.
Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform) {
// If we are just checking for a icmp eq of a single bit and zext'ing it
@@ -637,8 +703,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return nullptr;
}
-/// CanEvaluateZExtd - Determine if the specified value can be computed in the
-/// specified wider type and produce the same low bits. If not, return false.
+/// Determine if the specified value can be computed in the specified wider type
+/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
@@ -655,7 +721,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
-static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
+static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
InstCombiner &IC, Instruction *CxtI) {
BitsToClear = 0;
if (isa<Constant>(V))
@@ -685,8 +751,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
- !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
+ !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
return false;
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
@@ -713,7 +779,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// We can promote shl(x, cst) if we can promote x. Since shl overwrites the
// upper bits we can reduce BitsToClear by the shift amount.
if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
uint64_t ShiftAmt = Amt->getZExtValue();
BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
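A small plain-C++ model of this bookkeeping: promoting shl overwrites high bits, so the pending bits-to-clear shrink by the shift amount, while promoting lshr (handled in the next hunk) pulls unknown high bits down and grows them:

#include <cstdio>

static unsigned afterShl(unsigned BitsToClear, unsigned ShiftAmt) {
  return ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
}

static unsigned afterLshr(unsigned BitsToClear, unsigned ShiftAmt) {
  return BitsToClear + ShiftAmt;  // caller must still check the type width
}

int main() {
  unsigned B = 4;                 // 4 high bits currently unreliable
  B = afterShl(B, 3);             // shl 3 overwrites three of them -> 1
  B = afterLshr(B, 2);            // lshr 2 exposes two more -> 3
  std::printf("bits to clear after shl 3 then lshr 2: %u\n", B);
}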
@@ -724,7 +790,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
BitsToClear += Amt->getZExtValue();
if (BitsToClear > V->getType()->getScalarSizeInBits())
@@ -734,8 +800,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// Cannot promote variable LSHR.
return false;
case Instruction::Select:
- if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
- !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
+ if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
+ !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear are
// known zero in the disagreeing side.
Tmp != BitsToClear)
@@ -747,10 +813,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
return false;
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
+ if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear
// are known zero in the disagreeing input.
Tmp != BitsToClear)
@@ -787,13 +853,13 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// strange.
unsigned BitsToClear;
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
+ canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid zero extend: " << CI);
+ " to avoid zero extend: " << CI << '\n');
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
@@ -897,8 +963,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
return nullptr;
}
-/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
-/// in order to eliminate the icmp.
+/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
ICmpInst::Predicate Pred = ICI->getPredicate();
@@ -985,15 +1050,14 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
return nullptr;
}
-/// CanEvaluateSExtd - Return true if we can take the specified value
-/// and return it as type Ty without inserting any new casts and without
-/// changing the value of the common low bits. This is used by code that tries
-/// to promote integer operations to a wider types will allow us to eliminate
-/// the extension.
+/// Return true if we can take the specified value and return it as type Ty
+/// without inserting any new casts and without changing the value of the common
+/// low bits. This is used by code that tries to promote integer operations to
+/// a wider type, which will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateSExtd(Value *V, Type *Ty) {
+static bool canEvaluateSExtd(Value *V, Type *Ty) {
assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"Can't sign extend type to a smaller type");
// If this is a constant, it can be trivially promoted.
@@ -1023,15 +1087,15 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
case Instruction::Sub:
case Instruction::Mul:
// These operators can all arbitrarily be extended if their inputs can.
- return CanEvaluateSExtd(I->getOperand(0), Ty) &&
- CanEvaluateSExtd(I->getOperand(1), Ty);
+ return canEvaluateSExtd(I->getOperand(0), Ty) &&
+ canEvaluateSExtd(I->getOperand(1), Ty);
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
case Instruction::Select:
- return CanEvaluateSExtd(I->getOperand(1), Ty) &&
- CanEvaluateSExtd(I->getOperand(2), Ty);
+ return canEvaluateSExtd(I->getOperand(1), Ty) &&
+ canEvaluateSExtd(I->getOperand(2), Ty);
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -1039,7 +1103,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!CanEvaluateSExtd(IncValue, Ty)) return false;
+ if (!canEvaluateSExtd(IncValue, Ty)) return false;
return true;
}
default:
@@ -1081,10 +1145,10 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateSExtd(Src, DestTy)) {
+ canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid sign extend: " << CI);
+ " to avoid sign extend: " << CI << '\n');
Value *Res = EvaluateInDifferentType(Src, DestTy, true);
assert(Res->getType() == DestTy);
@@ -1149,9 +1213,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
}
-/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// Return a Constant* for the specified floating-point constant if it fits
/// in the specified FP type without changing its value.
-static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+static Constant *fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
bool losesInfo;
APFloat F = CFP->getValueAPF();
(void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
@@ -1160,12 +1224,12 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
return nullptr;
}
-/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// If this is a floating-point extension instruction, look
/// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V) {
+static Value *lookThroughFPExtensions(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::FPExt)
- return LookThroughFPExtensions(I->getOperand(0));
+ return lookThroughFPExtensions(I->getOperand(0));
// If this value is a constant, return the constant in the smallest FP type
// that can accurately represent it. This allows us to turn
@@ -1174,14 +1238,14 @@ static Value *LookThroughFPExtensions(Value *V) {
if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
return V; // No constant folding of this.
// See if the value can be truncated to half and then reextended.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf))
return V;
// See if the value can be truncated to float and then reextended.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle))
return V;
if (CFP->getType()->isDoubleTy())
return V; // Won't shrink.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble))
return V;
// Don't try to shrink to various long double types.
}
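fitsInFPType's losesInfo flag is the APFloat form of a round-trip test; a plain-C++ analogue for double -> float (NaN and the long-double cases handled above are omitted):

#include <cstdio>

static bool fitsInFloat(double D) {
  float F = (float)D;             // convert with rounding
  return (double)F == D;          // exact iff no information was lost
}

int main() {
  std::printf("1.5  fits in float: %d\n", fitsInFloat(1.5));  // 1
  std::printf("0.1  fits in float: %d\n", fitsInFloat(0.1));  // 0
  std::printf("2^60 fits in float: %d\n",
              fitsInFloat(1152921504606846976.0));            // 1
}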
@@ -1193,7 +1257,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
// If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
- // simpilify this expression to avoid one or more of the trunc/extend
+ // simplify this expression to avoid one or more of the trunc/extend
// operations if we can do so without changing the numerical results.
//
// The exact manner in which the widths of the operands interact to limit
@@ -1201,8 +1265,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// is explained below in the various case statements.
BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
if (OpI && OpI->hasOneUse()) {
- Value *LHSOrig = LookThroughFPExtensions(OpI->getOperand(0));
- Value *RHSOrig = LookThroughFPExtensions(OpI->getOperand(1));
+ Value *LHSOrig = lookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSOrig = lookThroughFPExtensions(OpI->getOperand(1));
unsigned OpWidth = OpI->getType()->getFPMantissaWidth();
unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth();
unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth();
@@ -1307,10 +1371,16 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// (fptrunc (select cond, R1, Cst)) -->
// (select cond, (fptrunc R1), (fptrunc Cst))
+ //
+  //  - but only if this isn't part of a min/max operation, else we would
+  //    break min/max canonical form, which is to have the select's and the
+  //    compare's operands be of the same type with no casts to look through.
+ Value *LHS, *RHS;
SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0));
if (SI &&
(isa<ConstantFP>(SI->getOperand(1)) ||
- isa<ConstantFP>(SI->getOperand(2)))) {
+ isa<ConstantFP>(SI->getOperand(2))) &&
+ matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) {
Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
CI.getType());
Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
@@ -1327,9 +1397,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
CI.getType());
Type *IntrinsicType[] = { CI.getType() };
- Function *Overload =
- Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(),
- II->getIntrinsicID(), IntrinsicType);
+ Function *Overload = Intrinsic::getDeclaration(
+ CI.getModule(), II->getIntrinsicID(), IntrinsicType);
Value *Args[] = { InnerTrunc };
return CallInst::Create(Overload, Args, II->getName());
@@ -1483,12 +1552,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
-/// OptimizeVectorResize - This input value (which is known to have vector type)
-/// is being zero extended or truncated to the specified vector type. Try to
-/// replace it with a shuffle (and vector/vector bitcast) if possible.
+/// This input value (which is known to have vector type) is being zero extended
+/// or truncated to the specified vector type.
+/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
-static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
+static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
InstCombiner &IC) {
// We can only do this optimization if the output is a multiple of the input
// element size, or the input is a multiple of the output element size.
@@ -1548,8 +1617,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
return Value / Ty->getPrimitiveSizeInBits();
}
-/// CollectInsertionElements - V is a value which is inserted into a vector of
-/// VecEltTy. Look through the value to see if we can decompose it into
+/// V is a value which is inserted into a vector of VecEltTy.
+/// Look through the value to see if we can decompose it into
/// insertions into the vector. See the example in the comment for
+/// optimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
@@ -1558,7 +1627,7 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
-static bool CollectInsertionElements(Value *V, unsigned Shift,
+static bool collectInsertionElements(Value *V, unsigned Shift,
SmallVectorImpl<Value *> &Elements,
Type *VecEltTy, bool isBigEndian) {
assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
@@ -1595,7 +1664,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
// If the constant is the size of a vector element, we just need to bitcast
// it to the right type so it gets properly inserted.
if (NumElts == 1)
- return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
Shift, Elements, VecEltTy, isBigEndian);
// Okay, this is a constant that covers multiple elements. Slice it up into
@@ -1611,7 +1680,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
isBigEndian))
return false;
}
@@ -1625,19 +1694,19 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian) &&
- CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
isBigEndian);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
@@ -1645,7 +1714,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
- return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian);
}
@@ -1653,8 +1722,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
}
-/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
-/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// If the input is an 'or' instruction, we may be doing shifts and ors to
+/// assemble the elements of the vector manually.
/// Try to rip the code out and replace it with insertelements. This is to
/// optimize code like this:
///
@@ -1667,13 +1736,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
/// %tmp43 = bitcast i64 %ins35 to <2 x float>
///
/// Into two insertelements that do "buildvector{%inc, %inc5}".
-static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
- if (!CollectInsertionElements(IntInput, 0, Elements,
+ if (!collectInsertionElements(IntInput, 0, Elements,
DestVecTy->getElementType(),
IC.getDataLayout().isBigEndian()))
return nullptr;
@@ -1692,63 +1761,29 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
return Result;
}
-
-/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
-/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
+/// vector followed by extract element. The backend tends to handle bitcasts of
+/// vectors better than bitcasts of scalars because vector registers are
+/// usually not type-specific like scalar integer or scalar floating-point.
+static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
+ InstCombiner &IC,
const DataLayout &DL) {
- Value *Src = CI.getOperand(0);
- Type *DestTy = CI.getType();
-
- // If this is a bitcast from int to float, check to see if the int is an
- // extraction from a vector.
- Value *VecInput = nullptr;
- // bitcast(trunc(bitcast(somevector)))
- if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
- isa<VectorType>(VecInput->getType())) {
- VectorType *VecTy = cast<VectorType>(VecInput->getType());
- unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
-
- if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
- // If the element type of the vector doesn't match the result type,
- // bitcast it to be a vector type we can extract from.
- if (VecTy->getElementType() != DestTy) {
- VecTy = VectorType::get(DestTy,
- VecTy->getPrimitiveSizeInBits() / DestWidth);
- VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
- }
-
- unsigned Elt = 0;
- if (DL.isBigEndian())
- Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
- }
- }
+ // TODO: Create and use a pattern matcher for ExtractElementInst.
+ auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
+ if (!ExtElt || !ExtElt->hasOneUse())
+ return nullptr;
- // bitcast(trunc(lshr(bitcast(somevector), cst))
- ConstantInt *ShAmt = nullptr;
- if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
- m_ConstantInt(ShAmt)))) &&
- isa<VectorType>(VecInput->getType())) {
- VectorType *VecTy = cast<VectorType>(VecInput->getType());
- unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
- if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
- ShAmt->getZExtValue() % DestWidth == 0) {
- // If the element type of the vector doesn't match the result type,
- // bitcast it to be a vector type we can extract from.
- if (VecTy->getElementType() != DestTy) {
- VecTy = VectorType::get(DestTy,
- VecTy->getPrimitiveSizeInBits() / DestWidth);
- VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
- }
+ // The bitcast must be to a vectorizable type, otherwise we can't make a new
+ // type to extract from.
+ Type *DestType = BitCast.getType();
+ if (!VectorType::isValidElementType(DestType))
+ return nullptr;
- unsigned Elt = ShAmt->getZExtValue() / DestWidth;
- if (DL.isBigEndian())
- Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
- }
- }
- return nullptr;
+ unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
+ auto *NewVecType = VectorType::get(DestType, NumElts);
+ auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(),
+ NewVecType, "bc");
+ return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
@@ -1794,11 +1829,6 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
- // Try to optimize int -> float bitcasts.
- if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
- if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
- return I;
-
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
@@ -1815,7 +1845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
CastInst *SrcCast = cast<CastInst>(Src);
if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
if (isa<VectorType>(BCIn->getOperand(0)->getType()))
- if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
cast<VectorType>(DestTy), *this))
return I;
}
@@ -1823,7 +1853,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the input is an 'or' instruction, we may be doing shifts and ors to
// assemble the elements of the vector manually. Try to rip the code out
// and replace it with insertelements.
- if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
return ReplaceInstUsesWith(CI, V);
}
}
@@ -1872,6 +1902,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
+ if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+ return I;
+
if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 95bba3c7af7d..c0786afe965e 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -216,8 +216,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
Max = KnownOne|UnknownBits;
}
-
-
/// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
/// cmp pred (load (gep GV, ...)), cmpcst
/// where GV is a global variable with a constant initializer. Try to simplify
@@ -371,7 +369,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
}
}
-
// If this element is in range, update our magic bitvector.
if (i < 64 && IsTrueForElt)
MagicBitvector |= 1ULL << i;
@@ -469,7 +466,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
-
// If a magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
@@ -496,7 +492,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
return nullptr;
}
-
/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
@@ -562,8 +557,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
}
}
-
-
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
@@ -737,6 +730,83 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return nullptr;
}
+Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
+ Value *Other) {
+ assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
+
+ // It would be tempting to fold away comparisons between allocas and any
+ // pointer not based on that alloca (e.g. an argument). However, even
+ // though such pointers cannot alias, they can still compare equal.
+ //
+ // But LLVM doesn't specify where allocas get their memory, so if the alloca
+ // doesn't escape we can argue that it's impossible to guess its value, and we
+ // can therefore act as if any such guesses are wrong.
+ //
+ // The code below checks that the alloca doesn't escape, and that it's only
+ // used in a comparison once (the current instruction). The
+ // single-comparison-use condition ensures that we're trivially folding all
+ // comparisons against the alloca consistently, and avoids the risk of
+ // erroneously folding a comparison of the pointer with itself.
+
+ unsigned MaxIter = 32; // Break cycles and bound to constant-time.
+
+ SmallVector<Use *, 32> Worklist;
+ for (Use &U : Alloca->uses()) {
+ if (Worklist.size() >= MaxIter)
+ return nullptr;
+ Worklist.push_back(&U);
+ }
+
+ unsigned NumCmps = 0;
+ while (!Worklist.empty()) {
+ assert(Worklist.size() <= MaxIter);
+ Use *U = Worklist.pop_back_val();
+ Value *V = U->getUser();
+ --MaxIter;
+
+ if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
+ isa<SelectInst>(V)) {
+ // Track the uses.
+ } else if (isa<LoadInst>(V)) {
+ // Loading from the pointer doesn't escape it.
+ continue;
+ } else if (auto *SI = dyn_cast<StoreInst>(V)) {
+ // Storing *to* the pointer is fine, but storing the pointer escapes it.
+ if (SI->getValueOperand() == U->get())
+ return nullptr;
+ continue;
+ } else if (isa<ICmpInst>(V)) {
+ if (NumCmps++)
+ return nullptr; // Found more than one cmp.
+ continue;
+ } else if (auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
+ switch (Intrin->getIntrinsicID()) {
+ // These intrinsics don't escape or compare the pointer. Memset is safe
+ // because we don't allow ptrtoint. Memcpy and memmove are safe because
+ // we don't allow stores, so src cannot point to V.
+ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
+ continue;
+ default:
+ return nullptr;
+ }
+ } else {
+ return nullptr;
+ }
+ for (Use &U : V->uses()) {
+ if (Worklist.size() >= MaxIter)
+ return nullptr;
+ Worklist.push_back(&U);
+ }
+ }
+
+ Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
+ return ReplaceInstUsesWith(
+ ICI,
+ ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
+}
+
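The use-walk above is a bounded escape scan; a standalone model in plain C++ where Node, Users and Escapes are hypothetical stand-ins for llvm::Use chains:

#include <cstdio>
#include <vector>

struct Node {                           // hypothetical use-graph node
  std::vector<Node *> Users;
  bool Escapes = false;                 // e.g. the pointer itself is stored
};

static bool pointerNeverEscapes(Node *Root, unsigned MaxIter = 32) {
  std::vector<Node *> Worklist;
  for (Node *U : Root->Users) {
    if (Worklist.size() >= MaxIter)
      return false;                     // too many uses: assume the worst
    Worklist.push_back(U);
  }
  unsigned Budget = MaxIter;
  while (!Worklist.empty()) {
    if (Budget-- == 0)
      return false;                     // breaks cycles, bounds the walk
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (N->Escapes)
      return false;
    for (Node *U : N->Users) {
      if (Worklist.size() >= MaxIter)
        return false;
      Worklist.push_back(U);
    }
  }
  return true;                          // every transitive use was harmless
}

int main() {
  Node Load;                            // a plain load does not escape
  Node Gep;    Gep.Users = {&Load};
  Node Alloca; Alloca.Users = {&Gep};
  std::printf("escapes: %d\n", !pointerNeverEscapes(&Alloca)); // prints 0
}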
/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
Value *X, ConstantInt *CI,
@@ -851,7 +921,6 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// to the same result value.
HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
}
-
} else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
if (CmpRHSV == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
@@ -996,7 +1065,6 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
return Res;
}
-
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
APInt Comp = CmpRHSV << ShAmtVal;
@@ -1074,18 +1142,22 @@ Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
if (AP1 == AP2)
return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
- // Get the distance between the highest bit that's set.
int Shift;
- // Both the constants are negative, take their positive to calculate log.
if (IsAShr && AP1.isNegative())
- // Get the ones' complement of AP2 and AP1 when computing the distance.
- Shift = (~AP2).logBase2() - (~AP1).logBase2();
+ Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
else
- Shift = AP2.logBase2() - AP1.logBase2();
+ Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
if (Shift > 0) {
- if (IsAShr ? AP1 == AP2.ashr(Shift) : AP1 == AP2.lshr(Shift))
+ if (IsAShr && AP1 == AP2.ashr(Shift)) {
+ // There are multiple solutions if we are comparing against -1 and the LHS
+ // of the ashr is not a power of two.
+ if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
+ return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ } else if (AP1 == AP2.lshr(Shift)) {
return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ }
}
// Shifting const2 will never be equal to const1.
return getConstant(false);
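The leading-zeros (or, for negative ashr constants, leading-ones) difference is the only shift amount that can possibly satisfy (C2 >> A) == C1; a standalone sketch in plain C++ using the GCC/Clang __builtin_clz intrinsic:

#include <cstdint>
#include <cstdio>

static int clz32(uint32_t X) { return X ? __builtin_clz(X) : 32; }

int main() {
  // Solve (0xF0 lshr A) == 0x0F for A.
  uint32_t C2 = 0xF0, C1 = 0x0F;
  int Shift = clz32(C1) - clz32(C2);            // 28 - 24 = 4
  bool Valid = Shift > 0 && (C2 >> Shift) == C1;
  std::printf("candidate shift=%d valid=%d\n", Shift, Valid); // 4, 1
}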
@@ -1145,6 +1217,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
switch (LHSI->getOpcode()) {
case Instruction::Trunc:
+ if (RHS->isOne() && RHSV.getBitWidth() > 1) {
+ // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
+ Value *V = nullptr;
+ if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
+ match(LHSI->getOperand(0), m_Signum(m_Value(V))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, V,
+ ConstantInt::get(V->getType(), 1));
+ }
if (ICI.isEquality() && LHSI->hasOneUse()) {
// Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
// of the high bits truncated out of x are known.
@@ -1447,9 +1527,35 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT
: ICmpInst::ICMP_ULE,
LHSI->getOperand(0), SubOne(RHS));
+
+ // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1)
+ // iff C is a power of 2
+ if (ICI.isEquality() && LHSI->hasOneUse() && match(RHS, m_Zero())) {
+ if (auto *CI = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ const APInt &AI = CI->getValue();
+ int32_t ExactLogBase2 = AI.exactLogBase2();
+ if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
+ Type *NTy = IntegerType::get(ICI.getContext(), ExactLogBase2 + 1);
+ Value *Trunc = Builder->CreateTrunc(LHSI->getOperand(0), NTy);
+ return new ICmpInst(ICI.getPredicate() == ICmpInst::ICMP_EQ
+ ? ICmpInst::ICMP_SGE
+ : ICmpInst::ICMP_SLT,
+ Trunc, Constant::getNullValue(NTy));
+ }
+ }
+ }
break;
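The power-of-two mask test just performed is equivalent to a sign test on the value truncated to ExactLogBase2 + 1 bits; a brute-force plain-C++ check for C = 8 (so K = 3, a 4-bit truncation), assuming two's complement and arithmetic right shift on signed ints:

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned K = 3;                       // C = 1 << 3 = 8
  for (uint32_t A = 0; A < 1024; ++A) {
    bool MaskForm = (A & (1u << K)) == 0;     // icmp eq (and A, 8), 0
    // Sign-extend the low K+1 = 4 bits and test the sign (icmp sge ..., 0):
    int8_t Trunc = (int8_t)((A & 0xF) << 4) >> 4;
    bool SignForm = Trunc >= 0;
    if (MaskForm != SignForm) {
      std::printf("mismatch at A=%u\n", A);
      return 1;
    }
  }
  std::puts("(A & 8) == 0  <=>  (trunc A to i4) >= 0");
}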
case Instruction::Or: {
+ if (RHS->isOne()) {
+ // icmp slt signum(V) 1 --> icmp slt V, 1
+ Value *V = nullptr;
+ if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
+ match(LHSI, m_Signum(m_Value(V))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, V,
+ ConstantInt::get(V->getType(), 1));
+ }
+
if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
break;
Value *P, *Q;
@@ -2083,11 +2189,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// If the pattern matches, truncate the inputs to the narrower type and
// use the sadd_with_overflow intrinsic to efficiently compute both the
// result and the overflow bit.
- Module *M = I.getParent()->getParent()->getParent();
-
Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
- NewType);
+ Value *F = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::sadd_with_overflow, NewType);
InstCombiner::BuilderTy *Builder = IC.Builder;
@@ -2123,6 +2227,12 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
return true;
};
+ // If the overflow check was an add followed by a compare, the insertion point
+ // may be pointing to the compare. We want to insert the new instructions
+ // before the add in case there are uses of the add between the add and the
+ // compare.
+ Builder->SetInsertPoint(&OrigI);
+
switch (OCF) {
case OCF_INVALID:
llvm_unreachable("bad overflow check kind!");
@@ -2223,7 +2333,9 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
- Instruction *MulInstr = cast<Instruction>(MulVal);
+ auto *MulInstr = dyn_cast<Instruction>(MulVal);
+ if (!MulInstr)
+ return nullptr;
assert(MulInstr->getOpcode() == Instruction::Mul);
auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
@@ -2357,7 +2469,6 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
InstCombiner::BuilderTy *Builder = IC.Builder;
Builder->SetInsertPoint(MulInstr);
- Module *M = I.getParent()->getParent()->getParent();
// Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
Value *MulA = A, *MulB = B;
@@ -2365,8 +2476,8 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
MulA = Builder->CreateZExt(A, MulType);
if (WidthB < MulWidth)
MulB = Builder->CreateZExt(B, MulType);
- Value *F =
- Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
+ Value *F = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::umul_with_overflow, MulType);
CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
IC.Worklist.Add(MulInstr);
@@ -2468,7 +2579,6 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
default:
return APInt::getAllOnesValue(BitWidth);
}
-
}
/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
@@ -2905,7 +3015,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt::get(X->getType(),
CI->countTrailingZeros()));
}
-
break;
}
case ICmpInst::ICMP_NE: {
@@ -2950,7 +3059,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt::get(X->getType(),
CI->countTrailingZeros()));
}
-
break;
}
case ICmpInst::ICMP_ULT:
@@ -3103,7 +3211,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// comparison into the select arms, which will cause one to be
// constant folded and the select turned into a bitwise or.
Value *Op1 = nullptr, *Op2 = nullptr;
- ConstantInt *CI = 0;
+ ConstantInt *CI = nullptr;
if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
CI = dyn_cast<ConstantInt>(Op1);
@@ -3177,6 +3285,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ICmpInst::getSwappedPredicate(I.getPredicate()), I))
return NI;
+ // Try to optimize equality comparisons against alloca-based pointers.
+ if (Op0->getType()->isPointerTy() && I.isEquality()) {
+ assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
+ if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
+ if (Instruction *New = FoldAllocaCmp(I, Alloca, Op1))
+ return New;
+ if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
+ if (Instruction *New = FoldAllocaCmp(I, Alloca, Op0))
+ return New;
+ }
+
// Test to see if the operands of the icmp are casted versions of other
// values. If the ptr->ptr cast can be stripped off both arguments, we do so
// now.
@@ -3304,6 +3423,26 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+ // icmp sgt X, (Y + -1) -> icmp sge X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(D, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
+
+ // icmp sle X, (Y + -1) -> icmp slt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(D, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
+
+ // icmp sge X, (Y + 1) -> icmp sgt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
+
+ // icmp slt X, (Y + 1) -> icmp sle X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
+
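These four rewrites are instances of the same no-signed-wrap identity; an exhaustive plain-C++ check of the first one over i8, excluding the one Y for which Y - 1 would wrap:

#include <cstdio>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int Y = -127; Y <= 127; ++Y) {  // skip Y = -128: Y - 1 would wrap
      bool Sgt = X > (Y - 1);            // icmp sgt X, (Y + -1)
      bool Sge = X >= Y;                 // icmp sge X, Y
      if (Sgt != Sge) {
        std::printf("mismatch X=%d Y=%d\n", X, Y);
        return 1;
      }
    }
  std::puts("icmp sgt X, (Y + -1)  <=>  icmp sge X, Y (given no signed wrap)");
}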
// if C1 has greater magnitude than C2:
// icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
// s.t. C3 = C1 - C2
@@ -3473,6 +3612,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
}
+
+ if (BO0) {
+ // Transform A & (L - 1) `ult` L --> L != 0
+ auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
+ auto BitwiseAnd =
+ m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+
+ if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) {
+ auto *Zero = Constant::getNullValue(BO0->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
+ }
+ }
}
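The rewrite above holds because L - 1 wraps to all-ones exactly when L is zero, making the mask keep every bit; an exhaustive plain-C++ check over 8-bit values:

#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t A = 0; A < 256; ++A)
    for (uint32_t L = 0; L < 256; ++L) {
      uint8_t Mask = (uint8_t)(L - 1);                // wraps to 0xFF at L == 0
      bool Masked = (uint8_t)(A & Mask) < (uint8_t)L; // A & (L-1) `ult` L
      bool NonZero = (uint8_t)L != 0;                 // L != 0
      if (Masked != NonZero) {
        std::printf("mismatch A=%u L=%u\n", A, L);
        return 1;
      }
    }
  std::puts("A & (L - 1) `ult` L  <=>  L != 0");
}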
{ Value *A, *B;
@@ -3697,15 +3848,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
- // Check to see that the input is converted from an integer type that is small
- // enough that preserves all bits. TODO: check here for "known" sign bits.
- // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
- unsigned InputSize = IntTy->getScalarSizeInBits();
-
- // If this is a uitofp instruction, we need an extra bit to hold the sign.
bool LHSUnsigned = isa<UIToFPInst>(LHSI);
- if (LHSUnsigned)
- ++InputSize;
if (I.isEquality()) {
FCmpInst::Predicate P = I.getPredicate();
@@ -3732,13 +3875,30 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// equality compares as integer?
}
- // Comparisons with zero are a special case where we know we won't lose
- // information.
- bool IsCmpZero = RHS.isPosZero();
+  // Check to see that the input is converted from an integer type that is
+  // small enough that the conversion preserves all bits. TODO: check here for
+  // "known" sign bits.
+ // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
+ unsigned InputSize = IntTy->getScalarSizeInBits();
- // If the conversion would lose info, don't hack on this.
- if ((int)InputSize > MantissaWidth && !IsCmpZero)
- return nullptr;
+  // The following test does NOT adjust InputSize downwards for signed inputs,
+ // because the most negative value still requires all the mantissa bits
+ // to distinguish it from one less than that value.
+ if ((int)InputSize > MantissaWidth) {
+ // Conversion would lose accuracy. Check if loss can impact comparison.
+ int Exp = ilogb(RHS);
+ if (Exp == APFloat::IEK_Inf) {
+ int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
+ if (MaxExponent < (int)InputSize - !LHSUnsigned)
+ // Conversion could create infinity.
+ return nullptr;
+ } else {
+ // Note that if RHS is zero or NaN, then Exp is negative
+      // and the first condition is trivially false.
+ if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
+ // Conversion could affect comparison.
+ return nullptr;
+ }
+ }
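A concrete instance (plain C++) of the hazard this exponent test guards against: i32 carries more significant bits than float's 24-bit mantissa, so a naive fold of the fp compare into an integer compare can change the result:

#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  float C = 16777216.0f;            // 2^24, right at float's precision edge
  int X = 16777217;                 // not exactly representable as float
  bool FpCmp = (float)X == C;       // sitofp rounds X down to 2^24 -> true
  bool IntCmp = X == 16777216;      // the naive integer fold -> false
  std::printf("mantissa bits %d, ilogb(C) = %d\n", FLT_MANT_DIG,
              std::ilogb(C));       // 24 and 24: inside the unsafe range
  std::printf("fp compare %d vs int compare %d\n", FpCmp, IntCmp);
}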
// Otherwise, we can potentially simplify the comparison. We know that it
// will always come through as an integer value and we know the constant is
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index ac934f1bd85c..534f67008150 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -281,6 +281,7 @@ public:
ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, Value *Other);
Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I);
Instruction *commonCastTransforms(CastInst &CI);
@@ -341,6 +342,7 @@ public:
const unsigned SIOpd);
private:
+ bool ShouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
@@ -360,6 +362,11 @@ private:
/// \brief Try to optimize a sequence of instructions checking if an operation
/// on LHS and RHS overflows.
///
+ /// If this overflow check is done via one of the overflow check intrinsics,
+ /// then CtxI has to be the call instruction calling that intrinsic. If this
+ /// overflow check is done by arithmetic followed by a compare, then CtxI has
+ /// to be the arithmetic instruction.
+ ///
/// If a simplification is possible, stores the simplified result of the
/// operation in OperationResult and result of the overflow check in
/// OverflowResult, and return true. If no simplification is possible,
@@ -393,7 +400,7 @@ public:
assert(New && !New->getParent() &&
"New instruction already inserted into a basic block!");
BasicBlock *BB = Old.getParent();
- BB->getInstList().insert(&Old, New); // Insert inst
+ BB->getInstList().insert(Old.getIterator(), New); // Insert inst
Worklist.Add(New);
return New;
}
@@ -539,6 +546,7 @@ private:
Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN);
Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
ConstantInt *AndRHS, BinaryOperator &TheAnd);
@@ -548,7 +556,7 @@ private:
Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned,
bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
- Instruction *MatchBSwap(BinaryOperator &I);
+ Instruction *MatchBSwapOrBitReverse(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
Instruction *SimplifyMemSet(MemSetInst *MI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e3179dbeece8..47406b9a1632 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
@@ -90,21 +91,23 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (CS.isCallee(&U))
continue;
+ unsigned DataOpNo = CS.getDataOperandNo(&U);
+ bool IsArgOperand = CS.isArgOperand(&U);
+
// Inalloca arguments are clobbered by the call.
- unsigned ArgNo = CS.getArgumentNo(&U);
- if (CS.isInAllocaArgument(ArgNo))
+ if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
return false;
// If this is a readonly/readnone call site, then we know it is just a
// load (but one that potentially returns the value itself), so we can
// ignore it if we know that the value isn't captured.
if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
continue;
// If this is being passed as a byval argument, the caller is making a
// copy, so it is only a read of the alloca.
- if (CS.isByValArgument(ArgNo))
+ if (IsArgOperand && CS.isByValArgument(DataOpNo))
continue;
}
@@ -186,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
// Scan to the end of the allocation instructions, to skip over a block of
// allocas if possible...also skip interleaved debug info
//
- BasicBlock::iterator It = New;
+ BasicBlock::iterator It(New);
while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
++It;
@@ -367,7 +370,13 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
MDB.createRange(NonNullInt, NullInt));
}
break;
-
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ // These only directly apply if the new type is also a pointer.
+ if (NewTy->isPointerTy())
+ NewLoad->setMetadata(ID, N);
+ break;
case LLVMContext::MD_range:
// FIXME: It would be nice to propagate this in some way, but the type
// conversions make it hard. If the new type is a pointer, we could
@@ -418,6 +427,9 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
case LLVMContext::MD_invariant_load:
case LLVMContext::MD_nonnull:
case LLVMContext::MD_range:
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
// These don't apply for stores.
break;
}
@@ -511,16 +523,46 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
if (!T->isAggregateType())
return nullptr;
- assert(LI.getAlignment() && "Alignement must be set at this point");
+ assert(LI.getAlignment() && "Alignment must be set at this point");
if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct has only one element, we unpack.
- if (ST->getNumElements() == 1) {
+ unsigned Count = ST->getNumElements();
+ if (Count == 1) {
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
".unpack");
return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
UndefValue::get(T), NewLoad, 0, LI.getName()));
}
+
+  // We don't want to break loads with padding here as we'd lose
+ // the knowledge that padding exists for the rest of the pipeline.
+ const DataLayout &DL = IC.getDataLayout();
+ auto *SL = DL.getStructLayout(ST);
+ if (SL->hasPadding())
+ return nullptr;
+
+ auto Name = LI.getName();
+ SmallString<16> LoadName = Name;
+ LoadName += ".unpack";
+ SmallString<16> EltName = Name;
+ EltName += ".elt";
+ auto *Addr = LI.getPointerOperand();
+ Value *V = UndefValue::get(T);
+ auto *IdxType = Type::getInt32Ty(ST->getContext());
+ auto *Zero = ConstantInt::get(IdxType, 0);
+ for (unsigned i = 0; i < Count; i++) {
+ Value *Indices[2] = {
+ Zero,
+ ConstantInt::get(IdxType, i),
+ };
+ auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), EltName);
+ auto *L = IC.Builder->CreateLoad(ST->getTypeAtIndex(i), Ptr, LoadName);
+ V = IC.Builder->CreateInsertValue(V, L, i);
+ }
+
+ V->setName(Name);
+ return IC.ReplaceInstUsesWith(LI, V);
}
if (auto *AT = dyn_cast<ArrayType>(T)) {
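The bail-out on SL->hasPadding() is what keeps the per-element split from discarding layout information. A rough standalone model of that test, assuming naturally aligned members (in the real code the answer comes from the DataLayout's StructLayout):

    #include <cstdio>
    #include <vector>

    struct Member { unsigned Size, Align; };

    // Lay the members out with natural alignment and report any interior gap
    // or tail padding -- a rough model of getStructLayout()->hasPadding().
    bool hasPadding(const std::vector<Member> &Members) {
      unsigned Offset = 0, MaxAlign = 1;
      for (const Member &M : Members) {
        unsigned Aligned = (Offset + M.Align - 1) / M.Align * M.Align;
        if (Aligned != Offset)
          return true;                    // gap before this member
        Offset = Aligned + M.Size;
        if (M.Align > MaxAlign)
          MaxAlign = M.Align;
      }
      return Offset % MaxAlign != 0;      // tail padding
    }

    int main() {
      std::printf("{i32,i32}: %d\n", hasPadding({{4,4},{4,4}})); // 0: split it
      std::printf("{i8,i32}:  %d\n", hasPadding({{1,1},{4,4}})); // 1: keep whole
    }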
@@ -681,7 +723,7 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
// FIXME: If the GEP is not inbounds, and there are extra indices after the
// one we'll replace, those could cause the address computation to wrap
// (rendering the IsAllNonNegative() check below insufficient). We can do
- // better, ignoring zero indicies (and other indicies we can prove small
+ // better, ignoring zero indices (and other indices we can prove small
// enough not to wrap).
if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
return false;
@@ -748,19 +790,19 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
- BasicBlock::iterator BBI = &LI;
+ BasicBlock::iterator BBI(LI);
AAMDNodes AATags;
- if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,
- 6, AA, &AATags)) {
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(Op, LI.getParent(), BBI,
+ DefMaxInstsToScan, AA, &AATags)) {
if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_range,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_nonnull,
- };
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null};
combineMetadata(NLI, &LI, KnownIDs);
};
@@ -822,7 +864,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
}
// load (select (cond, null, P)) -> load P
- if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
+ if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
LI.getPointerAddressSpace() == 0) {
LI.setOperand(0, SI->getOperand(2));
return &LI;
@@ -857,7 +899,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
///
/// \returns true if the store was successfully combined away. This indicates
/// the caller must erase the store instruction. We have to let the caller erase
-/// the store instruction sas otherwise there is no way to signal whether it was
+/// the store instruction as otherwise there is no way to signal whether it was
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
// FIXME: We could probably with some care handle both volatile and atomic
@@ -893,11 +935,38 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct has only one element, we unpack.
- if (ST->getNumElements() == 1) {
+ unsigned Count = ST->getNumElements();
+ if (Count == 1) {
V = IC.Builder->CreateExtractValue(V, 0);
combineStoreToNewValue(IC, SI, V);
return true;
}
+
+  // We don't want to break stores with padding here as we'd lose
+ // the knowledge that padding exists for the rest of the pipeline.
+ const DataLayout &DL = IC.getDataLayout();
+ auto *SL = DL.getStructLayout(ST);
+ if (SL->hasPadding())
+ return false;
+
+ SmallString<16> EltName = V->getName();
+ EltName += ".elt";
+ auto *Addr = SI.getPointerOperand();
+ SmallString<16> AddrName = Addr->getName();
+ AddrName += ".repack";
+ auto *IdxType = Type::getInt32Ty(ST->getContext());
+ auto *Zero = ConstantInt::get(IdxType, 0);
+ for (unsigned i = 0; i < Count; i++) {
+ Value *Indices[2] = {
+ Zero,
+ ConstantInt::get(IdxType, i),
+ };
+ auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName);
+ auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+ IC.Builder->CreateStore(Val, Ptr);
+ }
+
+ return true;
}
if (auto *AT = dyn_cast<ArrayType>(T)) {
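The store side mirrors the load side: one aggregate store becomes one inbounds GEP plus one scalar store per member. In plain C++ terms (a semantic analogue, not the LLVM API):

    #include <cstdio>

    struct Pair { int A, B; };

    // Per-member form of the aggregate store: the new IR emits one GEP plus
    // one scalar store per element instead of a single struct-typed store.
    void repackStore(Pair *Addr, Pair V) {
      Addr->A = V.A; // store %v0, getelementptr(%Addr, 0, 0)
      Addr->B = V.B; // store %v1, getelementptr(%Addr, 0, 1)
    }

    int main() {
      Pair P{0, 0};
      repackStore(&P, Pair{1, 2});
      std::printf("%d %d\n", P.A, P.B); // 1 2
    }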
@@ -971,9 +1040,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
return &SI;
}
- // Don't hack volatile/atomic stores.
- // FIXME: Some bits are legal for atomic stores; needs refactoring.
- if (!SI.isSimple()) return nullptr;
+ // Don't hack volatile/ordered stores.
+ // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+ if (!SI.isUnordered()) return nullptr;
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
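The switch from isSimple() to isUnordered() widens the guard to admit unordered atomic stores while still rejecting volatile accesses and anything with real ordering constraints. A sketch of the two predicates using a stand-in ordering enum (paraphrasing the LoadInst/StoreInst predicates):

    #include <cstdio>

    enum Ordering { NotAtomic, Unordered, Monotonic, Acquire, Release,
                    AcquireRelease, SequentiallyConsistent };

    struct MemOp { Ordering Ord; bool Volatile; };

    bool isSimple(const MemOp &Op) {       // old guard: no atomics at all
      return Op.Ord == NotAtomic && !Op.Volatile;
    }
    bool isUnordered(const MemOp &Op) {    // new guard: unordered is OK too
      return (Op.Ord == NotAtomic || Op.Ord == Unordered) && !Op.Volatile;
    }

    int main() {
      MemOp UnorderedStore{Unordered, false};
      std::printf("simple=%d unordered=%d\n",
                  isSimple(UnorderedStore), isUnordered(UnorderedStore));
    }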
@@ -991,7 +1060,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Do really simple DSE, to catch cases where there are several consecutive
// stores to the same location, separated by a few arithmetic operations. This
// situation often occurs with bitfield accesses.
- BasicBlock::iterator BBI = &SI;
+ BasicBlock::iterator BBI(SI);
for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
--ScanInsts) {
--BBI;
@@ -1005,7 +1074,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
- if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+ if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
SI.getOperand(1))) {
++NumDeadStore;
++BBI;
@@ -1019,9 +1088,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// the pointer we're loading and is producing the pointer we're storing,
// then *this* store is dead (X = load P; store X -> P).
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
- LI->isSimple())
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
+ assert(SI.isUnordered() && "can't eliminate ordering operation");
return EraseInstFromFunction(SI);
+ }
// Otherwise, this is a load from some other location. Stores before it
// may not be dead.
@@ -1047,10 +1117,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (isa<UndefValue>(Val))
return EraseInstFromFunction(SI);
+ // The code below needs to be audited and adjusted for unordered atomics
+ if (!SI.isSimple())
+ return nullptr;
+
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.
- BBI = &SI;
+ BBI = SI.getIterator();
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -1106,7 +1180,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
// Verify that the other block ends in a branch and is not otherwise empty.
- BasicBlock::iterator BBI = OtherBB->getTerminator();
+ BasicBlock::iterator BBI(OtherBB->getTerminator());
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
if (!OtherBr || BBI == OtherBB->begin())
return false;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index a554e9f628e0..7ad0efc42fb4 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -22,9 +22,9 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// simplifyValueKnownNonZero - The specific integer value is used in a context
-/// where it is known to be non-zero. If this allows us to simplify the
-/// computation, do so and return the new operand, otherwise return null.
+/// The specific integer value is used in a context where it is known to be
+/// non-zero. If this allows us to simplify the computation, do so and return
+/// the new operand, otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
@@ -76,8 +76,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
}
-/// MultiplyOverflows - True if the multiply can not be expressed in an int
-/// this size.
+/// True if the multiply cannot be expressed in an int this size.
static bool MultiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product,
bool IsSigned) {
bool Overflow;
@@ -95,6 +94,14 @@ static bool IsMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
assert(C1.getBitWidth() == C2.getBitWidth() &&
"Inconsistent width of constants!");
+ // Bail if we will divide by zero.
+ if (C2.isMinValue())
+ return false;
+
+ // Bail if we would divide INT_MIN by -1.
+ if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
+ return false;
+
APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
if (IsSigned)
APInt::sdivrem(C1, C2, Quotient, Remainder);
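Both new bail-outs guard the sdivrem/udivrem below against cases that would trap or overflow if constant-folded blindly. The same checks restated on int64_t (the real code works on APInt at arbitrary width):

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    bool safeExactSDiv(int64_t C1, int64_t C2, int64_t &Quotient) {
      if (C2 == 0)
        return false;                                  // divide by zero
      if (C1 == std::numeric_limits<int64_t>::min() && C2 == -1)
        return false;                                  // INT_MIN / -1 overflows
      if (C1 % C2 != 0)
        return false;                                  // not an exact multiple
      Quotient = C1 / C2;
      return true;
    }

    int main() {
      int64_t Q;
      std::printf("%d\n", safeExactSDiv(INT64_MIN, -1, Q)); // 0: rejected
    }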
@@ -705,8 +712,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
-/// instruction.
+/// Try to fold a divide or remainder of a select instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
SelectInst *SI = cast<SelectInst>(I.getOperand(1));
@@ -740,7 +746,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
return true;
// Scan the current block backward, looking for other uses of SI.
- BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+ BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin();
while (BBI != BBFront) {
--BBI;
@@ -754,10 +760,10 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
I != E; ++I) {
if (*I == SI) {
*I = SI->getOperand(NonNullOperand);
- Worklist.Add(BBI);
+ Worklist.Add(&*BBI);
} else if (*I == SelectCond) {
*I = Builder->getInt1(NonNullOperand == 1);
- Worklist.Add(BBI);
+ Worklist.Add(&*BBI);
}
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 460f6eb6a825..f1aa98b5e359 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "instcombine"
@@ -245,7 +246,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
/// non-address-taken alloca. Doing so will cause us to not promote the alloca
/// to a register.
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
- BasicBlock::iterator BBI = L, E = L->getParent()->end();
+ BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
for (++BBI; BBI != E; ++BBI)
if (BBI->mayWriteToMemory())
@@ -349,24 +350,40 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+ LoadInst *NewLI = new LoadInst(NewPN, "", isVolatile, LoadAlignment);
+
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ };
- // Add all operands to the new PHI.
+ for (unsigned ID : KnownIDs)
+ NewLI->setMetadata(ID, FirstLI->getMetadata(ID));
+
+ // Add all operands to the new PHI and combine TBAA metadata.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
+ LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i));
+ combineMetadata(NewLI, LI, KnownIDs);
+ Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
- Value *PhiVal;
if (InVal) {
// The new PHI unions all of the same values together. This is really
// common, so we handle it intelligently here for compile-time speed.
- PhiVal = InVal;
+ NewLI->setOperand(0, InVal);
delete NewPN;
} else {
InsertNewInstBefore(NewPN, PN);
- PhiVal = NewPN;
}
// If this was a volatile load that we are merging, make sure to loop through
@@ -376,17 +393,94 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
for (Value *IncValue : PN.incoming_values())
cast<LoadInst>(IncValue)->setVolatile(false);
- LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
return NewLI;
}
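Hoisting the load above the phi is only sound if the merged load carries no metadata that some incoming load lacks, which is why combineMetadata() now runs once per incoming value. A deliberately simplified model that treats the merge as pure intersection (the real combineMetadata instead relaxes some kinds, e.g. !tbaa and !range, to a most-generic value rather than dropping them):

    #include <cstdio>
    #include <set>
    #include <vector>

    using MDSet = std::set<int>;  // stand-in for the KnownIDs metadata kinds

    MDSet mergeLoadsMetadata(const std::vector<MDSet> &Loads) {
      MDSet Result = Loads.front();
      for (size_t i = 1; i < Loads.size(); ++i) {
        MDSet Next;
        for (int ID : Result)
          if (Loads[i].count(ID))
            Next.insert(ID);      // keep only what every source load carries
        Result = Next;
      }
      return Result;
    }

    int main() {
      // First load has !nonnull(=1) and !tbaa(=2); the second only !tbaa.
      MDSet M = mergeLoadsMetadata({{1, 2}, {2}});
      std::printf("kept %zu kind(s)\n", M.size()); // 1: only !tbaa survives
    }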
+/// TODO: This function could handle other cast types, but then it might
+/// require special-casing a cast from the 'i1' type. See the comment in
+/// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types.
+Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
+ // We cannot create a new instruction after the PHI if the terminator is an
+ // EHPad because there is no valid insertion point.
+ if (TerminatorInst *TI = Phi.getParent()->getTerminator())
+ if (TI->isEHPad())
+ return nullptr;
+
+ // Early exit for the common case of a phi with two operands. These are
+ // handled elsewhere. See the comment below where we check the count of zexts
+ // and constants for more details.
+ unsigned NumIncomingValues = Phi.getNumIncomingValues();
+ if (NumIncomingValues < 3)
+ return nullptr;
+ // Find the narrower type specified by the first zext.
+ Type *NarrowType = nullptr;
+ for (Value *V : Phi.incoming_values()) {
+ if (auto *Zext = dyn_cast<ZExtInst>(V)) {
+ NarrowType = Zext->getSrcTy();
+ break;
+ }
+ }
+ if (!NarrowType)
+ return nullptr;
+
+ // Walk the phi operands checking that we only have zexts or constants that
+ // we can shrink for free. Store the new operands for the new phi.
+ SmallVector<Value *, 4> NewIncoming;
+ unsigned NumZexts = 0;
+ unsigned NumConsts = 0;
+ for (Value *V : Phi.incoming_values()) {
+ if (auto *Zext = dyn_cast<ZExtInst>(V)) {
+ // All zexts must be identical and have one use.
+ if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUse())
+ return nullptr;
+ NewIncoming.push_back(Zext->getOperand(0));
+ NumZexts++;
+ } else if (auto *C = dyn_cast<Constant>(V)) {
+ // Make sure that constants can fit in the new type.
+ Constant *Trunc = ConstantExpr::getTrunc(C, NarrowType);
+ if (ConstantExpr::getZExt(Trunc, C->getType()) != C)
+ return nullptr;
+ NewIncoming.push_back(Trunc);
+ NumConsts++;
+ } else {
+ // If it's not a cast or a constant, bail out.
+ return nullptr;
+ }
+ }
+
+ // The more common cases of a phi with no constant operands or just one
+ // variable operand are handled by FoldPHIArgOpIntoPHI() and FoldOpIntoPhi()
+ // respectively. FoldOpIntoPhi() wants to do the opposite transform that is
+ // performed here. It tries to replicate a cast in the phi operand's basic
+ // block to expose other folding opportunities. Thus, InstCombine will
+  // loop infinitely without this check.
+ if (NumConsts == 0 || NumZexts < 2)
+ return nullptr;
+
+ // All incoming values are zexts or constants that are safe to truncate.
+ // Create a new phi node of the narrow type, phi together all of the new
+ // operands, and zext the result back to the original type.
+ PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues,
+ Phi.getName() + ".shrunk");
+ for (unsigned i = 0; i != NumIncomingValues; ++i)
+ NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i));
+
+ InsertNewInstBefore(NewPhi, Phi);
+ return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType());
+}
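The constant check in the loop above, restated on plain integers: a constant may flow into the narrow phi only when zext(trunc(C)) round-trips back to C, i.e. none of its high bits were live. A minimal sketch assuming a 32-bit wide type (the real code uses ConstantExpr::getTrunc/getZExt):

    #include <cstdint>
    #include <cstdio>

    bool fitsInNarrowZext(uint32_t C, unsigned NarrowBits) {
      uint32_t Trunc =
          C & ((NarrowBits >= 32) ? ~0u : ((1u << NarrowBits) - 1));
      return Trunc == C;   // zext of the truncated value reproduces C
    }

    int main() {
      std::printf("255 in i8: %d\n", fitsInNarrowZext(255, 8)); // 1: OK
      std::printf("256 in i8: %d\n", fitsInNarrowZext(256, 8)); // 0: bail
    }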
/// If all operands to a PHI node are the same "unary" operator and they all are
/// only used by the PHI, PHI together their inputs, and do the operation once,
/// to the result of the PHI.
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ // We cannot create a new instruction after the PHI if the terminator is an
+ // EHPad because there is no valid insertion point.
+ if (TerminatorInst *TI = PN.getParent()->getTerminator())
+ if (TI->isEHPad())
+ return nullptr;
+
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
if (isa<GetElementPtrInst>(FirstInst))
@@ -740,7 +834,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
// Otherwise, do an extract in the predecessor.
- Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Builder->SetInsertPoint(Pred->getTerminator());
Value *Res = InVal;
if (Offset)
Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
@@ -787,6 +881,9 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (Value *V = SimplifyInstruction(&PN, DL, TLI, DT, AC))
return ReplaceInstUsesWith(PN, V);
+ if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
+ return Result;
+
// If all PHI operands are the same operation, pull them through the PHI,
// reducing code size.
if (isa<Instruction>(PN.getIncomingValue(0)) &&
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index f51442a9f36d..776704d1efa9 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -38,7 +38,8 @@ getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) {
}
}
-static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) {
+static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF,
+ bool Ordered=false) {
switch (SPF) {
default:
llvm_unreachable("unhandled!");
@@ -51,17 +52,22 @@ static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) {
return ICmpInst::ICMP_SGT;
case SPF_UMAX:
return ICmpInst::ICMP_UGT;
+ case SPF_FMINNUM:
+ return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
+ case SPF_FMAXNUM:
+ return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
}
}
static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
SelectPatternFlavor SPF, Value *A,
Value *B) {
- CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+ CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF);
+ assert(CmpInst::isIntPredicate(Pred));
return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
}
-/// GetSelectFoldableOperands - We want to turn code that looks like this:
+/// We want to turn code that looks like this:
/// %C = or %A, %B
/// %D = select %cond, %C, %A
/// into:
@@ -90,8 +96,8 @@ static unsigned GetSelectFoldableOperands(Instruction *I) {
}
}
-/// GetSelectFoldableConstant - For the same transformation as the previous
-/// function, return the identity constant that goes into the select.
+/// For the same transformation as the previous function, return the identity
+/// constant that goes into the select.
static Constant *GetSelectFoldableConstant(Instruction *I) {
switch (I->getOpcode()) {
default: llvm_unreachable("This cannot happen!");
@@ -110,7 +116,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) {
}
}
-/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
+/// Here we have (select c, TI, FI), and we know that TI and FI
/// have the same opcode and only one use each. Try to simplify this.
Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI) {
@@ -197,8 +203,8 @@ static bool isSelect01(Constant *C1, Constant *C2) {
C2I->isOne() || C2I->isAllOnesValue();
}
-/// FoldSelectIntoOp - Try fold the select into one of the operands to
-/// facilitate further optimization.
+/// Try to fold the select into one of the operands to allow further
+/// optimization.
Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Value *FalseVal) {
// See the comment above GetSelectFoldableOperands for a description of the
@@ -276,7 +282,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
return nullptr;
}
-/// foldSelectICmpAndOr - We want to turn:
+/// We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
/// (or (shl (and X, C1), C3), y)
@@ -394,9 +400,7 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return nullptr;
}
-/// visitSelectInstWithICmp - Visit a SelectInst that has an
-/// ICmpInst as its first operand.
-///
+/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
ICmpInst *ICI) {
bool Changed = false;
@@ -595,10 +599,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
-/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
-/// PHI node (but the two may be in different blocks). See if the true/false
-/// values (V) are live in all of the predecessor blocks of the PHI. For
-/// example, cases like this cannot be mapped:
+/// SI is a select whose condition is a PHI node (but the two may be in
+/// different blocks). See if the true/false values (V) are live in all of the
+/// predecessor blocks of the PHI. For example, cases like this can't be mapped:
///
/// X = phi [ C1, BB1], [C2, BB2]
/// Y = add
@@ -632,7 +635,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
return false;
}
-/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
+/// We have an SPF (e.g. a min or max) of an SPF of the form:
/// SPF2(SPF1(A, B), C)
Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
SelectPatternFlavor SPF1,
@@ -745,10 +748,10 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
return nullptr;
}
-/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
-/// both be) and we have an icmp instruction with zero, and we have an 'and'
-/// with the non-constant value and a power of two we can turn the select
-/// into a shift on the result of the 'and'.
+/// If one of the constants is zero (we know they can't both be) and we have an
+/// icmp instruction with zero, and we have an 'and' with the non-constant value
+/// and a power of two, we can turn the select into a shift on the result of
+/// the 'and'.
static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
ConstantInt *FalseVal,
InstCombiner::BuilderTy *Builder) {
@@ -926,6 +929,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// (X ugt Y) ? X : Y -> (X ole Y) ? Y : X
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ Builder->SetFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal,
FCI->getName() + ".inv");
@@ -967,6 +972,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// (X ugt Y) ? X : Y -> (X ole Y) ? X : Y
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ Builder->SetFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal,
FCI->getName() + ".inv");
@@ -1054,35 +1061,50 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into one of our operands.
- if (SI.getType()->isIntOrIntVectorTy()) {
+ if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) {
if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
Value *LHS, *RHS, *LHS2, *RHS2;
Instruction::CastOps CastOp;
- SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS, &CastOp);
+ SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp);
+ auto SPF = SPR.Flavor;
- if (SPF) {
+ if (SelectPatternResult::isMinOrMax(SPF)) {
// Canonicalize so that type casts are outside select patterns.
if (LHS->getType()->getPrimitiveSizeInBits() !=
SI.getType()->getPrimitiveSizeInBits()) {
- CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
- Value *Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered);
+
+ Value *Cmp;
+ if (CmpInst::isIntPredicate(Pred)) {
+ Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ } else {
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
+ Builder->SetFastMathFlags(FMF);
+ Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
+ }
+
Value *NewSI = Builder->CreateCast(CastOp,
Builder->CreateSelect(Cmp, LHS, RHS),
SI.getType());
return ReplaceInstUsesWith(SI, NewSI);
}
+ }
+ if (SPF) {
// MAX(MAX(a, b), a) -> MAX(a, b)
// MIN(MIN(a, b), a) -> MIN(a, b)
// MAX(MIN(a, b), a) -> a
// MIN(MAX(a, b), a) -> a
- if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2))
+ // ABS(ABS(a)) -> ABS(a)
+ // NABS(NABS(a)) -> NABS(a)
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
- if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2))
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
SI, SPF, LHS))
return R;
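The canonicalization in the last hunk moves casts outside min/max patterns; this is safe because extension commutes with the corresponding min/max flavor. An exhaustive standalone check for the sext/smax pairing over i8:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int32_t maxThenExt(int8_t A, int8_t B) { return (int32_t)std::max(A, B); }
    int32_t extThenMax(int8_t A, int8_t B) {
      return std::max((int32_t)A, (int32_t)B);
    }

    int main() {
      for (int A = -128; A < 128; ++A)
        for (int B = -128; B < 128; ++B)
          if (maxThenExt((int8_t)A, (int8_t)B) !=
              extThenMax((int8_t)A, (int8_t)B))
            std::printf("mismatch at %d,%d\n", A, B);
      std::printf("sext commutes with smax on all i8 pairs\n");
    }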
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index d04ed58b014f..0c7defa5fff8 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -55,7 +55,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
return nullptr;
}
-/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// See if we can compute the specified value, but shifted
/// logically to the left or right by some number of bits. This should return
/// true if the expression can be computed for the same cost as the current
/// expression tree. This is used to eliminate extraneous shifting from things
@@ -184,7 +184,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
}
}
-/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// When CanEvaluateShifted returned true for an expression,
/// this value inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
InstCombiner &IC, const DataLayout &DL) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 80628b23f111..743d51483ea1 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -410,9 +410,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If this is a select as part of a min/max pattern, don't simplify any
// further in case we break the structure.
Value *LHS, *RHS;
- if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN)
+ if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
-
+
if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
@@ -1057,7 +1057,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
for (unsigned i = 0; i < VWidth; i++) {
- if (CV->getAggregateElement(i)->isNullValue())
+ Constant *CElt = CV->getAggregateElement(i);
+ // Method isNullValue always returns false when called on a
+ // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
+      // avoid propagating incorrect information.
+ if (isa<ConstantExpr>(CElt))
+ continue;
+ if (CElt->isNullValue())
LeftDemanded.clearBit(i);
else
RightDemanded.clearBit(i);
@@ -1082,6 +1088,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (!VTy) break;
unsigned InVWidth = VTy->getNumElements();
APInt InputDemandedElts(InVWidth, 0);
+ UndefElts2 = APInt(InVWidth, 0);
unsigned Ratio;
if (VWidth == InVWidth) {
@@ -1089,29 +1096,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// elements as are demanded of us.
Ratio = 1;
InputDemandedElts = DemandedElts;
- } else if (VWidth > InVWidth) {
- // Untested so far.
- break;
-
- // If there are more elements in the result than there are in the source,
- // then an input element is live if any of the corresponding output
- // elements are live.
- Ratio = VWidth/InVWidth;
- for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ } else if ((VWidth % InVWidth) == 0) {
+ // If the number of elements in the output is a multiple of the number of
+ // elements in the input then an input element is live if any of the
+ // corresponding output elements are live.
+ Ratio = VWidth / InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
if (DemandedElts[OutIdx])
- InputDemandedElts.setBit(OutIdx/Ratio);
- }
- } else {
- // Untested so far.
- break;
-
- // If there are more elements in the source than there are in the result,
- // then an input element is live if the corresponding output element is
- // live.
- Ratio = InVWidth/VWidth;
+ InputDemandedElts.setBit(OutIdx / Ratio);
+ } else if ((InVWidth % VWidth) == 0) {
+ // If the number of elements in the input is a multiple of the number of
+ // elements in the output then an input element is live if the
+ // corresponding output element is live.
+ Ratio = InVWidth / VWidth;
for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
- if (DemandedElts[InIdx/Ratio])
+ if (DemandedElts[InIdx / Ratio])
InputDemandedElts.setBit(InIdx);
+ } else {
+ // Unsupported so far.
+ break;
}
// div/rem demand all inputs, because they don't want divide by zero.
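The rewritten hunk replaces two untested dead branches with working code for both directions of a lane-count mismatch, as long as one count divides the other. A standalone sketch of the demanded-lane mapping, with vector<bool> standing in for the APInt bit masks:

    #include <cstdio>
    #include <vector>

    std::vector<bool> demandedInput(const std::vector<bool> &DemandedOut,
                                    unsigned InVWidth) {
      unsigned VWidth = (unsigned)DemandedOut.size();
      std::vector<bool> In(InVWidth, false);
      if (VWidth % InVWidth == 0) {
        unsigned Ratio = VWidth / InVWidth;   // each wide input lane feeds
        for (unsigned O = 0; O != VWidth; ++O)  // several output lanes
          if (DemandedOut[O]) In[O / Ratio] = true;
      } else if (InVWidth % VWidth == 0) {
        unsigned Ratio = InVWidth / VWidth;   // several input lanes feed
        for (unsigned I = 0; I != InVWidth; ++I)  // one output lane
          if (DemandedOut[I / Ratio]) In[I] = true;
      }
      // Otherwise unsupported: the pass bails, just like the break above.
      return In;
    }

    int main() {
      // <2 x i64> bitcast to <4 x i32>: demanding output lane 3 demands
      // input lane 1.
      auto In = demandedInput({false, false, false, true}, 2);
      std::printf("in lanes: %d %d\n", (int)In[0], (int)In[1]); // 0 1
    }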
@@ -1122,24 +1125,26 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
MadeChange = true;
}
- UndefElts = UndefElts2;
- if (VWidth > InVWidth) {
- llvm_unreachable("Unimp");
- // If there are more elements in the result than there are in the source,
- // then an output element is undef if the corresponding input element is
- // undef.
+ if (VWidth == InVWidth) {
+ UndefElts = UndefElts2;
+ } else if ((VWidth % InVWidth) == 0) {
+ // If the number of elements in the output is a multiple of the number of
+ // elements in the input then an output element is undef if the
+ // corresponding input element is undef.
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
- if (UndefElts2[OutIdx/Ratio])
+ if (UndefElts2[OutIdx / Ratio])
+ UndefElts.setBit(OutIdx);
+ } else if ((InVWidth % VWidth) == 0) {
+ // If the number of elements in the input is a multiple of the number of
+ // elements in the output then an output element is undef if all of the
+ // corresponding input elements are undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
+ if (SubUndef.countPopulation() == Ratio)
UndefElts.setBit(OutIdx);
- } else if (VWidth < InVWidth) {
+ }
+ } else {
llvm_unreachable("Unimp");
- // If there are more elements in the source than there are in the result,
- // then a result element is undef if all of the corresponding input
- // elements are undef.
- UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
- for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
- if (!UndefElts2[InIdx]) // Not undef?
- UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
}
break;
}
@@ -1237,6 +1242,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
break;
+
+ // SSE4A instructions leave the upper 64-bits of the 128-bit result
+ // in an undefined state.
+ case Intrinsic::x86_sse4a_extrq:
+ case Intrinsic::x86_sse4a_extrqi:
+ case Intrinsic::x86_sse4a_insertq:
+ case Intrinsic::x86_sse4a_insertqi:
+ UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2);
+ break;
}
break;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 273047279e90..e25639ae943b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -22,10 +22,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation. isConstant indicates whether we're
-/// extracting one known element. If false we're extracting a variable index.
-static bool CheapToScalarize(Value *V, bool isConstant) {
+/// Return true if the value is cheaper to scalarize than it is to leave as a
+/// vector operation. isConstant indicates whether we're extracting one known
+/// element. If false we're extracting a variable index.
+static bool cheapToScalarize(Value *V, bool isConstant) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (isConstant) return true;
@@ -50,13 +50,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
return true;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
if (BO->hasOneUse() &&
- (CheapToScalarize(BO->getOperand(0), isConstant) ||
- CheapToScalarize(BO->getOperand(1), isConstant)))
+ (cheapToScalarize(BO->getOperand(0), isConstant) ||
+ cheapToScalarize(BO->getOperand(1), isConstant)))
return true;
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->hasOneUse() &&
- (CheapToScalarize(CI->getOperand(0), isConstant) ||
- CheapToScalarize(CI->getOperand(1), isConstant)))
+ (cheapToScalarize(CI->getOperand(0), isConstant) ||
+ cheapToScalarize(CI->getOperand(1), isConstant)))
return true;
return false;
@@ -82,7 +82,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// and that it is a binary operation which is cheap to scalarize.
// otherwise return NULL.
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
- !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
+ !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
return nullptr;
// Create a scalar PHI node that will replace the vector PHI node
@@ -115,8 +115,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
Instruction *pos = dyn_cast<Instruction>(PHIInVal);
BasicBlock::iterator InsertPos;
if (pos && !isa<PHINode>(pos)) {
- InsertPos = pos;
- ++InsertPos;
+ InsertPos = ++pos->getIterator();
} else {
InsertPos = inBB->getFirstInsertionPt();
}
@@ -137,7 +136,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
- if (CheapToScalarize(C, false))
+ if (cheapToScalarize(C, false))
return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
// If extracting a specified index from the vector, see if we can recursively
@@ -163,7 +162,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
}
- // If the this extractelement is directly using a bitcast from a vector of
+ // If this extractelement is directly using a bitcast from a vector of
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
@@ -184,10 +183,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
// Push extractelement into predecessor operation if legal and
- // profitable to do so
+ // profitable to do so.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
if (I->hasOneUse() &&
- CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
Value *newEI0 =
Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
EI.getName()+".lhs");
@@ -230,8 +229,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
SrcIdx, false));
}
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // Canonicalize extractelement(cast) -> cast(extractelement)
- // bitcasts can change the number of vector elements and they cost nothing
+ // Canonicalize extractelement(cast) -> cast(extractelement).
+ // Bitcasts can change the number of vector elements, and they cost
+ // nothing.
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
EI.getIndexOperand());
@@ -245,7 +245,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// fight the vectorizer.
// If we are extracting an element from a vector select or a select on
- // vectors, a select on the scalars extracted from the vector arguments.
+ // vectors, create a select on the scalars extracted from the vector
+ // arguments.
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
@@ -275,10 +276,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
return nullptr;
}
-/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true.
-/// Otherwise, return false.
-static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+/// If V is a shuffle of values that ONLY returns elements from either LHS or
+/// RHS, return the shuffle mask and true. Otherwise, return false.
+static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
assert(LHS->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
@@ -315,7 +315,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
// We can handle this if the vector we are inserting into is
// transitively ok.
- if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted undef.
Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
return true;
@@ -330,7 +330,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
// We can handle this if the vector we are inserting into is
// transitively ok.
- if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted value.
if (EI->getOperand(0) == LHS) {
Mask[InsertedIdx % NumElts] =
@@ -352,6 +352,48 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
return false;
}
+/// If we have insertion into a vector that is wider than the vector that we
+/// are extracting from, try to widen the source vector to allow a single
+/// shufflevector to replace one or more insert/extract pairs.
+static void replaceExtractElements(InsertElementInst *InsElt,
+ ExtractElementInst *ExtElt,
+ InstCombiner &IC) {
+ VectorType *InsVecType = InsElt->getType();
+ VectorType *ExtVecType = ExtElt->getVectorOperandType();
+ unsigned NumInsElts = InsVecType->getVectorNumElements();
+ unsigned NumExtElts = ExtVecType->getVectorNumElements();
+
+ // The inserted-to vector must be wider than the extracted-from vector.
+ if (InsVecType->getElementType() != ExtVecType->getElementType() ||
+ NumExtElts >= NumInsElts)
+ return;
+
+  // Create a shuffle mask to widen the extracted-from vector using undefined
+ // values. The mask selects all of the values of the original vector followed
+ // by as many undefined values as needed to create a vector of the same length
+ // as the inserted-to vector.
+ SmallVector<Constant *, 16> ExtendMask;
+ IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
+ for (unsigned i = 0; i < NumExtElts; ++i)
+ ExtendMask.push_back(ConstantInt::get(IntType, i));
+ for (unsigned i = NumExtElts; i < NumInsElts; ++i)
+ ExtendMask.push_back(UndefValue::get(IntType));
+
+ Value *ExtVecOp = ExtElt->getVectorOperand();
+ auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
+ ConstantVector::get(ExtendMask));
+
+ // Replace all extracts from the original narrow vector with extracts from
+ // the new wide vector.
+ WideVec->insertBefore(ExtElt);
+ for (User *U : ExtVecOp->users()) {
+ if (ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U)) {
+ auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
+ NewExt->insertAfter(WideVec);
+ IC.ReplaceInstUsesWith(*OldExt, NewExt);
+ }
+ }
+}
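The widening shuffle built by replaceExtractElements keeps every source lane in place and pads the tail with undef. Its mask in isolation, with -1 standing in for an undef lane index:

    #include <cstdio>
    #include <vector>

    std::vector<int> widenMask(unsigned NumExtElts, unsigned NumInsElts) {
      std::vector<int> Mask;
      for (unsigned i = 0; i < NumExtElts; ++i)
        Mask.push_back((int)i);      // keep every source element in place
      for (unsigned i = NumExtElts; i < NumInsElts; ++i)
        Mask.push_back(-1);          // undef filler lanes
      return Mask;
    }

    int main() {
      for (int M : widenMask(2, 4))  // widen <2 x T> to <4 x T>
        std::printf("%d ", M);       // 0 1 -1 -1
      std::printf("\n");
    }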
/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
@@ -363,9 +405,10 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// often been chosen carefully to be efficiently implementable on the target.
typedef std::pair<Value *, Value *> ShuffleOps;
-static ShuffleOps CollectShuffleElements(Value *V,
+static ShuffleOps collectShuffleElements(Value *V,
SmallVectorImpl<Constant *> &Mask,
- Value *PermittedRHS) {
+ Value *PermittedRHS,
+ InstCombiner &IC) {
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
@@ -396,10 +439,14 @@ static ShuffleOps CollectShuffleElements(Value *V,
// otherwise we'd end up with a shuffle of three inputs.
if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
Value *RHS = EI->getOperand(0);
- ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
+ ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
assert(LR.second == nullptr || LR.second == RHS);
if (LR.first->getType() != RHS->getType()) {
+ // Although we are giving up for now, see if we can create extracts
+ // that match the inserts for another round of combining.
+ replaceExtractElements(IEI, EI, IC);
+
// We tried our best, but we can't find anything compatible with RHS
// further up the chain. Return a trivial shuffle.
for (unsigned i = 0; i < NumElts; ++i)
@@ -429,14 +476,14 @@ static ShuffleOps CollectShuffleElements(Value *V,
// If this insertelement is a chain that comes from exactly these two
// vectors, return the vector and the effective shuffle.
if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
- CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
+ collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
Mask))
return std::make_pair(EI->getOperand(0), PermittedRHS);
}
}
}
- // Otherwise, can't do anything fancy. Return an identity vector.
+ // Otherwise, we can't do anything fancy. Return an identity vector.
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
return std::make_pair(V, nullptr);
@@ -512,7 +559,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
SmallVector<Constant*, 16> Mask;
- ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr);
+ ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
// The proposed shuffle may be trivial, in which case we shouldn't
// perform the combine.
@@ -588,8 +635,8 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::GetElementPtr: {
- for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
+ for (Value *Operand : I->operands()) {
+ if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
return false;
}
return true;
@@ -617,7 +664,7 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
/// Rebuild a new instruction just like 'I' but with the new operands given.
/// In the event of type mismatch, the type of the operands is correct.
-static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
+static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
// We don't want to use the IRBuilder here because we want the replacement
// instructions to appear next to 'I', not the builder's insertion point.
switch (I->getOpcode()) {
@@ -760,7 +807,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
NeedsRebuild |= (V != I->getOperand(i));
}
if (NeedsRebuild) {
- return BuildNew(I, NewOps);
+ return buildNew(I, NewOps);
}
return I;
}
@@ -792,7 +839,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
llvm_unreachable("failed to reorder elements of vector instruction!");
}
-static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
+static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
bool &isLHSID, bool &isRHSID) {
isLHSID = isRHSID = true;
@@ -891,7 +938,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (VWidth == LHSWidth) {
    // Analyze the shuffle: is the LHS or RHS an identity shuffle?
bool isLHSID, isRHSID;
- RecognizeIdentityMask(Mask, isLHSID, isRHSID);
+ recognizeIdentityMask(Mask, isLHSID, isRHSID);
// Eliminate identity shuffles.
if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
@@ -1177,7 +1224,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// If the result mask is an identity, replace uses of this instruction with
// corresponding argument.
bool isLHSID, isRHSID;
- RecognizeIdentityMask(newMask, isLHSID, isRHSID);
+ recognizeIdentityMask(newMask, isLHSID, isRHSID);
if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS);
if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index fd34a244f271..7c46cfd28fc9 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -42,8 +42,9 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -79,14 +80,12 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
return llvm::EmitGEPOffset(Builder, DL, GEP);
}
-/// ShouldChangeType - Return true if it is desirable to convert a computation
-/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
-/// type for example, or from a smaller to a larger illegal type.
-bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
- assert(From->isIntegerTy() && To->isIntegerTy());
-
- unsigned FromWidth = From->getPrimitiveSizeInBits();
- unsigned ToWidth = To->getPrimitiveSizeInBits();
+/// Return true if it is desirable to convert an integer computation from a
+/// given bit width to a new bit width.
+/// We don't want to convert from a legal to an illegal type for example or from
+/// a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(unsigned FromWidth,
+ unsigned ToWidth) const {
bool FromLegal = DL.isLegalInteger(FromWidth);
bool ToLegal = DL.isLegalInteger(ToWidth);
@@ -103,6 +102,17 @@ bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
return true;
}
+/// Return true if it is desirable to convert a computation from 'From' to 'To'.
+/// We don't want to convert from a legal to an illegal type for example or from
+/// a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
+ assert(From->isIntegerTy() && To->isIntegerTy());
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ return ShouldChangeType(FromWidth, ToWidth);
+}
+
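The new width-based overload lets callers ask the legality question without first materializing IntegerTypes. A plain restatement of the rule; the set of legal widths comes from the target's DataLayout, so the 8/16/32/64 set here is only an assumption for illustration:

    #include <cstdio>

    // Assumed legal widths; in the real code this is DL.isLegalInteger().
    bool isLegalInteger(unsigned W) {
      return W == 8 || W == 16 || W == 32 || W == 64;
    }

    bool shouldChangeType(unsigned FromWidth, unsigned ToWidth) {
      bool FromLegal = isLegalInteger(FromWidth);
      bool ToLegal = isLegalInteger(ToWidth);
      if (FromLegal && !ToLegal)
        return false;               // never go from a legal to an illegal type
      if (!FromLegal && !ToLegal && ToWidth > FromWidth)
        return false;               // never grow an illegal type
      return true;
    }

    int main() {
      std::printf("i32 -> i17: %d\n", shouldChangeType(32, 17)); // 0
      std::printf("i17 -> i32: %d\n", shouldChangeType(17, 32)); // 1
    }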
// Return true if No Signed Wrap should be maintained for I.
// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
// where both B and C should be ConstantInts, results in a constant that does
@@ -156,27 +166,26 @@ static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
I.setFastMathFlags(FMF);
}
-/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
-/// operators which are associative or commutative:
-//
-// Commutative operators:
-//
-// 1. Order operands such that they are listed from right (least complex) to
-// left (most complex). This puts constants before unary operators before
-// binary operators.
-//
-// Associative operators:
-//
-// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
-// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
-//
-// Associative and commutative operators:
-//
-// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
-// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
-// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
-// if C1 and C2 are constants.
-//
+/// This performs a few simplifications for operators that are associative or
+/// commutative:
+///
+/// Commutative operators:
+///
+/// 1. Order operands such that they are listed from right (least complex) to
+/// left (most complex). This puts constants before unary operators before
+/// binary operators.
+///
+/// Associative operators:
+///
+/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+///
+/// Associative and commutative operators:
+///
+/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+/// if C1 and C2 are constants.
bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Instruction::BinaryOps Opcode = I.getOpcode();
bool Changed = false;
@@ -322,7 +331,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
} while (1);
}
-/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to
+/// Return whether "X LOp (Y ROp Z)" is always equal to
/// "(X LOp Y) ROp (X LOp Z)".
static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
Instruction::BinaryOps ROp) {
@@ -361,7 +370,7 @@ static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
}
}
-/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to
+/// Return whether "(X LOp Y) ROp Z" is always equal to
/// "(X ROp Z) LOp (Y ROp Z)".
static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
Instruction::BinaryOps ROp) {
@@ -519,7 +528,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (isa<OverflowingBinaryOperator>(Op1))
HasNSW &= Op1->hasNoSignedWrap();
- // We can propogate 'nsw' if we know that
+ // We can propagate 'nsw' if we know that
// %Y = mul nsw i16 %X, C
// %Z = add nsw i16 %Y, %X
// =>
@@ -537,11 +546,11 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
return SimplifiedInst;
}
-/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
-/// which some other binary operation distributes over either by factorizing
-/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
-/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
-/// a win). Returns the simplified value, or null if it didn't simplify.
+/// This tries to simplify binary operations which some other binary operation
+/// distributes over either by factorizing out common terms
+/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
+/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
+/// Returns the simplified value, or null if it didn't simplify.
Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
@@ -623,12 +632,38 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
}
}
+ // (op (select (a, c, b)), (select (a, d, b))) -> (select (a, (op c, d), 0))
+ // (op (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (op c, d)))
+ if (auto *SI0 = dyn_cast<SelectInst>(LHS)) {
+ if (auto *SI1 = dyn_cast<SelectInst>(RHS)) {
+ if (SI0->getCondition() == SI1->getCondition()) {
+ Value *SI = nullptr;
+ if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
+ SI1->getFalseValue(), DL, TLI, DT, AC))
+ SI = Builder->CreateSelect(SI0->getCondition(),
+ Builder->CreateBinOp(TopLevelOpcode,
+ SI0->getTrueValue(),
+ SI1->getTrueValue()),
+ V);
+ if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
+ SI1->getTrueValue(), DL, TLI, DT, AC))
+ SI = Builder->CreateSelect(
+ SI0->getCondition(), V,
+ Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
+ SI1->getFalseValue()));
+ if (SI) {
+ SI->takeName(&I);
+ return SI;
+ }
+ }
+ }
+ }
+
return nullptr;
}
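The new fold distributes a binary operator over two selects that share a condition, provided the pair of true arms or the pair of false arms simplifies. A concrete standalone instance where the shared false arm makes the false side collapse (b | b == b):

    #include <cstdio>

    int fused(bool a, int b, int c, int d) { return a ? (c | d) : b; }
    int naive(bool a, int b, int c, int d) {
      return (a ? c : b) | (a ? d : b);
    }

    int main() {
      for (int a = 0; a < 2; ++a)
        std::printf("a=%d: naive=%d fused=%d\n", a,
                    naive(a != 0, 5, 8, 2), fused(a != 0, 5, 8, 2));
    }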
-// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
-// if the LHS is a constant zero (which is the 'negate' form).
-//
+/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
+/// constant zero (which is the 'negate' form).
Value *InstCombiner::dyn_castNegVal(Value *V) const {
if (BinaryOperator::isNeg(V))
return BinaryOperator::getNegArgument(V);
@@ -644,10 +679,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
return nullptr;
}
-// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
-// instruction if the LHS is a constant negative zero (which is the 'negate'
-// form).
-//
+/// Given a 'fsub' instruction, return the RHS of the instruction if the LHS is
+/// a constant negative zero (which is the 'negate' form).
Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
return BinaryOperator::getFNegArgument(V);
@@ -700,10 +733,10 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
llvm_unreachable("Unknown binary instruction type!");
}
-// FoldOpIntoSelect - Given an instruction with a select as one operand and a
-// constant as the other operand, try to fold the binary operator into the
-// select arguments. This also works for Cast instructions, which obviously do
-// not have a second operand.
+/// Given an instruction with a select as one operand and a constant as the
+/// other operand, try to fold the binary operator into the select arguments.
+/// This also works for Cast instructions, which obviously do not have a second
+/// operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
// Don't modify shared select instructions
if (!SI->hasOneUse()) return nullptr;
@@ -752,10 +785,9 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
}
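
On concrete numbers, the fold above turns an op-of-select into a select of folded constants; a tiny standalone check (hypothetical values):

    #include <cassert>

    int main() {
      // add (select c, 1, 2), 5  ==>  select c, 6, 7
      for (bool c : {false, true})
        assert(((c ? 1 : 2) + 5) == (c ? 6 : 7));
      return 0;
    }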
-/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
-/// has a PHI node as operand #0, see if we can fold the instruction into the
-/// PHI (which is only possible if all operands to the PHI are constants).
-///
+/// Given a binary operator, cast instruction, or select which has a PHI node as
+/// operand #0, see if we can fold the instruction into the PHI (which is only
+/// possible if all operands to the PHI are constants).
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
PHINode *PN = cast<PHINode>(I.getOperand(0));
unsigned NumPHIValues = PN->getNumIncomingValues();
@@ -819,7 +851,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
NewPN->takeName(PN);
// If we are going to have to insert a new computation, do so right before the
- // predecessors terminator.
+ // predecessor's terminator.
if (NonConstBB)
Builder->SetInsertPoint(NonConstBB->getTerminator());
@@ -893,10 +925,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
return ReplaceInstUsesWith(I, NewPN);
}
-/// FindElementAtOffset - Given a pointer type and a constant offset, determine
-/// whether or not there is a sequence of GEP indices into the pointed type that
-/// will land us at the specified offset. If so, fill them into NewIndices and
-/// return the resultant element type, otherwise return null.
+/// Given a pointer type and a constant offset, determine whether or not there
+/// is a sequence of GEP indices into the pointed type that will land us at the
+/// specified offset. If so, fill them into NewIndices and return the resultant
+/// element type, otherwise return null.
Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
SmallVectorImpl<Value *> &NewIndices) {
Type *Ty = PtrTy->getElementType();
@@ -965,8 +997,8 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
return true;
}
-/// Descale - Return a value X such that Val = X * Scale, or null if none. If
-/// the multiplication is known not to overflow then NoSignedWrap is set.
+/// Return a value X such that Val = X * Scale, or null if none.
+/// If the multiplication is known not to overflow, then NoSignedWrap is set.
Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
@@ -1008,11 +1040,11 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// 0'th operand of Val.
std::pair<Instruction*, unsigned> Parent;
- // RequireNoSignedWrap - Set if the transform requires a descaling at deeper
- // levels that doesn't overflow.
+ // Set if the transform requires a descaling at deeper levels that doesn't
+ // overflow.
bool RequireNoSignedWrap = false;
- // logScale - log base 2 of the scale. Negative if not a power of 2.
+ // Log base 2 of the scale. Negative if not a power of 2.
int32_t logScale = Scale.exactLogBase2();
for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
@@ -1213,16 +1245,11 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
/// specified one but with other operands.
static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
InstCombiner::BuilderTy *B) {
- Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
- if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) {
- if (isa<OverflowingBinaryOperator>(NewBO)) {
- NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap());
- NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap());
- }
- if (isa<PossiblyExactOperator>(NewBO))
- NewBO->setIsExact(Inst.isExact());
- }
- return BORes;
+ Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
+ // If LHS and RHS are constant, BO won't be a binary operator.
+ if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BO))
+ NewBO->copyIRFlags(&Inst);
+ return BO;
}
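
The dyn_cast guard survives the rewrite because IRBuilder constant-folds eagerly: with two constant operands, CreateBinOp returns a Constant, not a BinaryOperator. A sketch of that behavior (assumes an LLVM tree of this era to compile against; not part of this diff):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      IRBuilder<> B(Ctx);
      // Both operands constant: the builder folds to a ConstantInt (5),
      // so copyIRFlags must stay behind the dyn_cast, as in the patch.
      Value *V = B.CreateAdd(B.getInt32(2), B.getInt32(3));
      assert(isa<ConstantInt>(V) && !isa<BinaryOperator>(V));
      return 0;
    }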
/// \brief Makes transformations of a binary operation that are specific to
/// vector types.
@@ -1256,9 +1283,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
LShuf->getMask() == RShuf->getMask()) {
Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
RShuf->getOperand(0), Builder);
- Value *Res = Builder->CreateShuffleVector(NewBO,
+ return Builder->CreateShuffleVector(NewBO,
UndefValue::get(NewBO->getType()), LShuf->getMask());
- return Res;
}
}
@@ -1294,18 +1320,11 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
}
if (MayChange) {
Constant *C2 = ConstantVector::get(C2M);
- Value *NewLHS, *NewRHS;
- if (isa<Constant>(LHS)) {
- NewLHS = C2;
- NewRHS = Shuffle->getOperand(0);
- } else {
- NewLHS = Shuffle->getOperand(0);
- NewRHS = C2;
- }
+ Value *NewLHS = isa<Constant>(LHS) ? C2 : Shuffle->getOperand(0);
+ Value *NewRHS = isa<Constant>(LHS) ? Shuffle->getOperand(0) : C2;
Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
- Value *Res = Builder->CreateShuffleVector(NewBO,
+ return Builder->CreateShuffleVector(NewBO,
UndefValue::get(Inst.getType()), Shuffle->getMask());
- return Res;
}
}
@@ -1323,7 +1342,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Eliminate unneeded casts for indices, and replace indices which displace
// by multiples of a zero size type with zero.
bool MadeChange = false;
- Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType());
+ Type *IntPtrTy =
+ DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType());
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
@@ -1333,21 +1353,25 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!SeqTy)
continue;
+ // Index type should have the same width as IntPtr
+ Type *IndexTy = (*I)->getType();
+ Type *NewIndexType = IndexTy->isVectorTy() ?
+ VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy;
+
// If the element type has zero size then any index over it is equivalent
// to an index of zero, so replace it with zero if it is not zero already.
if (SeqTy->getElementType()->isSized() &&
DL.getTypeAllocSize(SeqTy->getElementType()) == 0)
if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
- *I = Constant::getNullValue(IntPtrTy);
+ *I = Constant::getNullValue(NewIndexType);
MadeChange = true;
}
- Type *IndexTy = (*I)->getType();
- if (IndexTy != IntPtrTy) {
+ if (IndexTy != NewIndexType) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
- *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ *I = Builder->CreateIntCast(*I, NewIndexType, true);
MadeChange = true;
}
}
@@ -1421,8 +1445,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
+ // If not all GEPs are identical we'll have to create a new PHI node.
+ // Check that the old PHI node has only one use so that it will get
+ // removed.
+ if (DI != -1 && !PN->hasOneUse())
+ return nullptr;
+ GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
if (DI == -1) {
// All the GEPs feeding the PHI are identical. Clone one down into our
// BB so that it can be merged with the current GEP.
@@ -1432,11 +1461,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// All the GEPs feeding the PHI differ at a single offset. Clone a GEP
// into the current block so it can be merged, and create a new PHI to
// set that index.
- Instruction *InsertPt = Builder->GetInsertPoint();
- Builder->SetInsertPoint(PN);
- PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
- PN->getNumOperands());
- Builder->SetInsertPoint(InsertPt);
+ PHINode *NewPN;
+ {
+ IRBuilderBase::InsertPointGuard Guard(*Builder);
+ Builder->SetInsertPoint(PN);
+ NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
+ PN->getNumOperands());
+ }
for (auto &I : PN->operands())
NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
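
The hunk above swaps manual save/restore of the insertion point for the RAII InsertPointGuard, which also restores the saved debug location on scope exit; a minimal usage sketch with a hypothetical helper, assuming LLVM headers:

    #include "llvm/IR/IRBuilder.h"

    // Emit at the start of BB, then automatically restore the builder's
    // previous insertion point (and debug location) when Guard dies.
    void emitAtBlockStart(llvm::IRBuilder<> &Builder, llvm::BasicBlock *BB) {
      llvm::IRBuilderBase::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(BB, BB->begin());
      Builder.CreateUnreachable(); // stand-in for the real instruction
    }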
@@ -1790,7 +1821,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
I->takeName(BCI);
- BCI->getParent()->getInstList().insert(BCI, I);
+ BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
ReplaceInstUsesWith(*BCI, I);
}
return &GEP;
@@ -1931,7 +1962,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
// Replace invoke with a NOP intrinsic to maintain the original CFG
- Module *M = II->getParent()->getParent()->getParent();
+ Module *M = II->getModule();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
None, "", II->getParent());
@@ -2280,9 +2311,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
}
if (LoadInst *L = dyn_cast<LoadInst>(Agg))
// If the (non-volatile) load only has one use, we can rewrite this to a
- // load from a GEP. This reduces the size of the load.
- // FIXME: If a load is used only by extractvalue instructions then this
- // could be done regardless of having multiple uses.
+    // load from a GEP. This reduces the size of the load. If a load is used
+    // only by extractvalue instructions, then it must either have been
+    // optimized before, or it is a struct with padding, in which case we
+    // don't want to do the transformation as it loses padding knowledge.
if (L->isSimple() && L->hasOneUse()) {
// extractvalue has integer indices, getelementptr has Value*s. Convert.
SmallVector<Value*, 4> Indices;
@@ -2294,7 +2326,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
- Builder->SetInsertPoint(L->getParent(), L);
+ Builder->SetInsertPoint(L);
Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
L->getPointerOperand(), Indices);
// Returning the load directly will cause the main loop to insert it in
@@ -2312,7 +2344,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return nullptr;
}
-/// isCatchAll - Return 'true' if the given typeinfo will match anything.
+/// Return 'true' if the given typeinfo will match anything.
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
switch (Personality) {
case EHPersonality::GNU_C:
@@ -2330,6 +2362,7 @@ static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
case EHPersonality::MSVC_X86SEH:
case EHPersonality::MSVC_Win64SEH:
case EHPersonality::MSVC_CXX:
+ case EHPersonality::CoreCLR:
return TypeInfo->isNullValue();
}
llvm_unreachable("invalid enum");
@@ -2441,10 +2474,24 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
SawCatchAll = true;
break;
}
- if (AlreadyCaught.count(TypeInfo))
- // Already caught by an earlier clause, so having it in the filter
- // is pointless.
- continue;
+
+ // Even if we've seen a type in a catch clause, we don't want to
+ // remove it from the filter. An unexpected type handler may be
+ // set up for a call site which throws an exception of the same
+ // type caught. In order for the exception thrown by the unexpected
+    // handler to propagate correctly, the filter must be correctly
+ // described for the call site.
+ //
+ // Example:
+ //
+ // void unexpected() { throw 1;}
+ // void foo() throw (int) {
+ // std::set_unexpected(unexpected);
+ // try {
+ // throw 2.0;
+ // } catch (int i) {}
+ // }
+
// There is no point in having multiple copies of the same typeinfo in
// a filter, so only add it if we didn't already.
if (SeenInFilter.insert(TypeInfo).second)
@@ -2637,15 +2684,15 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
-/// TryToSinkInstruction - Try to move the specified instruction from its
-/// current block into the beginning of DestBlock, which can only happen if it's
-/// safe to move the instruction past all of the instructions between it and the
-/// end of its block.
+/// Try to move the specified instruction from its current block into the
+/// beginning of DestBlock, which can only happen if it's safe to move the
+/// instruction past all of the instructions between it and the end of its
+/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
assert(I->hasOneUse() && "Invariants didn't hold!");
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() ||
+ if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
isa<TerminatorInst>(I))
return false;
@@ -2654,17 +2701,24 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
&DestBlock->getParent()->getEntryBlock())
return false;
+ // Do not sink convergent call instructions.
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ if (CI->isConvergent())
+ return false;
+ }
+
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
if (I->mayReadFromMemory()) {
- for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ for (BasicBlock::iterator Scan = I->getIterator(),
+ E = I->getParent()->end();
Scan != E; ++Scan)
if (Scan->mayWriteToMemory())
return false;
}
BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
- I->moveBefore(InsertPos);
+ I->moveBefore(&*InsertPos);
++NumSunkInst;
return true;
}
@@ -2698,6 +2752,27 @@ bool InstCombiner::run() {
}
}
+ // In general, it is possible for computeKnownBits to determine all bits in a
+ // value even when the operands are not all constants.
+ if (!I->use_empty() && I->getType()->isIntegerTy()) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
+ if ((KnownZero | KnownOne).isAllOnesValue()) {
+ Constant *C = ConstantInt::get(I->getContext(), KnownOne);
+ DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
+ " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+ }
+
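
A worked miniature of the known-bits folding added above (plain C++, independent of LLVM): masking can pin every bit of a non-constant expression, at which point the expression is a constant.

    #include <cassert>

    int main() {
      // For v = (x | 0xF0) & 0xF0: KnownOne = 0xF0, KnownZero = 0x0F,
      // so (KnownZero | KnownOne) covers all bits and v is always 0xF0.
      for (unsigned x = 0; x <= 0xFF; ++x)
        assert(((x | 0xF0u) & 0xF0u) == 0xF0u);
      return 0;
    }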
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
@@ -2738,7 +2813,7 @@ bool InstCombiner::run() {
}
// Now that we have an instruction, try combining it to simplify it.
- Builder->SetInsertPoint(I->getParent(), I);
+ Builder->SetInsertPoint(I);
Builder->SetCurrentDebugLocation(I->getDebugLoc());
#ifndef NDEBUG
@@ -2768,7 +2843,7 @@ bool InstCombiner::run() {
// Insert the new instruction into the basic block...
BasicBlock *InstParent = I->getParent();
- BasicBlock::iterator InsertPos = I;
+ BasicBlock::iterator InsertPos = I->getIterator();
// If we replace a PHI with something that isn't a PHI, fix up the
// insertion point.
@@ -2801,8 +2876,8 @@ bool InstCombiner::run() {
return MadeIRChange;
}
-/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
-/// all reachable code to the worklist.
+/// Walk the function in depth-first order, adding all reachable code to the
+/// worklist.
///
/// This has a couple of tricks to make the code faster and more powerful. In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
@@ -2829,7 +2904,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
continue;
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = BBI++;
+ Instruction *Inst = &*BBI++;
// DCE instruction if trivially dead.
if (isInstructionTriviallyDead(Inst, TLI)) {
@@ -2900,8 +2975,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
}
}
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- Worklist.push_back(TI->getSuccessor(i));
+ for (BasicBlock *SuccBB : TI->successors())
+ Worklist.push_back(SuccBB);
} while (!Worklist.empty());
// Once we've found all of the instructions to add to instcombine's worklist,
@@ -2909,8 +2984,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
// of the function down. This jives well with the way that it adds all uses
// of instructions to the worklist after doing a transformation, thus avoiding
// some N^2 behavior in pathological cases.
- ICWorklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
- InstrsForInstCombineWorklist.size());
+ ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist);
return MadeIRChange;
}
@@ -2930,13 +3004,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// track of which blocks we visit.
SmallPtrSet<BasicBlock *, 64> Visited;
MadeIRChange |=
- AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI);
+ AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
// the instcombine code from having to deal with some bad special cases.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (Visited.count(BB))
+ if (Visited.count(&*BB))
continue;
// Delete the instructions backwards, as it has a reduced likelihood of
@@ -2944,11 +3018,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
while (EndInst != BB->begin()) {
// Delete the next to last instruction.
- BasicBlock::iterator I = EndInst;
- Instruction *Inst = --I;
- if (!Inst->use_empty())
+ Instruction *Inst = &*--EndInst->getIterator();
+ if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (isa<LandingPadInst>(Inst)) {
+ if (Inst->isEHPad()) {
EndInst = Inst;
continue;
}
@@ -2956,7 +3029,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
++NumDeadInst;
MadeIRChange = true;
}
- Inst->eraseFromParent();
+ if (!Inst->getType()->isTokenTy())
+ Inst->eraseFromParent();
}
}
@@ -2968,8 +3042,6 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
AliasAnalysis *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, DominatorTree &DT,
LoopInfo *LI = nullptr) {
- // Minimizing size?
- bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
auto &DL = F.getParent()->getDataLayout();
/// Builder - This is an IRBuilder that automatically inserts new
@@ -2992,7 +3064,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist))
Changed = true;
- InstCombiner IC(Worklist, &Builder, MinimizeSize,
+ InstCombiner IC(Worklist, &Builder, F.optForMinSize(),
AA, &AC, &TLI, &DT, DL, LI);
if (IC.run())
Changed = true;
@@ -3046,11 +3118,12 @@ public:
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
bool InstructionCombiningPass::runOnFunction(Function &F) {
@@ -3058,7 +3131,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
return false;
// Required analyses.
- auto AA = &getAnalysis<AliasAnalysis>();
+ auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -3076,7 +3149,8 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index e7ef9f96edc2..a9df5e5898ae 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -90,7 +91,9 @@ static const char *const kAsanUnregisterGlobalsName =
"__asan_unregister_globals";
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *const kAsanInitName = "__asan_init_v5";
+static const char *const kAsanInitName = "__asan_init";
+static const char *const kAsanVersionCheckName =
+ "__asan_version_mismatch_check_v6";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
@@ -119,6 +122,10 @@ static const unsigned kAllocaRzSize = 32;
static cl::opt<bool> ClEnableKasan(
"asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClRecover(
+ "asan-recover",
+ cl::desc("Enable recovery mode (continue-after-error)."),
+ cl::Hidden, cl::init(false));
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
@@ -177,7 +184,7 @@ static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
cl::init("__asan_"));
static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas",
cl::desc("instrument dynamic allocas"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClSkipPromotableAllocas(
"asan-skip-promotable-allocas",
cl::desc("Do not instrument promotable allocas"), cl::Hidden,
@@ -273,6 +280,11 @@ class GlobalsMetadata {
GlobalsMetadata() : inited_(false) {}
+ void reset() {
+ inited_ = false;
+ Entries.clear();
+ }
+
void init(Module &M) {
assert(!inited_);
inited_ = true;
@@ -321,7 +333,7 @@ struct ShadowMapping {
static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsKasan) {
- bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
+ bool IsAndroid = TargetTriple.isAndroid();
bool IsIOS = TargetTriple.isiOS();
bool IsFreeBSD = TargetTriple.isOSFreeBSD();
bool IsLinux = TargetTriple.isOSLinux();
@@ -338,6 +350,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
ShadowMapping Mapping;
if (LongSize == 32) {
+ // Android is always PIE, which means that the beginning of the address
+ // space is always available.
if (IsAndroid)
Mapping.Offset = 0;
else if (IsMIPS32)
@@ -376,7 +390,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
  // OR-ing shadow offset is more efficient (at least on x86) if the offset
  // is a power of two, but on ppc64 we have to use add since the shadow
  // offset is not necessarily 1/8-th of the address space.
- Mapping.OrShadowOffset = !IsPPC64 && !(Mapping.Offset & (Mapping.Offset - 1));
+ Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64
+ && !(Mapping.Offset & (Mapping.Offset - 1));
return Mapping;
}
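
The OrShadowOffset condition above (offset is a power of two that cannot collide with the shifted address bits) can be checked numerically; a standalone sketch with hypothetical constants, not the real platform offsets:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Shadow = (Addr >> Scale) + Offset. If Offset is a power of two
      // with no bits overlapping the shifted address, '+' == '|', and
      // OR encodes more cheaply on x86.
      const uint64_t Scale = 3, Offset = 1ULL << 38;
      uint64_t Addr = 0x12345678ULL;
      uint64_t S = Addr >> Scale;
      assert((S & Offset) == 0);
      assert((S + Offset) == (S | Offset));
      return 0;
    }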
@@ -389,8 +404,9 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- explicit AddressSanitizer(bool CompileKernel = false)
- : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan) {
+ explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan),
+ Recover(Recover || ClRecover) {
initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());
}
const char *getPassName() const override {
@@ -437,7 +453,9 @@ struct AddressSanitizer : public FunctionPass {
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F) override;
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+ void markEscapedLocalAllocas(Function &F);
bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
static char ID; // Pass identification, replacement for typeid
DominatorTree &getDominatorTree() const { return *DT; }
@@ -450,10 +468,21 @@ struct AddressSanitizer : public FunctionPass {
bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr,
uint64_t TypeSize) const;
+ /// Helper to cleanup per-function state.
+ struct FunctionStateRAII {
+ AddressSanitizer *Pass;
+ FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) {
+ assert(Pass->ProcessedAllocas.empty() &&
+ "last pass forgot to clear cache");
+ }
+ ~FunctionStateRAII() { Pass->ProcessedAllocas.clear(); }
+ };
+
LLVMContext *C;
Triple TargetTriple;
int LongSize;
bool CompileKernel;
+ bool Recover;
Type *IntptrTy;
ShadowMapping Mapping;
DominatorTree *DT;
@@ -477,8 +506,10 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
- explicit AddressSanitizerModule(bool CompileKernel = false)
- : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan) {}
+ explicit AddressSanitizerModule(bool CompileKernel = false,
+ bool Recover = false)
+ : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan),
+ Recover(Recover || ClRecover) {}
bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
const char *getPassName() const override { return "AddressSanitizerModule"; }
@@ -496,6 +527,7 @@ class AddressSanitizerModule : public ModulePass {
GlobalsMetadata GlobalsMD;
bool CompileKernel;
+ bool Recover;
Type *IntptrTy;
LLVMContext *C;
Triple TargetTriple;
@@ -525,6 +557,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
ShadowMapping Mapping;
SmallVector<AllocaInst *, 16> AllocaVec;
+ SmallSetVector<AllocaInst *, 16> NonInstrumentedStaticAllocaVec;
SmallVector<Instruction *, 8> RetVec;
unsigned StackAlignment;
@@ -545,12 +578,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
SmallVector<AllocaInst *, 1> DynamicAllocaVec;
SmallVector<IntrinsicInst *, 1> StackRestoreVec;
AllocaInst *DynamicAllocaLayout = nullptr;
+ IntrinsicInst *LocalEscapeCall = nullptr;
// Maps Value to an AllocaInst from which the Value is originated.
typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy;
AllocaForValueMapTy AllocaForValue;
- bool HasNonEmptyInlineAsm;
+ bool HasNonEmptyInlineAsm = false;
+ bool HasReturnsTwiceCall = false;
std::unique_ptr<CallInst> EmptyInlineAsm;
FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
@@ -562,7 +597,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
IntptrPtrTy(PointerType::get(IntptrTy, 0)),
Mapping(ASan.Mapping),
StackAlignment(1 << Mapping.Scale),
- HasNonEmptyInlineAsm(false),
EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
bool runOnFunction() {
@@ -596,9 +630,24 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore,
Value *SavedStack) {
IRBuilder<> IRB(InstBefore);
+ Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy);
+ // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we
+    // need to adjust the extracted SP to compute the address of the most recent
+ // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for
+ // this purpose.
+ if (!isa<ReturnInst>(InstBefore)) {
+ Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration(
+ InstBefore->getModule(), Intrinsic::get_dynamic_area_offset,
+ {IntptrTy});
+
+ Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {});
+
+ DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy),
+ DynamicAreaOffset);
+ }
+
IRB.CreateCall(AsanAllocasUnpoisonFunc,
- {IRB.CreateLoad(DynamicAllocaLayout),
- IRB.CreatePtrToInt(SavedStack, IntptrTy)});
+ {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr});
}
// Unpoison dynamic allocas redzones.
@@ -625,7 +674,10 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect Alloca instructions we want (and can) handle.
void visitAllocaInst(AllocaInst &AI) {
- if (!ASan.isInterestingAlloca(AI)) return;
+ if (!ASan.isInterestingAlloca(AI)) {
+ if (AI.isStaticAlloca()) NonInstrumentedStaticAllocaVec.insert(&AI);
+ return;
+ }
StackAlignment = std::max(StackAlignment, AI.getAlignment());
if (ASan.isDynamicAlloca(AI))
@@ -639,6 +691,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void visitIntrinsicInst(IntrinsicInst &II) {
Intrinsic::ID ID = II.getIntrinsicID();
if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II);
+ if (ID == Intrinsic::localescape) LocalEscapeCall = &II;
if (!ClCheckLifetime) return;
if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
return;
@@ -660,9 +713,13 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
AllocaPoisonCallVec.push_back(APC);
}
- void visitCallInst(CallInst &CI) {
- HasNonEmptyInlineAsm |=
- CI.isInlineAsm() && !CI.isIdenticalTo(EmptyInlineAsm.get());
+ void visitCallSite(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ HasNonEmptyInlineAsm |=
+ CI->isInlineAsm() && !CI->isIdenticalTo(EmptyInlineAsm.get());
+ HasReturnsTwiceCall |= CI->canReturnTwice();
+ }
}
// ---------------------- Helpers.
@@ -689,7 +746,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Instruction *ThenTerm, Value *ValueIfFalse);
};
-} // namespace
+} // anonymous namespace
char AddressSanitizer::ID = 0;
INITIALIZE_PASS_BEGIN(
@@ -697,12 +754,15 @@ INITIALIZE_PASS_BEGIN(
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
AddressSanitizer, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
-FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel) {
- return new AddressSanitizer(CompileKernel);
+FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
+ bool Recover) {
+ assert(!CompileKernel || Recover);
+ return new AddressSanitizer(CompileKernel, Recover);
}
char AddressSanitizerModule::ID = 0;
@@ -711,8 +771,10 @@ INITIALIZE_PASS(
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass",
false, false)
-ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel) {
- return new AddressSanitizerModule(CompileKernel);
+ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
+ bool Recover) {
+ assert(!CompileKernel || Recover);
+ return new AddressSanitizerModule(CompileKernel, Recover);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -799,8 +861,10 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
getAllocaSizeInBytes(&AI) > 0 &&
// We are only interested in allocas not promotable to registers.
// Promotable allocas are common under -O0.
- (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI) ||
- isDynamicAlloca(AI)));
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
+ // inalloca allocas are not treated as static, and we don't want
+           // dynamic alloca instrumentation for them either.
+ !AI.isUsedWithInAlloca());
ProcessedAllocas[&AI] = IsInteresting;
return IsInteresting;
@@ -868,10 +932,8 @@ static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
} else {
return false;
}
- if (!isPointerOperand(I->getOperand(0)) ||
- !isPointerOperand(I->getOperand(1)))
- return false;
- return true;
+ return isPointerOperand(I->getOperand(0)) &&
+ isPointerOperand(I->getOperand(1));
}
bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
@@ -919,7 +981,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL));
- if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
+ if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
NumOptimizedAccessesToGlobalVar++;
return;
@@ -1041,13 +1103,17 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
- BasicBlock *CrashBlock =
+ if (Recover) {
+ CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false);
+ } else {
+ BasicBlock *CrashBlock =
BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
- CrashTerm = new UnreachableInst(*C, CrashBlock);
- BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
- ReplaceInstWithInst(CheckTerm, NewTerm);
+ CrashTerm = new UnreachableInst(*C, CrashBlock);
+ BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
+ ReplaceInstWithInst(CheckTerm, NewTerm);
+ }
} else {
- CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true);
+ CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover);
}
Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite,
@@ -1084,7 +1150,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
GlobalValue *ModuleName) {
// Set up the arguments to our poison/unpoison functions.
- IRBuilder<> IRB(GlobalInit.begin()->getFirstInsertionPt());
+ IRBuilder<> IRB(&GlobalInit.front(),
+ GlobalInit.front().getFirstInsertionPt());
// Add a call to poison all external globals before the given function starts.
Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
@@ -1147,6 +1214,14 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// Do not instrument globals from special LLVM sections.
if (Section.find("__llvm") != StringRef::npos) return false;
+ // Do not instrument function pointers to initialization and termination
+ // routines: dynamic linker will not properly handle redzones.
+ if (Section.startswith(".preinit_array") ||
+ Section.startswith(".init_array") ||
+ Section.startswith(".fini_array")) {
+ return false;
+ }
+
// Callbacks put into the CRT initializer/terminator sections
// should not be instrumented.
// See https://code.google.com/p/address-sanitizer/issues/detail?id=305
@@ -1162,10 +1237,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
bool TAAParsed;
std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(
Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize);
- if (!ErrorCode.empty()) {
- assert(false && "Invalid section specifier.");
- return false;
- }
+ assert(ErrorCode.empty() && "Invalid section specifier.");
// Ignore the globals from the __OBJC section. The ObjC runtime assumes
// those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
@@ -1383,13 +1455,11 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
const std::string TypeStr = AccessIsWrite ? "store" : "load";
const std::string ExpStr = Exp ? "exp_" : "";
const std::string SuffixStr = CompileKernel ? "N" : "_n";
- const std::string EndingStr = CompileKernel ? "_noabort" : "";
- const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
- // TODO(glider): for KASan builds add _noabort to error reporting
- // functions and make them actually noabort (remove the UnreachableInst).
+ const std::string EndingStr = Recover ? "_noabort" : "";
+ Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
AsanErrorCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr,
+ kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + EndingStr,
IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
@@ -1400,7 +1470,7 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + Suffix,
+ kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
IRB.getVoidTy(), IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
@@ -1448,15 +1518,20 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (!CompileKernel) {
std::tie(AsanCtorFunction, AsanInitFunction) =
- createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
+ createSanitizerCtorAndInitFunctions(
+ M, kAsanModuleCtorName, kAsanInitName,
+ /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName);
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
}
Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
return true;
}
+bool AddressSanitizer::doFinalization(Module &M) {
+ GlobalsMD.reset();
+ return false;
+}
+
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -1466,13 +1541,41 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// We cannot just ignore these methods, because they may call other
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
- IRBuilder<> IRB(F.begin()->begin());
+ IRBuilder<> IRB(&F.front(), F.front().begin());
IRB.CreateCall(AsanInitFunction, {});
return true;
}
return false;
}
+void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
+ // Find the one possible call to llvm.localescape and pre-mark allocas passed
+ // to it as uninteresting. This assumes we haven't started processing allocas
+ // yet. This check is done up front because iterating the use list in
+ // isInterestingAlloca would be algorithmically slower.
+ assert(ProcessedAllocas.empty() && "must process localescape before allocas");
+
+ // Try to get the declaration of llvm.localescape. If it's not in the module,
+ // we can exit early.
+ if (!F.getParent()->getFunction("llvm.localescape")) return;
+
+  // Look for a call to llvm.localescape in the entry block. It can't be in
+ // any other block.
+ for (Instruction &I : F.getEntryBlock()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::localescape) {
+ // We found a call. Mark all the allocas passed in as uninteresting.
+ for (Value *Arg : II->arg_operands()) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
+ assert(AI && AI->isStaticAlloca() &&
+ "non-static alloca arg to localescape");
+ ProcessedAllocas[AI] = false;
+ }
+ break;
+ }
+ }
+}
+
bool AddressSanitizer::runOnFunction(Function &F) {
if (&F == AsanCtorFunction) return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
@@ -1488,6 +1591,12 @@ bool AddressSanitizer::runOnFunction(Function &F) {
if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false;
+ FunctionStateRAII CleanupObj(this);
+
+ // We can't instrument allocas used with llvm.localescape. Only static allocas
+ // can be passed to that intrinsic.
+ markEscapedLocalAllocas(F);
+
// We want to instrument every address only once per basic block (unless there
// are calls between uses).
SmallSet<Value *, 16> TempsToInstrument;
@@ -1715,6 +1824,16 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() {
void FunctionStackPoisoner::poisonStack() {
assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0);
+ // Insert poison calls for lifetime intrinsics for alloca.
+ bool HavePoisonedAllocas = false;
+ for (const auto &APC : AllocaPoisonCallVec) {
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ IRBuilder<> IRB(APC.InsBefore);
+ poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
+ HavePoisonedAllocas |= APC.DoPoison;
+ }
+
if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) {
// Handle dynamic allocas.
createDynamicAllocasInitStorage();
@@ -1723,7 +1842,7 @@ void FunctionStackPoisoner::poisonStack() {
unpoisonDynamicAllocas();
}
- if (AllocaVec.size() == 0) return;
+ if (AllocaVec.empty()) return;
int StackMallocIdx = -1;
DebugLoc EntryDebugLocation;
@@ -1734,6 +1853,19 @@ void FunctionStackPoisoner::poisonStack() {
IRBuilder<> IRB(InsBefore);
IRB.SetCurrentDebugLocation(EntryDebugLocation);
+ // Make sure non-instrumented allocas stay in the entry block. Otherwise,
+ // debug info is broken, because only entry-block allocas are treated as
+ // regular stack slots.
+ auto InsBeforeB = InsBefore->getParent();
+ assert(InsBeforeB == &F.getEntryBlock());
+ for (BasicBlock::iterator I(InsBefore); I != InsBeforeB->end(); ++I)
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ if (NonInstrumentedStaticAllocaVec.count(AI) > 0)
+ AI->moveBefore(InsBefore);
+
+ // If we have a call to llvm.localescape, keep it in the entry block.
+ if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore);
+
SmallVector<ASanStackVariableDescription, 16> SVD;
SVD.reserve(AllocaVec.size());
for (AllocaInst *AI : AllocaVec) {
@@ -1751,10 +1883,15 @@ void FunctionStackPoisoner::poisonStack() {
uint64_t LocalStackSize = L.FrameSize;
bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel &&
LocalStackSize <= kMaxStackMallocSize;
- // Don't do dynamic alloca or stack malloc in presence of inline asm:
- // too often it makes assumptions on which registers are available.
- bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm;
- DoStackMalloc &= !HasNonEmptyInlineAsm;
+ bool DoDynamicAlloca = ClDynamicAllocaStack;
+ // Don't do dynamic alloca or stack malloc if:
+ // 1) There is inline asm: too often it makes assumptions on which registers
+ // are available.
+ // 2) There is a returns_twice call (typically setjmp), which is
+ // optimization-hostile, and doesn't play well with introduced indirect
+ // register-relative calculation of local variable addresses.
+ DoDynamicAlloca &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
+ DoStackMalloc &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
Value *StaticAlloca =
DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
@@ -1804,16 +1941,6 @@ void FunctionStackPoisoner::poisonStack() {
DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
}
- // Insert poison calls for lifetime intrinsics for alloca.
- bool HavePoisonedAllocas = false;
- for (const auto &APC : AllocaPoisonCallVec) {
- assert(APC.InsBefore);
- assert(APC.AI);
- IRBuilder<> IRB(APC.InsBefore);
- poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
- HavePoisonedAllocas |= APC.DoPoison;
- }
-
// Replace Alloca instructions with base+offset.
for (const auto &Desc : SVD) {
AllocaInst *AI = Desc.AI;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index f6858034d79e..fd3dfd9af033 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -106,7 +106,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
}
++ChecksAdded;
- Instruction *Inst = Builder->GetInsertPoint();
+ BasicBlock::iterator Inst = Builder->GetInsertPoint();
BasicBlock *OldBB = Inst->getParent();
BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
OldBB->getTerminator()->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
new file mode 100644
index 000000000000..c47fdbf68996
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
@@ -0,0 +1,217 @@
+//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Union-find algorithm to compute the Minimum
+// Spanning Tree for a given CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+#define DEBUG_TYPE "cfgmst"
+
+/// \brief A union-find based Minimum Spanning Tree for CFG
+///
+/// Implements a Union-find algorithm to compute the Minimum Spanning Tree
+/// for a given CFG.
+template <class Edge, class BBInfo> class CFGMST {
+public:
+ Function &F;
+
+  // Store all the edges of the CFG. It may contain stale edges for which
+  // Removed is set.
+ std::vector<std::unique_ptr<Edge>> AllEdges;
+
+ // This map records the auxiliary information for each BB.
+ DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos;
+
+  // Find the root group of G and compress the path from G to the root.
+ BBInfo *findAndCompressGroup(BBInfo *G) {
+ if (G->Group != G)
+ G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group));
+ return static_cast<BBInfo *>(G->Group);
+ }
+
+  // Union BB1 and BB2 into the same group and return true.
+  // Return false if BB1 and BB2 are already in the same group.
+ bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1));
+ BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2));
+
+ if (BB1G == BB2G)
+ return false;
+
+    // Make the smaller-rank tree a direct child of the root of the
+    // higher-rank tree.
+ if (BB1G->Rank < BB2G->Rank)
+ BB1G->Group = BB2G;
+ else {
+ BB2G->Group = BB1G;
+      // If the ranks are the same, increment the rank of the new root by one.
+ if (BB1G->Rank == BB2G->Rank)
+ BB1G->Rank++;
+ }
+ return true;
+ }
+
+  // Given a BB, return its auxiliary information.
+ BBInfo &getBBInfo(const BasicBlock *BB) const {
+ auto It = BBInfos.find(BB);
+ assert(It->second.get() != nullptr);
+ return *It->second.get();
+ }
+
+  // Traverse the CFG, find all the edges, and assign each edge a weight.
+  // Edges with large weight will be put into the MST first, so they are
+  // less likely to be instrumented.
+ void buildEdges() {
+ DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n");
+
+ const BasicBlock *BB = &(F.getEntryBlock());
+ uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
+ // Add a fake edge to the entry.
+ addEdge(nullptr, BB, EntryWeight);
+
+ // Special handling for single BB functions.
+ if (succ_empty(BB)) {
+ addEdge(BB, nullptr, EntryWeight);
+ return;
+ }
+
+ static const uint32_t CriticalEdgeMultiplier = 1000;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ uint64_t BBWeight =
+ (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
+ uint64_t Weight = 2;
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ BasicBlock *TargetBB = TI->getSuccessor(i);
+ bool Critical = isCriticalEdge(TI, i);
+ uint64_t scaleFactor = BBWeight;
+ if (Critical) {
+ if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+ scaleFactor *= CriticalEdgeMultiplier;
+ else
+ scaleFactor = UINT64_MAX;
+ }
+ if (BPI != nullptr)
+ Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor);
+ addEdge(&*BB, TargetBB, Weight).IsCritical = Critical;
+ DEBUG(dbgs() << " Edge: from " << BB->getName() << " to "
+ << TargetBB->getName() << " w=" << Weight << "\n");
+ }
+ } else {
+ addEdge(&*BB, nullptr, BBWeight);
+ DEBUG(dbgs() << " Edge: from " << BB->getName() << " to exit"
+ << " w = " << BBWeight << "\n");
+ }
+ }
+ }
+
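
buildEdges() above boosts critical-edge weights with saturation so those edges land in the MST and are less likely to need instrumentation (and splitting); a standalone sketch of just that scaling, outside this patch:

    #include <cassert>
    #include <cstdint>

    // Saturating 1000x boost for critical-edge weights, mirroring the
    // CriticalEdgeMultiplier logic in buildEdges().
    uint64_t scaleCritical(uint64_t W) {
      const uint32_t Mul = 1000;
      return W < UINT64_MAX / Mul ? W * Mul : UINT64_MAX;
    }

    int main() {
      assert(scaleCritical(2) == 2000);
      assert(scaleCritical(UINT64_MAX / 2) == UINT64_MAX); // saturates
      return 0;
    }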
+  // Sort CFG edges by weight, in descending order.
+ void sortEdgesByWeight() {
+ std::stable_sort(AllEdges.begin(), AllEdges.end(),
+ [](const std::unique_ptr<Edge> &Edge1,
+ const std::unique_ptr<Edge> &Edge2) {
+ return Edge1->Weight > Edge2->Weight;
+ });
+ }
+
+ // Traverse all the edges and compute the Minimum Weight Spanning Tree
+ // using union-find algorithm.
+ void computeMinimumSpanningTree() {
+    // First, put all the critical edges whose destination is a landing pad
+    // into the MST. This works around the lack of support for splitting
+    // critical edges when the destination BB is a landing pad.
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (Ei->IsCritical) {
+ if (Ei->DestBB && Ei->DestBB->isLandingPad()) {
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+ }
+
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+
+ // Dump the Debug information about the instrumentation.
+ void dumpEdges(raw_ostream &OS, const Twine &Message) const {
+ if (!Message.str().empty())
+ OS << Message << "\n";
+ OS << " Number of Basic Blocks: " << BBInfos.size() << "\n";
+ for (auto &BI : BBInfos) {
+ const BasicBlock *BB = BI.first;
+ OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " "
+ << BI.second->infoString() << "\n";
+ }
+
+ OS << " Number of Edges: " << AllEdges.size()
+ << " (*: Instrument, C: CriticalEdge, -: Removed)\n";
+ uint32_t Count = 0;
+ for (auto &EI : AllEdges)
+ OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->"
+ << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n";
+ }
+
+ // Add an edge to AllEdges with weight W.
+ Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) {
+ uint32_t Index = BBInfos.size();
+ auto Iter = BBInfos.end();
+ bool Inserted;
+ std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr));
+ if (Inserted) {
+ // Newly inserted, update the real info.
+ Iter->second = std::move(llvm::make_unique<BBInfo>(Index));
+ Index++;
+ }
+ std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr));
+ if (Inserted)
+ // Newly inserted, update the real info.
+ Iter->second = std::move(llvm::make_unique<BBInfo>(Index));
+ AllEdges.emplace_back(new Edge(Src, Dest, W));
+ return *AllEdges.back();
+ }
+
+ BranchProbabilityInfo *BPI;
+ BlockFrequencyInfo *BFI;
+
+public:
+ CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr,
+ BlockFrequencyInfo *BFI_ = nullptr)
+ : F(Func), BPI(BPI_), BFI(BFI_) {
+ buildEdges();
+ sortEdgesByWeight();
+ computeMinimumSpanningTree();
+ }
+};
+
+#undef DEBUG_TYPE // "cfgmst"
+} // end namespace llvm
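
The new header centers on the union-find in findAndCompressGroup/unionGroups; for reference, a minimal self-contained sketch of the same structure on plain integer ids (not part of this diff):

    #include <cassert>
    #include <numeric>
    #include <utility>
    #include <vector>

    // Union-find with path compression and union by rank.
    struct UnionFind {
      std::vector<int> Parent, Rank;
      explicit UnionFind(int N) : Parent(N), Rank(N, 0) {
        std::iota(Parent.begin(), Parent.end(), 0);
      }
      int find(int X) {
        if (Parent[X] != X)
          Parent[X] = find(Parent[X]); // path compression
        return Parent[X];
      }
      bool unite(int A, int B) {       // false if already in one group
        A = find(A); B = find(B);
        if (A == B) return false;
        if (Rank[A] < Rank[B]) std::swap(A, B);
        Parent[B] = A;
        if (Rank[A] == Rank[B]) Rank[A]++;
        return true;
      }
    };

    int main() {
      UnionFind UF(4);
      assert(UF.unite(0, 1) && UF.unite(2, 3) && UF.unite(1, 3));
      assert(!UF.unite(0, 2)); // cycle edge: would stay out of the MST
      return 0;
    }

In Kruskal terms, edges whose unite() returns false would close a cycle; in computeMinimumSpanningTree those are exactly the edges left out of the MST, and thus the candidates for instrumentation.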
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 2de6e1afaba9..d459fc50d136 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -72,6 +72,11 @@
using namespace llvm;
+// External symbol to be used when generating the shadow address for
+// architectures with multiple VMAs. Instead of using a constant integer,
+// the runtime will set the external mask based on the VMA range.
+static const char *const kDFSanExternShadowPtrMask = "__dfsan_shadow_ptr_mask";
+
// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct. For example,
// if the input IR contains a load with alignment 8, this flag will cause
@@ -124,6 +129,7 @@ static cl::opt<bool> ClDebugNonzeroLabels(
"load or return with a nonzero label"),
cl::Hidden);
+
namespace {
StringRef GetGlobalTypeString(const GlobalValue &G) {
@@ -231,6 +237,7 @@ class DataFlowSanitizer : public ModulePass {
void *(*GetRetvalTLSPtr)();
Constant *GetArgTLS;
Constant *GetRetvalTLS;
+ Constant *ExternalShadowMask;
FunctionType *DFSanUnionFnTy;
FunctionType *DFSanUnionLoadFnTy;
FunctionType *DFSanUnimplementedFnTy;
@@ -248,7 +255,7 @@ class DataFlowSanitizer : public ModulePass {
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeSet ReadOnlyNoneAttrs;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
+ bool DFSanRuntimeShadowMask;
Value *getShadowAddress(Value *Addr, Instruction *Pos);
bool isInstrumented(const Function *F);
@@ -362,7 +369,8 @@ llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,
DataFlowSanitizer::DataFlowSanitizer(
const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),
void *(*getRetValTLS)())
- : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {
+ : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS),
+ DFSanRuntimeShadowMask(false) {
std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
ClABIListFiles.end());
@@ -420,6 +428,8 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el;
+ bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64 ||
+ TargetTriple.getArch() == llvm::Triple::aarch64_be;
const DataLayout &DL = M.getDataLayout();
@@ -434,6 +444,9 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
else if (IsMIPS64)
ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL);
+ // AArch64 supports multiple VMAs and the shadow mask is set at runtime.
+ else if (IsAArch64)
+ DFSanRuntimeShadowMask = true;
else
report_fatal_error("unsupported triple");
@@ -578,7 +591,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
- DFSF.ValShadowMap[ValAI] = ShadowAI;
+ DFSF.ValShadowMap[&*ValAI] = &*ShadowAI;
DFSanVisitor(DFSF).visitCallInst(*CI);
if (!FT->getReturnType()->isVoidTy())
new StoreInst(DFSF.getShadow(RI->getReturnValue()),
@@ -592,8 +605,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
if (ABIList.isIn(M, "skip"))
return false;
- FunctionDIs = makeSubprogramMap(M);
-
if (!GetArgTLSPtr) {
Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
@@ -606,6 +617,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
}
+ ExternalShadowMask =
+ Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
+
DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
@@ -643,16 +657,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
std::vector<Function *> FnsToInstrument;
llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI;
- for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) {
- if (!i->isIntrinsic() &&
- i != DFSanUnionFn &&
- i != DFSanCheckedUnionFn &&
- i != DFSanUnionLoadFn &&
- i != DFSanUnimplementedFn &&
- i != DFSanSetLabelFn &&
- i != DFSanNonzeroLabelFn &&
- i != DFSanVarargWrapperFn)
- FnsToInstrument.push_back(&*i);
+ for (Function &i : M) {
+ if (!i.isIntrinsic() &&
+ &i != DFSanUnionFn &&
+ &i != DFSanCheckedUnionFn &&
+ &i != DFSanUnionLoadFn &&
+ &i != DFSanUnimplementedFn &&
+ &i != DFSanSetLabelFn &&
+ &i != DFSanNonzeroLabelFn &&
+ &i != DFSanVarargWrapperFn)
+ FnsToInstrument.push_back(&i);
}
// Give function aliases prefixes when necessary, and build wrappers where the
@@ -710,7 +724,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
NewFArg = NewF->arg_begin(),
FArgEnd = F.arg_end();
FArg != FArgEnd; ++FArg, ++NewFArg) {
- FArg->replaceAllUsesWith(NewFArg);
+ FArg->replaceAllUsesWith(&*NewFArg);
}
NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
@@ -750,11 +764,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
F.replaceAllUsesWith(WrappedFnCst);
- // Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(&F);
- if (DI != FunctionDIs.end())
- DI->second->replaceFunction(&F);
-
UnwrappedFnMap[WrappedFnCst] = &F;
*i = NewF;
@@ -842,7 +851,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
if (Instruction *I = dyn_cast<Instruction>(V))
Pos = I->getNextNode();
else
- Pos = DFSF.F->getEntryBlock().begin();
+ Pos = &DFSF.F->getEntryBlock().front();
while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
Pos = Pos->getNextNode();
IRBuilder<> IRB(Pos);
@@ -864,7 +873,7 @@ Value *DFSanFunction::getArgTLSPtr() {
if (DFS.ArgTLS)
return ArgTLSPtr = DFS.ArgTLS;
- IRBuilder<> IRB(F->getEntryBlock().begin());
+ IRBuilder<> IRB(&F->getEntryBlock().front());
return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {});
}
@@ -874,7 +883,7 @@ Value *DFSanFunction::getRetvalTLS() {
if (DFS.RetvalTLS)
return RetvalTLSPtr = DFS.RetvalTLS;
- IRBuilder<> IRB(F->getEntryBlock().begin());
+ IRBuilder<> IRB(&F->getEntryBlock().front());
return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {});
}
@@ -906,7 +915,7 @@ Value *DFSanFunction::getShadow(Value *V) {
Function::arg_iterator i = F->arg_begin();
while (ArgIdx--)
++i;
- Shadow = i;
+ Shadow = &*i;
assert(Shadow->getType() == DFS.ShadowTy);
break;
}
@@ -928,9 +937,15 @@ void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
assert(Addr != RetvalTLS && "Reinstrumenting?");
IRBuilder<> IRB(Pos);
+ Value *ShadowPtrMaskValue;
+ if (DFSanRuntimeShadowMask)
+ ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask);
+ else
+ ShadowPtrMaskValue = ShadowPtrMask;
return IRB.CreateIntToPtr(
IRB.CreateMul(
- IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask),
+ IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
+ IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)),
ShadowPtrMul),
ShadowPtrTy);
}
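
As a model of the mapping this hunk computes: the shadow address is the application address masked, then scaled by the shadow label width. A minimal sketch, assuming the x86_64 default mask and DFSan's 16-bit labels (on AArch64 the mask is not a compile-time constant; as shown above, the pass loads it at runtime through ExternalShadowMask instead):

    #include <cstdint>

    // Sketch of DFSan's shadow mapping, not the pass itself. Assumptions:
    // the x86_64 default mask and 2-byte (16-bit) shadow labels.
    uint64_t dfsanShadowAddress(uint64_t AppAddr) {
      const uint64_t ShadowPtrMask = ~0x700000000000ULL; // x86_64 default
      const uint64_t ShadowPtrMul = 2;                   // bytes per label
      return (AppAddr & ShadowPtrMask) * ShadowPtrMul;
    }
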
@@ -991,7 +1006,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
Call->addAttribute(2, Attribute::ZExt);
BasicBlock *Tail = BI->getSuccessor(0);
- PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", Tail->begin());
+ PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
Phi->addIncoming(Call, Call->getParent());
Phi->addIncoming(V1, Head);
@@ -1105,7 +1120,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
BasicBlock *Head = Pos->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(Pos);
+ BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator());
if (DomTreeNode *OldNode = DT.getNode(Head)) {
std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
@@ -1475,8 +1490,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (FT->isVarArg()) {
auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
CS.arg_size() - FT->getNumParams());
- auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva",
- DFSF.F->getEntryBlock().begin());
+ auto *LabelVAAlloca = new AllocaInst(
+ LabelVATy, "labelva", &DFSF.F->getEntryBlock().front());
for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
@@ -1490,7 +1505,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (!DFSF.LabelReturnAlloca) {
DFSF.LabelReturnAlloca =
new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn",
- DFSF.F->getEntryBlock().begin());
+ &DFSF.F->getEntryBlock().front());
}
Args.push_back(DFSF.LabelReturnAlloca);
}
@@ -1529,13 +1544,14 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (!CS.getType()->isVoidTy()) {
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
if (II->getNormalDest()->getSinglePredecessor()) {
- Next = II->getNormalDest()->begin();
+ Next = &II->getNormalDest()->front();
} else {
BasicBlock *NewBB =
SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
- Next = NewBB->begin();
+ Next = &NewBB->front();
}
} else {
+ assert(CS->getIterator() != CS->getParent()->end());
Next = CS->getNextNode();
}
@@ -1568,7 +1584,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
AllocaInst *VarArgShadow =
- new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin());
+ new AllocaInst(VarArgArrayTy, "", &DFSF.F->getEntryBlock().front());
Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
for (unsigned n = 0; i != e; ++i, ++n) {
IRB.CreateStore(
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 9a3ed5c04efc..fa939aee252a 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -138,6 +138,7 @@ namespace {
Module *M;
LLVMContext *Ctx;
SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
+ DenseMap<DISubprogram *, Function *> FnMap;
};
}
@@ -309,13 +310,12 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(const DISubprogram *SP, raw_ostream *os, uint32_t Ident,
- bool UseCfgChecksum, bool ExitBlockBeforeBody)
+ GCOVFunction(const DISubprogram *SP, Function *F, raw_ostream *os,
+ uint32_t Ident, bool UseCfgChecksum, bool ExitBlockBeforeBody)
: SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
ReturnBlock(1, os) {
this->os = os;
- Function *F = SP->getFunction();
DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
uint32_t i = 0;
@@ -347,8 +347,8 @@ namespace {
std::string EdgeDestinations;
raw_string_ostream EDOS(EdgeDestinations);
Function *F = Blocks.begin()->first->getParent();
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- GCOVBlock &Block = getBlock(I);
+ for (BasicBlock &I : *F) {
+ GCOVBlock &Block = getBlock(&I);
for (int i = 0, e = Block.OutEdges.size(); i != e; ++i)
EDOS << Block.OutEdges[i]->Number;
}
@@ -389,8 +389,8 @@ namespace {
// Emit edges between blocks.
if (Blocks.empty()) return;
Function *F = Blocks.begin()->first->getParent();
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- GCOVBlock &Block = getBlock(I);
+ for (BasicBlock &I : *F) {
+ GCOVBlock &Block = getBlock(&I);
if (Block.OutEdges.empty()) continue;
writeBytes(EdgeTag, 4);
@@ -405,9 +405,8 @@ namespace {
}
// Emit lines for each block.
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- getBlock(I).writeOut();
- }
+ for (BasicBlock &I : *F)
+ getBlock(&I).writeOut();
}
private:
@@ -451,6 +450,12 @@ bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
+ FnMap.clear();
+ for (Function &F : M) {
+ if (DISubprogram *SP = F.getSubprogram())
+ FnMap[SP] = &F;
+ }
+
if (Options.EmitNotes) emitProfileNotes();
if (Options.EmitData) return emitProfileArcs();
return false;
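
The map built here replaces the subprogram-to-function link the profiler used to read from debug info; it simply inverts Function::getSubprogram(). A hedged sketch of the lookup (the helper name is illustrative; a DenseMap lookup on a missing key value-initializes, so a subprogram with no IR function yields nullptr, which is why the `if (!F) continue;` checks below still work):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"

    // Invert Function::getSubprogram() once per module, then look functions
    // up by their DISubprogram when walking the compile unit's subprograms.
    static llvm::Function *functionForSubprogram(llvm::Module &M,
                                                 llvm::DISubprogram *Wanted) {
      llvm::DenseMap<llvm::DISubprogram *, llvm::Function *> FnMap;
      for (llvm::Function &F : M)
        if (llvm::DISubprogram *SP = F.getSubprogram())
          FnMap[SP] = &F;
      return FnMap[Wanted]; // nullptr when Wanted has no function here
    }
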
@@ -495,7 +500,7 @@ void GCOVProfiler::emitProfileNotes() {
unsigned FunctionIdent = 0;
for (auto *SP : CU->getSubprograms()) {
- Function *F = SP->getFunction();
+ Function *F = FnMap[SP];
if (!F) continue;
if (!functionHasLines(F)) continue;
@@ -507,13 +512,13 @@ void GCOVProfiler::emitProfileNotes() {
++It;
EntryBlock.splitBasicBlock(It);
- Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
+ Funcs.push_back(make_unique<GCOVFunction>(SP, F, &out, FunctionIdent++,
Options.UseCfgChecksum,
Options.ExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
+ GCOVBlock &Block = Func.getBlock(&*BB);
TerminatorInst *TI = BB->getTerminator();
if (int successors = TI->getNumSuccessors()) {
for (int i = 0; i != successors; ++i) {
@@ -574,7 +579,7 @@ bool GCOVProfiler::emitProfileArcs() {
auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
for (auto *SP : CU->getSubprograms()) {
- Function *F = SP->getFunction();
+ Function *F = FnMap[SP];
if (!F) continue;
if (!functionHasLines(F)) continue;
if (!Result) Result = true;
@@ -605,7 +610,7 @@ bool GCOVProfiler::emitProfileArcs() {
int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
if (Successors) {
if (Successors == 1) {
- IRBuilder<> Builder(BB->getFirstInsertionPt());
+ IRBuilder<> Builder(&*BB->getFirstInsertionPt());
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge);
Value *Count = Builder.CreateLoad(Counter);
@@ -625,7 +630,7 @@ bool GCOVProfiler::emitProfileArcs() {
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else {
- ComplexEdgePreds.insert(BB);
+ ComplexEdgePreds.insert(&*BB);
for (int i = 0; i != Successors; ++i)
ComplexEdgeSuccs.insert(TI->getSuccessor(i));
}
@@ -641,13 +646,13 @@ bool GCOVProfiler::emitProfileArcs() {
GlobalVariable *EdgeState = getEdgeStateValue();
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt());
+ IRBuilder<> Builder(&*ComplexEdgePreds[i + 1]->getFirstInsertionPt());
Builder.CreateStore(Builder.getInt32(i), EdgeState);
}
for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
// Call runtime to perform increment.
- IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt());
+ IRBuilder<> Builder(&*ComplexEdgeSuccs[i + 1]->getFirstInsertionPt());
Value *CounterPtrArray =
Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
i * ComplexEdgePreds.size());
@@ -731,8 +736,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
IRBuilder<> Builder(Succ);
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge + i);
- EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
- (Preds.idFor(BB)-1)] = cast<Constant>(Counter);
+ EdgeTable[((Succs.idFor(Succ) - 1) * Preds.size()) +
+ (Preds.idFor(&*BB) - 1)] = cast<Constant>(Counter);
}
}
Edge += Successors;
@@ -901,7 +906,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
// uint32_t pred = *predecessor;
// if (pred == 0xffffffff) return;
- Argument *Arg = Fn->arg_begin();
+ Argument *Arg = &*Fn->arg_begin();
Arg->setName("predecessor");
Value *Pred = Builder.CreateLoad(Arg, "pred");
Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff));
@@ -912,7 +917,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
// uint64_t *counter = counters[pred];
// if (!counter) return;
Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
- Arg = std::next(Fn->arg_begin());
+ Arg = &*std::next(Fn->arg_begin());
Arg->setName("counters");
Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 712bf8edc7ea..92e41ee27c09 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -7,18 +7,18 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass lowers instrprof_increment intrinsics emitted by a frontend for
-// profiling. It also builds the data structures and initialization code needed
-// for updating execution counts and emitting the profile at runtime.
+// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
+// It also builds the data structures and initialization code needed for
+// updating execution counts and emitting the profile at runtime.
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Instrumentation.h"
-
#include "llvm/ADT/Triple.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -49,7 +49,15 @@ public:
private:
InstrProfOptions Options;
Module *M;
- DenseMap<GlobalVariable *, GlobalVariable *> RegionCounters;
+  struct PerFunctionProfileData {
+ uint32_t NumValueSites[IPVK_Last+1];
+ GlobalVariable* RegionCounters;
+ GlobalVariable* DataVar;
+ PerFunctionProfileData() : RegionCounters(nullptr), DataVar(nullptr) {
+ memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last+1));
+ }
+  };
+ DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
std::vector<Value *> UsedVars;
bool isMachO() const {
@@ -58,24 +66,30 @@ private:
/// Get the section name for the counter variables.
StringRef getCountersSection() const {
- return isMachO() ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts";
+ return getInstrProfCountersSectionName(isMachO());
}
/// Get the section name for the name variables.
StringRef getNameSection() const {
- return isMachO() ? "__DATA,__llvm_prf_names" : "__llvm_prf_names";
+ return getInstrProfNameSectionName(isMachO());
}
/// Get the section name for the profile data variables.
StringRef getDataSection() const {
- return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";
+ return getInstrProfDataSectionName(isMachO());
}
/// Get the section name for the coverage mapping data.
StringRef getCoverageSection() const {
- return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap";
+ return getInstrProfCoverageSectionName(isMachO());
}
+ /// Count the number of instrumented value sites for the function.
+ void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
+
+ /// Replace instrprof_value_profile with a call to runtime library.
+ void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
+
/// Replace instrprof_increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
@@ -117,20 +131,37 @@ bool InstrProfiling::runOnModule(Module &M) {
bool MadeChange = false;
this->M = &M;
- RegionCounters.clear();
+ ProfileDataMap.clear();
UsedVars.clear();
+  // The number of value sites inside each instrumented function is not
+  // known in advance, so first count the instrumented target value sites
+  // and record the totals as a field in the profile data variable.
for (Function &F : M)
for (BasicBlock &BB : F)
for (auto I = BB.begin(), E = BB.end(); I != E;)
- if (auto *Inc = dyn_cast<InstrProfIncrementInst>(I++)) {
+ if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I++))
+ computeNumValueSiteCounts(Ind);
+
+ for (Function &F : M)
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(), E = BB.end(); I != E;) {
+ auto Instr = I++;
+ if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) {
lowerIncrement(Inc);
MadeChange = true;
+ } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
+ lowerValueProfileInst(Ind);
+ MadeChange = true;
}
- if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) {
+ }
+
+ if (GlobalVariable *Coverage =
+ M.getNamedGlobal(getCoverageMappingVarName())) {
lowerCoverageData(Coverage);
MadeChange = true;
}
+
if (!MadeChange)
return false;
@@ -141,10 +172,59 @@ bool InstrProfiling::runOnModule(Module &M) {
return true;
}
+static Constant *getOrInsertValueProfilingCall(Module &M) {
+ LLVMContext &Ctx = M.getContext();
+ auto *ReturnTy = Type::getVoidTy(M.getContext());
+ Type *ParamTypes[] = {
+#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *ValueProfilingCallTy =
+ FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
+ return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
+ ValueProfilingCallTy);
+}
+
+void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
+
+ GlobalVariable *Name = Ind->getName();
+ uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
+ uint64_t Index = Ind->getIndex()->getZExtValue();
+ auto It = ProfileDataMap.find(Name);
+ if (It == ProfileDataMap.end()) {
+ PerFunctionProfileData PD;
+ PD.NumValueSites[ValueKind] = Index + 1;
+ ProfileDataMap[Name] = PD;
+ } else if (It->second.NumValueSites[ValueKind] <= Index)
+ It->second.NumValueSites[ValueKind] = Index + 1;
+}
+
+void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
+
+ GlobalVariable *Name = Ind->getName();
+ auto It = ProfileDataMap.find(Name);
+ assert(It != ProfileDataMap.end() && It->second.DataVar &&
+         "value profiling detected in function with no counter increment");
+
+ GlobalVariable *DataVar = It->second.DataVar;
+ uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
+ uint64_t Index = Ind->getIndex()->getZExtValue();
+ for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
+ Index += It->second.NumValueSites[Kind];
+
+ IRBuilder<> Builder(Ind);
+ Value* Args[3] = {Ind->getTargetValue(),
+ Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+ Builder.getInt32(Index)};
+ Ind->replaceAllUsesWith(
+ Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args));
+ Ind->eraseFromParent();
+}
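
A worked example of the index flattening above: the per-kind site indices are laid out back to back in kind order, so a site of kind K at per-kind index i lands at i plus the sum of NumValueSites over all kinds preceding K. A minimal model (helper name hypothetical, not pass code):

    #include <cstdint>

    // Model of lowerValueProfileInst's index computation.
    uint32_t flattenSiteIndex(const uint32_t NumValueSites[], uint32_t Kind,
                              uint32_t IndexWithinKind) {
      uint32_t Index = IndexWithinKind;
      for (uint32_t K = 0; K < Kind; ++K) // all kinds before this one
        Index += NumValueSites[K];
      return Index;
    }
    // E.g. NumValueSites = {3, 2}: the site (Kind = 1, Index = 0) flattens
    // to 3, landing right after the three sites of kind 0.
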
+
void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
- IRBuilder<> Builder(Inc->getParent(), *Inc);
+ IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
Value *Count = Builder.CreateLoad(Addr, "pgocount");
@@ -172,9 +252,10 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
GlobalVariable *Name = cast<GlobalVariable>(V);
// If we have region counters for this name, we've already handled it.
- auto It = RegionCounters.find(Name);
- if (It != RegionCounters.end())
- continue;
+ auto It = ProfileDataMap.find(Name);
+ if (It != ProfileDataMap.end())
+ if (It->second.RegionCounters)
+ continue;
// Move the name variable to the right section.
Name->setSection(getNameSection());
@@ -183,69 +264,108 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
}
/// Get the name of a profiling variable for a particular function.
-static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) {
- auto *Arr = cast<ConstantDataArray>(Inc->getName()->getInitializer());
- StringRef Name = Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
- return ("__llvm_profile_" + VarName + "_" + Name).str();
+static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
+ StringRef NamePrefix = getInstrProfNameVarPrefix();
+ StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
+ return (Prefix + Name).str();
+}
+
+static inline bool shouldRecordFunctionAddr(Function *F) {
+ // Check the linkage
+ if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+ !F->hasAvailableExternallyLinkage())
+ return true;
+  // Check whether this function has uses other than direct calls or invokes.
+ return F->hasAddressTaken();
+}
+
+static inline Comdat *getOrCreateProfileComdat(Module &M,
+ InstrProfIncrementInst *Inc) {
+ // COFF format requires a COMDAT section to have a key symbol with the same
+  // name. The linker targeting COFF also requires that the COMDAT section
+  // that a section is associated with must precede the associating section.
+  // For this reason, we must choose the name var's name as the comdat name.
+ StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
+ ? getInstrProfNameVarPrefix()
+ : getInstrProfComdatPrefix());
+ return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
}
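
The naming rule above matters only on COFF, where the key symbol of a COMDAT group must carry the group's own name; elsewhere a distinct comdat prefix keeps the profile-variable groups separate. A hedged sketch of the choice (the literal prefix strings below are placeholders for illustration; the real ones come from getInstrProfNameVarPrefix() and getInstrProfComdatPrefix()):

    #include "llvm/ADT/StringRef.h"

    // Placeholder prefixes; illustrative only.
    llvm::StringRef pickComdatPrefix(bool IsCOFF) {
      return IsCOFF ? /* name var prefix */ "__prof_names_"
                    : /* comdat prefix   */ "__prof_vars_";
    }
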
GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
- GlobalVariable *Name = Inc->getName();
- auto It = RegionCounters.find(Name);
- if (It != RegionCounters.end())
- return It->second;
-
- // Move the name variable to the right section. Make sure it is placed in the
- // same comdat as its associated function. Otherwise, we may get multiple
- // counters for the same function in certain cases.
+ GlobalVariable *NamePtr = Inc->getName();
+ auto It = ProfileDataMap.find(NamePtr);
+ PerFunctionProfileData PD;
+ if (It != ProfileDataMap.end()) {
+ if (It->second.RegionCounters)
+ return It->second.RegionCounters;
+ PD = It->second;
+ }
+
+ // Move the name variable to the right section. Place them in a COMDAT group
+ // if the associated function is a COMDAT. This will make sure that
+ // only one copy of counters of the COMDAT function will be emitted after
+ // linking.
Function *Fn = Inc->getParent()->getParent();
- Name->setSection(getNameSection());
- Name->setAlignment(1);
- Name->setComdat(Fn->getComdat());
+ Comdat *ProfileVarsComdat = nullptr;
+ if (Fn->hasComdat())
+ ProfileVarsComdat = getOrCreateProfileComdat(*M, Inc);
+ NamePtr->setSection(getNameSection());
+ NamePtr->setAlignment(1);
+ NamePtr->setComdat(ProfileVarsComdat);
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
// Create the counters variable.
- auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(),
- Constant::getNullValue(CounterTy),
- getVarName(Inc, "counters"));
- Counters->setVisibility(Name->getVisibility());
- Counters->setSection(getCountersSection());
- Counters->setAlignment(8);
- Counters->setComdat(Fn->getComdat());
-
- RegionCounters[Inc->getName()] = Counters;
+ auto *CounterPtr =
+ new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
+ Constant::getNullValue(CounterTy),
+ getVarName(Inc, getInstrProfCountersVarPrefix()));
+ CounterPtr->setVisibility(NamePtr->getVisibility());
+ CounterPtr->setSection(getCountersSection());
+ CounterPtr->setAlignment(8);
+ CounterPtr->setComdat(ProfileVarsComdat);
// Create data variable.
- auto *NameArrayTy = Name->getType()->getPointerElementType();
- auto *Int32Ty = Type::getInt32Ty(Ctx);
- auto *Int64Ty = Type::getInt64Ty(Ctx);
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
- auto *Int64PtrTy = Type::getInt64PtrTy(Ctx);
-
- Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy};
+ auto *Int16Ty = Type::getInt16Ty(Ctx);
+ auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last+1);
+ Type *DataTypes[] = {
+ #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
+ #include "llvm/ProfileData/InstrProfData.inc"
+ };
auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
+
+ Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) ?
+ ConstantExpr::getBitCast(Fn, Int8PtrTy) :
+ ConstantPointerNull::get(Int8PtrTy);
+
+ Constant *Int16ArrayVals[IPVK_Last+1];
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
+
Constant *DataVals[] = {
- ConstantInt::get(Int32Ty, NameArrayTy->getArrayNumElements()),
- ConstantInt::get(Int32Ty, NumCounters),
- ConstantInt::get(Int64Ty, Inc->getHash()->getZExtValue()),
- ConstantExpr::getBitCast(Name, Int8PtrTy),
- ConstantExpr::getBitCast(Counters, Int64PtrTy)};
- auto *Data = new GlobalVariable(*M, DataTy, true, Name->getLinkage(),
+ #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
+ #include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
ConstantStruct::get(DataTy, DataVals),
- getVarName(Inc, "data"));
- Data->setVisibility(Name->getVisibility());
+ getVarName(Inc, getInstrProfDataVarPrefix()));
+ Data->setVisibility(NamePtr->getVisibility());
Data->setSection(getDataSection());
- Data->setAlignment(8);
- Data->setComdat(Fn->getComdat());
+ Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
+ Data->setComdat(ProfileVarsComdat);
+
+ PD.RegionCounters = CounterPtr;
+ PD.DataVar = Data;
+ ProfileDataMap[NamePtr] = PD;
// Mark the data variable as used so that it isn't stripped out.
UsedVars.push_back(Data);
- return Counters;
+ return CounterPtr;
}
void InstrProfiling::emitRegistration() {
@@ -253,20 +373,24 @@ void InstrProfiling::emitRegistration() {
if (Triple(M->getTargetTriple()).isOSDarwin())
return;
+ // Use linker script magic to get data/cnts/name start/end.
+ if (Triple(M->getTargetTriple()).isOSLinux() ||
+ Triple(M->getTargetTriple()).isOSFreeBSD())
+ return;
+
// Construct the function.
auto *VoidTy = Type::getVoidTy(M->getContext());
auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
auto *RegisterFTy = FunctionType::get(VoidTy, false);
auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
- "__llvm_profile_register_functions", M);
+ getInstrProfRegFuncsName(), M);
RegisterF->setUnnamedAddr(true);
- if (Options.NoRedZone)
- RegisterF->addFnAttr(Attribute::NoRedZone);
+ if (Options.NoRedZone) RegisterF->addFnAttr(Attribute::NoRedZone);
auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
auto *RuntimeRegisterF =
Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
- "__llvm_profile_register_function", M);
+ getInstrProfRegFuncName(), M);
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
for (Value *Data : UsedVars)
@@ -275,26 +399,27 @@ void InstrProfiling::emitRegistration() {
}
void InstrProfiling::emitRuntimeHook() {
- const char *const RuntimeVarName = "__llvm_profile_runtime";
- const char *const RuntimeUserName = "__llvm_profile_runtime_user";
- // If the module's provided its own runtime, we don't need to do anything.
- if (M->getGlobalVariable(RuntimeVarName))
+  // We expect the linker to be invoked with the -u<hook_var> flag on Linux,
+  // in which case there is no need to emit the user function.
+ if (Triple(M->getTargetTriple()).isOSLinux())
return;
+ // If the module's provided its own runtime, we don't need to do anything.
+ if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) return;
+
// Declare an external variable that will pull in the runtime initialization.
auto *Int32Ty = Type::getInt32Ty(M->getContext());
auto *Var =
new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
- nullptr, RuntimeVarName);
+ nullptr, getInstrProfRuntimeHookVarName());
// Make a function that uses it.
- auto *User =
- Function::Create(FunctionType::get(Int32Ty, false),
- GlobalValue::LinkOnceODRLinkage, RuntimeUserName, M);
+ auto *User = Function::Create(FunctionType::get(Int32Ty, false),
+ GlobalValue::LinkOnceODRLinkage,
+ getInstrProfRuntimeHookVarUseFuncName(), M);
User->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- User->addFnAttr(Attribute::NoRedZone);
+ if (Options.NoRedZone) User->addFnAttr(Attribute::NoRedZone);
User->setVisibility(GlobalValue::HiddenVisibility);
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
@@ -330,26 +455,23 @@ void InstrProfiling::emitUses() {
LLVMUsed =
new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
ConstantArray::get(ATy, MergedVars), "llvm.used");
-
LLVMUsed->setSection("llvm.metadata");
}
void InstrProfiling::emitInitialization() {
std::string InstrProfileOutput = Options.InstrProfileOutput;
- Constant *RegisterF = M->getFunction("__llvm_profile_register_functions");
- if (!RegisterF && InstrProfileOutput.empty())
- return;
+ Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
+ if (!RegisterF && InstrProfileOutput.empty()) return;
// Create the initialization function.
auto *VoidTy = Type::getVoidTy(M->getContext());
- auto *F =
- Function::Create(FunctionType::get(VoidTy, false),
- GlobalValue::InternalLinkage, "__llvm_profile_init", M);
+ auto *F = Function::Create(FunctionType::get(VoidTy, false),
+ GlobalValue::InternalLinkage,
+ getInstrProfInitFuncName(), M);
F->setUnnamedAddr(true);
F->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- F->addFnAttr(Attribute::NoRedZone);
+ if (Options.NoRedZone) F->addFnAttr(Attribute::NoRedZone);
// Add the basic block and the necessary calls.
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
@@ -358,9 +480,8 @@ void InstrProfiling::emitInitialization() {
if (!InstrProfileOutput.empty()) {
auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
auto *SetNameTy = FunctionType::get(VoidTy, Int8PtrTy, false);
- auto *SetNameF =
- Function::Create(SetNameTy, GlobalValue::ExternalLinkage,
- "__llvm_profile_override_default_filename", M);
+ auto *SetNameF = Function::Create(SetNameTy, GlobalValue::ExternalLinkage,
+ getInstrProfFileOverriderFuncName(), M);
// Create variable for profile name.
Constant *ProfileNameConst =
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 27505859100b..a05a5fa09f9a 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -12,12 +12,47 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm-c/Initialization.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
using namespace llvm;
+/// Moves I before IP. Returns new insert point.
+static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I,
+                                                  BasicBlock::iterator IP) {
+ // If I is IP, move the insert point down.
+ if (I == IP)
+ return ++IP;
+ // Otherwise, move I before IP and return IP.
+ I->moveBefore(&*IP);
+ return IP;
+}
+
+/// Instrumentation passes often insert conditional checks into entry blocks.
+/// Call this function before splitting the entry block to move instructions
+/// that must remain in the entry block up before the split point. Static
+/// allocas and llvm.localescape calls, for example, must remain in the entry
+/// block.
+BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB,
+ BasicBlock::iterator IP) {
+ assert(&BB.getParent()->getEntryBlock() == &BB);
+ for (auto I = IP, E = BB.end(); I != E; ++I) {
+ bool KeepInEntry = false;
+ if (auto *AI = dyn_cast<AllocaInst>(I)) {
+ if (AI->isStaticAlloca())
+ KeepInEntry = true;
+ } else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == llvm::Intrinsic::localescape)
+ KeepInEntry = true;
+ }
+ if (KeepInEntry)
+ IP = moveBeforeInsertPoint(I, IP);
+ }
+ return IP;
+}
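
A usage sketch for the helper added above, assuming a pass that wants to guard the entry block with a conditional check: hoist the must-stay-in-entry instructions first, then split (function and block names here are illustrative):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Instrumentation.h"

    // Split the entry block while keeping static allocas and
    // llvm.localescape calls where the verifier requires them.
    static llvm::BasicBlock *splitEntryForCheck(llvm::Function &F) {
      llvm::BasicBlock &Entry = F.getEntryBlock();
      llvm::BasicBlock::iterator IP = Entry.getFirstInsertionPt();
      IP = llvm::PrepareToSplitEntryBlock(Entry, IP);
      // Everything that must stay in the entry block now sits above IP,
      // so the check can be inserted in Entry and branch to the new block.
      return Entry.splitBasicBlock(IP, "entry.cont");
    }
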
+
/// initializeInstrumentation - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
@@ -25,6 +60,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingPass(Registry);
initializeGCOVProfilerPass(Registry);
+ initializePGOInstrumentationGenPass(Registry);
+ initializePGOInstrumentationUsePass(Registry);
initializeInstrProfilingPass(Registry);
initializeMemorySanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 286a56330248..5a7bce5a5413 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -148,7 +148,7 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
cl::desc("poison uninitialized stack variables with a call"),
cl::Hidden, cl::init(false));
static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
- cl::desc("poison uninitialized stack variables with the given patter"),
+ cl::desc("poison uninitialized stack variables with the given pattern"),
cl::Hidden, cl::init(0xff));
static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
cl::desc("poison undef temps"),
@@ -222,10 +222,17 @@ static const MemoryMapParams Linux_I386_MemoryMapParams = {
// x86_64 Linux
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
+#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
0x400000000000, // AndMask
0, // XorMask (not used)
0, // ShadowBase (not used)
0x200000000000, // OriginBase
+#else
+ 0, // AndMask (not used)
+ 0x500000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x100000000000, // OriginBase
+#endif
};
// mips64 Linux
@@ -244,6 +251,14 @@ static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
0x1C0000000000, // OriginBase
};
+// aarch64 Linux
+static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
+ 0, // AndMask (not used)
+ 0x06000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x01000000000, // OriginBase
+};
+
// i386 FreeBSD
static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
0x000180000000, // AndMask
@@ -266,15 +281,20 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
};
static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
- NULL,
+ nullptr,
&Linux_MIPS64_MemoryMapParams,
};
static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
- NULL,
+ nullptr,
&Linux_PowerPC64_MemoryMapParams,
};
+static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
+ nullptr,
+ &Linux_AArch64_MemoryMapParams,
+};
+
static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
&FreeBSD_I386_MemoryMapParams,
&FreeBSD_X86_64_MemoryMapParams,
@@ -353,8 +373,9 @@ class MemorySanitizer : public FunctionPass {
friend struct MemorySanitizerVisitor;
friend struct VarArgAMD64Helper;
friend struct VarArgMIPS64Helper;
+ friend struct VarArgAArch64Helper;
};
-} // namespace
+} // anonymous namespace
char MemorySanitizer::ID = 0;
INITIALIZE_PASS(MemorySanitizer, "msan",
@@ -377,7 +398,6 @@ static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
GlobalValue::PrivateLinkage, StrConst, "");
}
-
/// \brief Insert extern declaration of runtime-provided functions and globals.
void MemorySanitizer::initializeCallbacks(Module &M) {
// Only do this once.
@@ -496,6 +516,10 @@ bool MemorySanitizer::doInitialization(Module &M) {
case Triple::ppc64le:
MapParams = Linux_PowerPC_MemoryMapParams.bits64;
break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ MapParams = Linux_ARM_MemoryMapParams.bits64;
+ break;
default:
report_fatal_error("unsupported architecture");
}
@@ -697,7 +721,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Cmp = IRB.CreateICmpNE(
ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
- Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
+ Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
IRBuilder<> IRBNew(CheckTerm);
paintOrigin(IRBNew, updateOrigin(Origin, IRBNew),
getOriginPtr(Addr, IRBNew, Alignment), StoreSize,
@@ -893,16 +917,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Offset = (Addr & ~AndMask) ^ XorMask
Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
+ Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
+
uint64_t AndMask = MS.MapParams->AndMask;
- assert(AndMask != 0 && "AndMask shall be specified");
- Value *OffsetLong =
- IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
- ConstantInt::get(MS.IntptrTy, ~AndMask));
+ if (AndMask)
+ OffsetLong =
+ IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
uint64_t XorMask = MS.MapParams->XorMask;
- if (XorMask != 0)
- OffsetLong = IRB.CreateXor(OffsetLong,
- ConstantInt::get(MS.IntptrTy, XorMask));
+ if (XorMask)
+ OffsetLong =
+ IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
return OffsetLong;
}
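
Worked numbers for the relaxed formula Offset = (Addr & ~AndMask) ^ XorMask: after this change either mask may be zero, in which case that operation is skipped. With the new x86_64 Linux parameters from the table above (AndMask 0, XorMask 0x500000000000) the offset reduces to a single XOR. A minimal model, not the pass itself:

    #include <cstdint>

    // Model of getShadowPtrOffset: either mask may be zero, and the
    // corresponding operation is then omitted.
    uint64_t shadowOffset(uint64_t Addr, uint64_t AndMask, uint64_t XorMask) {
      uint64_t Offset = Addr;
      if (AndMask)
        Offset &= ~AndMask;
      if (XorMask)
        Offset ^= XorMask;
      return Offset;
    }
    // shadowOffset(A, 0, 0x500000000000) == A ^ 0x500000000000
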
@@ -1339,6 +1364,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitBitCastInst(BitCastInst &I) {
+ // Special case: if this is the bitcast (there is exactly 1 allowed) between
+ // a musttail call and a ret, don't instrument. New instructions are not
+ // allowed after a musttail call.
+ if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
+ if (CI->isMustTailCall())
+ return;
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
setOrigin(&I, getOrigin(&I, 0));
@@ -1570,18 +1601,24 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Type *EltTy = Ty->getSequentialElementType();
SmallVector<Constant *, 16> Elements;
for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
- ConstantInt *Elt =
- dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx));
- APInt V = Elt->getValue();
- APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
- Elements.push_back(ConstantInt::get(EltTy, V2));
+ if (ConstantInt *Elt =
+ dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
+ APInt V = Elt->getValue();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ Elements.push_back(ConstantInt::get(EltTy, V2));
+ } else {
+ Elements.push_back(ConstantInt::get(EltTy, 1));
+ }
}
ShadowMul = ConstantVector::get(Elements);
} else {
- ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg);
- APInt V = Elt->getValue();
- APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
- ShadowMul = ConstantInt::get(Elt->getType(), V2);
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
+ APInt V = Elt->getValue();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ ShadowMul = ConstantInt::get(Ty, V2);
+ } else {
+ ShadowMul = ConstantInt::get(Ty, 1);
+ }
}
IRBuilder<> IRB(&I);
@@ -1730,25 +1767,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Instrument signed relational comparisons.
///
- /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by
- /// propagating the highest bit of the shadow. Everything else is delegated
- /// to handleShadowOr().
+ /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
+ /// bit of the shadow. Everything else is delegated to handleShadowOr().
void handleSignedRelationalComparison(ICmpInst &I) {
- Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
- Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
- Value* op = nullptr;
- CmpInst::Predicate pre = I.getPredicate();
- if (constOp0 && constOp0->isNullValue() &&
- (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
- op = I.getOperand(1);
- } else if (constOp1 && constOp1->isNullValue() &&
- (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) {
+ Constant *constOp;
+ Value *op = nullptr;
+ CmpInst::Predicate pre;
+ if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
op = I.getOperand(0);
+ pre = I.getPredicate();
+ } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
+ op = I.getOperand(1);
+ pre = I.getSwappedPredicate();
+ } else {
+ handleShadowOr(I);
+ return;
}
- if (op) {
+
+ if ((constOp->isNullValue() &&
+ (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
+ (constOp->isAllOnesValue() &&
+ (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
IRBuilder<> IRB(&I);
- Value* Shadow =
- IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt");
+ Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
+ "_msprop_icmp_s");
setShadow(&I, Shadow);
setOrigin(&I, getOrigin(op));
} else {
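
The rewrite above generalizes the exact case from tests against zero (x<0, x>=0) to the equivalent tests against -1 (x<=-1, x>-1): all four read only the sign bit of x, so the comparison result is undefined exactly when the sign bit of x's shadow is set, which is what CreateICmpSLT(shadow, 0) computes. A minimal model (a sketch, assuming the usual convention that a set shadow bit marks an undefined bit):

    #include <cstdint>

    // The result of a sign-bit test is poisoned iff the sign bit of the
    // operand's shadow is poisoned.
    bool signTestResultPoisoned(int64_t ShadowOfX) {
      return ShadowOfX < 0; // sign bit of the shadow
    }
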
@@ -1860,25 +1902,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
VAHelper->visitVACopyInst(I);
}
- enum IntrinsicKind {
- IK_DoesNotAccessMemory,
- IK_OnlyReadsMemory,
- IK_WritesMemory
- };
-
- static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) {
- const int DoesNotAccessMemory = IK_DoesNotAccessMemory;
- const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory;
- const int OnlyReadsMemory = IK_OnlyReadsMemory;
- const int OnlyAccessesArgumentPointees = IK_WritesMemory;
- const int UnknownModRefBehavior = IK_WritesMemory;
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#define ModRefBehavior IntrinsicKind
-#include "llvm/IR/Intrinsics.gen"
-#undef ModRefBehavior
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
- }
-
/// \brief Handle vector store-like intrinsics.
///
/// Instrument intrinsics that look like a simple SIMD store: writes memory,
@@ -1978,17 +2001,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (NumArgOperands == 0)
return false;
- Intrinsic::ID iid = I.getIntrinsicID();
- IntrinsicKind IK = getIntrinsicKind(iid);
- bool OnlyReadsMemory = IK == IK_OnlyReadsMemory;
- bool WritesMemory = IK == IK_WritesMemory;
- assert(!(OnlyReadsMemory && WritesMemory));
-
if (NumArgOperands == 2 &&
I.getArgOperand(0)->getType()->isPointerTy() &&
I.getArgOperand(1)->getType()->isVectorTy() &&
I.getType()->isVoidTy() &&
- WritesMemory) {
+ !I.onlyReadsMemory()) {
// This looks like a vector store.
return handleVectorStoreIntrinsic(I);
}
@@ -1996,12 +2013,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (NumArgOperands == 1 &&
I.getArgOperand(0)->getType()->isPointerTy() &&
I.getType()->isVectorTy() &&
- OnlyReadsMemory) {
+ I.onlyReadsMemory()) {
// This looks like a vector load.
return handleVectorLoadIntrinsic(I);
}
- if (!OnlyReadsMemory && !WritesMemory)
+ if (I.doesNotAccessMemory())
if (maybeHandleSimpleNomemIntrinsic(I))
return true;
@@ -2493,13 +2510,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Now, get the shadow for the RetVal.
if (!I.getType()->isSized()) return;
+ // Don't emit the epilogue for musttail call returns.
+ if (CS.isCall() && cast<CallInst>(&I)->isMustTailCall()) return;
IRBuilder<> IRBBefore(&I);
// Until we have full dynamic coverage, make sure the retval shadow is 0.
Value *Base = getShadowPtrForRetval(&I, IRBBefore);
IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
- Instruction *NextInsn = nullptr;
+ BasicBlock::iterator NextInsn;
if (CS.isCall()) {
- NextInsn = I.getNextNode();
+ NextInsn = ++I.getIterator();
+ assert(NextInsn != I.getParent()->end());
} else {
BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
if (!NormalDest->getSinglePredecessor()) {
@@ -2511,10 +2531,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return;
}
NextInsn = NormalDest->getFirstInsertionPt();
- assert(NextInsn &&
+ assert(NextInsn != NormalDest->end() &&
"Could not find insertion point for retval shadow load");
}
- IRBuilder<> IRBAfter(NextInsn);
+ IRBuilder<> IRBAfter(&*NextInsn);
Value *RetvalShadow =
IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
kShadowTLSAlignment, "_msret");
@@ -2523,10 +2543,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
}
+ bool isAMustTailRetVal(Value *RetVal) {
+ if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
+ RetVal = I->getOperand(0);
+ }
+ if (auto *I = dyn_cast<CallInst>(RetVal)) {
+ return I->isMustTailCall();
+ }
+ return false;
+ }
+
void visitReturnInst(ReturnInst &I) {
IRBuilder<> IRB(&I);
Value *RetVal = I.getReturnValue();
if (!RetVal) return;
+ // Don't emit the epilogue for musttail call returns.
+ if (isAMustTailRetVal(RetVal)) return;
Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
if (CheckReturnValue) {
insertShadowCheck(RetVal, &I);
@@ -2653,6 +2685,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getCleanOrigin());
}
+ void visitCatchSwitchInst(CatchSwitchInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitFuncletPadInst(FuncletPadInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
void visitGetElementPtrInst(GetElementPtrInst &I) {
handleShadowOr(I);
}
@@ -2696,6 +2738,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Nothing to do here.
}
+ void visitCleanupReturnInst(CleanupReturnInst &CRI) {
+ DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
+ // Nothing to do here.
+ }
+
+ void visitCatchReturnInst(CatchReturnInst &CRI) {
+ DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
+ // Nothing to do here.
+ }
+
void visitInstruction(Instruction &I) {
// Everything else: stop propagating and check for poisoned shadow.
if (ClDumpStrictInstructions)
@@ -2808,6 +2860,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
void visitVAStartInst(VAStartInst &I) override {
+ if (F.getCallingConv() == CallingConv::X86_64_Win64)
+ return;
IRBuilder<> IRB(&I);
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
@@ -2820,6 +2874,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
void visitVACopyInst(VACopyInst &I) override {
+ if (F.getCallingConv() == CallingConv::X86_64_Win64)
+ return;
IRBuilder<> IRB(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
@@ -2979,6 +3035,242 @@ struct VarArgMIPS64Helper : public VarArgHelper {
}
};
+
+/// \brief AArch64-specific implementation of VarArgHelper.
+struct VarArgAArch64Helper : public VarArgHelper {
+ static const unsigned kAArch64GrArgSize = 56;
+ static const unsigned kAArch64VrArgSize = 128;
+
+ static const unsigned AArch64GrBegOffset = 0;
+ static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
+ // Make VR space aligned to 16 bytes.
+ static const unsigned AArch64VrBegOffset = AArch64GrEndOffset + 8;
+ static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
+ + kAArch64VrArgSize;
+ static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
+
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy;
+ Value *VAArgOverflowSize;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+ VAArgOverflowSize(nullptr) {}
+
+ enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+ ArgKind classifyArgument(Value* arg) {
+ Type *T = arg->getType();
+ if (T->isFPOrFPVectorTy())
+ return AK_FloatingPoint;
+ if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+ || (T->isPointerTy()))
+ return AK_GeneralPurpose;
+ return AK_Memory;
+ }
+
+  // The instrumentation stores the argument shadow in a non-ABI-specific
+  // format because it does not know which arguments are named (since Clang,
+  // as in the x86_64 case, lowers va_args in the frontend and this pass only
+  // sees the low-level code that deals with va_list internals).
+  // The first seven GR registers are saved in the first 56 bytes of the
+  // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
+  // the remaining arguments.
+  // Using constant offsets within the va_arg TLS array allows fast copying
+  // in the finalize instrumentation.
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+ unsigned GrOffset = AArch64GrBegOffset;
+ unsigned VrOffset = AArch64VrBegOffset;
+ unsigned OverflowOffset = AArch64VAEndOffset;
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ ArgKind AK = classifyArgument(A);
+ if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
+ AK = AK_Memory;
+ if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
+ AK = AK_Memory;
+ Value *Base;
+ switch (AK) {
+ case AK_GeneralPurpose:
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
+ GrOffset += 8;
+ break;
+ case AK_FloatingPoint:
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
+ VrOffset += 16;
+ break;
+ case AK_Memory:
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ OverflowOffset += RoundUpToAlignment(ArgSize, 8);
+ break;
+ }
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+ Constant *OverflowSize =
+ ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
+ IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants (size of va_list).
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */32, /* alignment */8, false);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants (size of va_list).
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */32, /* alignment */8, false);
+ }
+
+ // Retrieve a va_list field of 'void*' size.
+ Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+ Value *SaveAreaPtrPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, offset)),
+ Type::getInt64PtrTy(*MS.C));
+ return IRB.CreateLoad(SaveAreaPtrPtr);
+ }
+
+ // Retrieve a va_list field of 'int' size.
+ Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+ Value *SaveAreaPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, offset)),
+ Type::getInt32PtrTy(*MS.C));
+ Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
+ return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
+ }
+
+ void finalizeInstrumentation() override {
+ assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ Value *CopySize =
+ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
+ VAArgOverflowSize);
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+ }
+
+ Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
+ Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
+
+ // Instrument va_start, copy va_list shadow from the backup copy of
+ // the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+
+ Value *VAListTag = OrigInst->getArgOperand(0);
+
+      // The variadic ABI for AArch64 creates two areas to save the incoming
+      // argument registers (one for the 64-bit general registers x0-x7 and
+      // another for the 128-bit FP/SIMD registers v0-v7).
+      // We then need to propagate the shadow arguments to both regions,
+      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
+      // The remaining arguments are saved in the shadow for 'va::stack'.
+      // One caveat is that only the non-named arguments need to be
+      // propagated, yet the call-site instrumentation saved 'all' the
+      // arguments. So to copy the shadow values from the va_arg TLS array
+      // we need to adjust the offsets for both the GR and VR fields based
+      // on the __{gr,vr}_offs values (since those are set based on the
+      // incoming named arguments).
+
+ // Read the stack pointer from the va_list.
+ Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
+
+ // Read both the __gr_top and __gr_off and add them up.
+ Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
+ Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
+
+ Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
+
+ // Read both the __vr_top and __vr_off and add them up.
+ Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
+ Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
+
+ Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
+
+      // The pass does not know how many named arguments are being used, and
+      // at the call site all the arguments were saved. Since __gr_offs is
+      // defined as '0 - ((8 - named_gr) * 8)', the idea is to propagate only
+      // the variadic arguments by skipping the bytes of shadow that belong
+      // to the named arguments.
+ Value *GrRegSaveAreaShadowPtrOff =
+ IRB.CreateAdd(GrArgSize, GrOffSaveArea);
+
+ Value *GrRegSaveAreaShadowPtr =
+ MSV.getShadowPtr(GrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+ Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ GrRegSaveAreaShadowPtrOff);
+ Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
+
+ IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, GrSrcPtr, GrCopySize, 8);
+
+ // Again, but for FP/SIMD values.
+ Value *VrRegSaveAreaShadowPtrOff =
+ IRB.CreateAdd(VrArgSize, VrOffSaveArea);
+
+ Value *VrRegSaveAreaShadowPtr =
+ MSV.getShadowPtr(VrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+ Value *VrSrcPtr = IRB.CreateInBoundsGEP(
+ IRB.getInt8Ty(),
+ IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ IRB.getInt32(AArch64VrBegOffset)),
+ VrRegSaveAreaShadowPtrOff);
+ Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
+
+ IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, VrSrcPtr, VrCopySize, 8);
+
+ // And finally for remaining arguments.
+ Value *StackSaveAreaShadowPtr =
+ MSV.getShadowPtr(StackSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+ Value *StackSrcPtr =
+ IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ IRB.getInt32(AArch64VAEndOffset));
+
+ IRB.CreateMemCpy(StackSaveAreaShadowPtr, StackSrcPtr,
+ VAArgOverflowSize, 16);
+ }
+ }
+};
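
Worked numbers for the __gr_offs adjustment in finalizeInstrumentation above, under the AAPCS64 convention that va_start sets __gr_offs = 0 - ((8 - named_gr) * 8): the call-site instrumentation stored GR shadow at TLS offsets 0..55, so the copy starts at GrArgSize + __gr_offs to skip the named registers. A model with illustrative numbers, not pass code:

    #include <cstdint>

    // Model of the GR copy-start computation (GrRegSaveAreaShadowPtrOff).
    uint64_t grShadowCopyStart(int32_t GrOffs /* va __gr_offs */) {
      const uint64_t kAArch64GrArgSize = 56;
      return kAArch64GrArgSize + GrOffs;
    }
    // Example: two named GR arguments give __gr_offs = -(8 - 2) * 8 = -48,
    // so the copy starts at 56 - 48 = 8 bytes into the saved GR shadow.
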
+
/// \brief A no-op implementation of VarArgHelper.
struct VarArgNoOpHelper : public VarArgHelper {
VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -3003,11 +3295,13 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
else if (TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el)
return new VarArgMIPS64Helper(Func, Msan, Visitor);
+ else if (TargetTriple.getArch() == llvm::Triple::aarch64)
+ return new VarArgAArch64Helper(Func, Msan, Visitor);
else
return new VarArgNoOpHelper(Func, Msan, Visitor);
}
-} // namespace
+} // anonymous namespace
bool MemorySanitizer::runOnFunction(Function &F) {
if (&F == MsanCtorFunction)
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
new file mode 100644
index 000000000000..4b59b93b325f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -0,0 +1,718 @@
+//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements PGO instrumentation using a minimum spanning tree based
+// on the following paper:
+// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
+// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
+// Issue 3, pp 313-322
+// The idea of the algorithm is based on the fact that for each node (except
+// for the entry and exit), the sum of incoming edge counts equals the sum of
+// outgoing edge counts. The count of an edge on the spanning tree can be
+// derived from the counts of the edges not on the spanning tree. Knuth proves
+// this method instruments the minimum number of edges.
+//
+// The minimum spanning tree here is actually a maximum weight tree -- on-tree
+// edges have higher frequencies (more likely to execute). The idea is to
+// instrument those less frequently executed edges to reduce the runtime
+// overhead of instrumented binaries.
+//
+// This file contains two passes:
+// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
+// count profile, and
+// (2) Pass PGOInstrumentationUse which reads the edge count profile and
+// annotates the branch weights.
+// To get precise counter information, these two passes need to be invoked at
+// the same compilation point (so they see the same IR). For pass
+// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
+// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc; the
+// profile is opened at module level and passed to each PGOUseFunc instance.
+// The code shared by PGOInstrumentationGen and PGOInstrumentationUse lives
+// in class FuncPGOInstrumentation.
+//
+// Class PGOEdge represents a CFG edge and some auxiliary information. Class
+// BBInfo contains auxiliary information for each BB. These two classes are
+// used in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo derive
+// from PGOEdge and BBInfo, respectively; they contain the extra data
+// structures used in populating profile counters.
+// The MST implementation is in Class CFGMST (CFGMST.h).
+//
+//===----------------------------------------------------------------------===//
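+
+// A minimal worked example of the counting scheme (illustrative): in a
+// diamond CFG entry -> {then, else} -> exit, plus the fake exit -> entry
+// edge, |E| = 5 and |V| = 4, so only |E| - |V| + 1 = 2 edges need counters.
+// If those counters record entry->then = 90 and exit->entry = 100, flow
+// conservation at entry gives entry->else = 100 - 90 = 10, and the
+// single-successor blocks then and else pass 90 and 10 through to exit.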
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "CFGMST.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JamCRC.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-instrumentation"
+
+STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
+STATISTIC(NumOfPGOEdge, "Number of edges.");
+STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
+STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
+STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
+STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
+STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+
+// Command line option to specify the file to read profile from. This is
+// mainly used for testing.
+static cl::opt<std::string>
+    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
+                       cl::value_desc("filename"),
+                       cl::desc("Specify the path of the profile data file. "
+                                "This is mainly for test purposes."));
+
+namespace {
+class PGOInstrumentationGen : public ModulePass {
+public:
+ static char ID;
+
+ PGOInstrumentationGen() : ModulePass(ID) {
+ initializePGOInstrumentationGenPass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const override {
+ return "PGOInstrumentationGenPass";
+ }
+
+private:
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ }
+};
+
+class PGOInstrumentationUse : public ModulePass {
+public:
+ static char ID;
+
+ // Provide the profile filename as the parameter.
+ PGOInstrumentationUse(std::string Filename = "")
+ : ModulePass(ID), ProfileFileName(Filename) {
+ if (!PGOTestProfileFile.empty())
+ ProfileFileName = PGOTestProfileFile;
+ initializePGOInstrumentationUsePass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const override {
+ return "PGOInstrumentationUsePass";
+ }
+
+private:
+ std::string ProfileFileName;
+ std::unique_ptr<IndexedInstrProfReader> PGOReader;
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char PGOInstrumentationGen::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationGen, "pgo-instr-gen",
+ "PGO instrumentation.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationGen, "pgo-instr-gen",
+ "PGO instrumentation.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationGenPass() {
+ return new PGOInstrumentationGen();
+}
+
+char PGOInstrumentationUse::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationUse, "pgo-instr-use",
+ "Read PGO instrumentation profile.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationUse, "pgo-instr-use",
+ "Read PGO instrumentation profile.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationUsePass(StringRef Filename) {
+ return new PGOInstrumentationUse(Filename.str());
+}
+
+namespace {
+/// \brief An MST based instrumentation for PGO
+///
+/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
+/// at the function level.
+struct PGOEdge {
+ // This class implements the CFG edges. Note the CFG can be a multi-graph.
+  // So there might be multiple edges with the same SrcBB and DestBB.
+ const BasicBlock *SrcBB;
+ const BasicBlock *DestBB;
+ uint64_t Weight;
+ bool InMST;
+ bool Removed;
+ bool IsCritical;
+ PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+ : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false),
+ IsCritical(false) {}
+ // Return the information string of an edge.
+ const std::string infoString() const {
+ return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
+ (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
+ }
+};
+
+// This class stores the auxiliary information for each BB.
+struct BBInfo {
+ BBInfo *Group;
+ uint32_t Index;
+ uint32_t Rank;
+
+ BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {}
+
+ // Return the information string of this object.
+ const std::string infoString() const {
+ return (Twine("Index=") + Twine(Index)).str();
+ }
+};
+
+// This class encapsulates the per-function instrumentation state shared by
+// the PGO gen and use passes: the function name, the CFG hash, and the MST.
+template <class Edge, class BBInfo> class FuncPGOInstrumentation {
+private:
+ Function &F;
+ void computeCFGHash();
+
+public:
+ std::string FuncName;
+ GlobalVariable *FuncNameVar;
+ // CFG hash value for this function.
+ uint64_t FunctionHash;
+
+ // The Minimum Spanning Tree of function CFG.
+ CFGMST<Edge, BBInfo> MST;
+
+  // Given an edge, find the BB that will be instrumented.
+ // Return nullptr if there is no BB to be instrumented.
+ BasicBlock *getInstrBB(Edge *E);
+
+ // Return the auxiliary BB information.
+ BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
+
+ // Dump edges and BB information.
+ void dumpInfo(std::string Str = "") const {
+ MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
+ Twine(FunctionHash) + "\t" + Str);
+ }
+
+ FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false,
+ BranchProbabilityInfo *BPI = nullptr,
+ BlockFrequencyInfo *BFI = nullptr)
+ : F(Func), FunctionHash(0), MST(F, BPI, BFI) {
+ FuncName = getPGOFuncName(F);
+ computeCFGHash();
+ DEBUG(dumpInfo("after CFGMST"));
+
+ NumOfPGOBB += MST.BBInfos.size();
+ for (auto &E : MST.AllEdges) {
+ if (E->Removed)
+ continue;
+ NumOfPGOEdge++;
+ if (!E->InMST)
+ NumOfPGOInstrument++;
+ }
+
+ if (CreateGlobalVar)
+ FuncNameVar = createPGOFuncNameVar(F, FuncName);
+  }
+};
+
+// Compute the hash value for the CFG: the lower 32 bits are the CRC32 of the
+// index values of each BB's successors. The higher 32 bits record the number
+// of edges.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
+ std::vector<char> Indexes;
+ JamCRC JC;
+ for (auto &BB : F) {
+ const TerminatorInst *TI = BB.getTerminator();
+ for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+ BasicBlock *Succ = TI->getSuccessor(I);
+ uint32_t Index = getBBInfo(Succ).Index;
+ for (int J = 0; J < 4; J++)
+ Indexes.push_back((char)(Index >> (J * 8)));
+ }
+ }
+ JC.update(Indexes);
+ FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+}
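+
+// For illustration (made-up values): a function whose edge list holds 7
+// entries and whose successor-index bytes CRC to 0x89ABCDEF hashes to
+//   FunctionHash = (7ULL << 32) | 0x89ABCDEF;  // 0x0000000789ABCDEF
+// so a profile collected from a different CFG shape fails the hash check.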
+
+// Given a CFG edge E to be instrumented, find in which BB to place the
+// instrumentation code. The function will split the critical edge if
+// necessary.
+template <class Edge, class BBInfo>
+BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
+ if (E->InMST || E->Removed)
+ return nullptr;
+
+ BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+ BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+ // For a fake edge, instrument the real BB.
+ if (SrcBB == nullptr)
+ return DestBB;
+ if (DestBB == nullptr)
+ return SrcBB;
+
+ // Instrument the SrcBB if it has a single successor,
+ // otherwise, the DestBB if this is not a critical edge.
+ TerminatorInst *TI = SrcBB->getTerminator();
+ if (TI->getNumSuccessors() <= 1)
+ return SrcBB;
+ if (!E->IsCritical)
+ return DestBB;
+
+ // For a critical edge, we have to split. Instrument the newly
+ // created BB.
+ NumOfPGOSplit++;
+ DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> "
+ << getBBInfo(DestBB).Index << "\n");
+ unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+ assert(InstrBB && "Critical edge is not split");
+
+ E->Removed = true;
+ return InstrBB;
+}
+
+// Visit all edges and instrument those not in the MST.
+// Critical edges will be split.
+static void instrumentOneFunc(Function &F, Module *M,
+ BranchProbabilityInfo *BPI,
+ BlockFrequencyInfo *BFI) {
+ unsigned NumCounters = 0;
+ FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, true, BPI, BFI);
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (!E->InMST && !E->Removed)
+ NumCounters++;
+ }
+
+ uint32_t I = 0;
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
+ if (!InstrBB)
+ continue;
+
+ IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+ assert(Builder.GetInsertPoint() != InstrBB->end() &&
+ "Cannot get the Instrumentation point");
+ Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+ {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+ Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
+ Builder.getInt32(I++)});
+ }
+}
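+
+// The call built above lowers to IR of roughly this shape (names and
+// operand values illustrative, not taken from a real module):
+//   call void @llvm.instrprof.increment(i8* bitcast (... @<fn-name-var> ...),
+//                                       i64 <FunctionHash>,
+//                                       i32 <NumCounters>, i32 <Index>)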
+
+// This class represents a CFG edge in profile use compilation.
+struct PGOUseEdge : public PGOEdge {
+ bool CountValid;
+ uint64_t CountValue;
+ PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+ : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {}
+
+ // Set edge count value
+ void setEdgeCount(uint64_t Value) {
+ CountValue = Value;
+ CountValid = true;
+ }
+
+ // Return the information string for this object.
+ const std::string infoString() const {
+ if (!CountValid)
+ return PGOEdge::infoString();
+ return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)).str();
+ }
+};
+
+typedef SmallVector<PGOUseEdge *, 2> DirectEdges;
+
+// This class stores the auxiliary information for each BB.
+struct UseBBInfo : public BBInfo {
+ uint64_t CountValue;
+ bool CountValid;
+ int32_t UnknownCountInEdge;
+ int32_t UnknownCountOutEdge;
+ DirectEdges InEdges;
+ DirectEdges OutEdges;
+ UseBBInfo(unsigned IX)
+ : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0),
+ UnknownCountOutEdge(0) {}
+ UseBBInfo(unsigned IX, uint64_t C)
+ : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0),
+ UnknownCountOutEdge(0) {}
+
+ // Set the profile count value for this BB.
+ void setBBInfoCount(uint64_t Value) {
+ CountValue = Value;
+ CountValid = true;
+ }
+
+ // Return the information string of this object.
+ const std::string infoString() const {
+ if (!CountValid)
+ return BBInfo::infoString();
+ return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
+ }
+};
+
+// Sum up the count values for all the edges.
+static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
+ uint64_t Total = 0;
+ for (auto &E : Edges) {
+ if (E->Removed)
+ continue;
+ Total += E->CountValue;
+ }
+ return Total;
+}
+
+class PGOUseFunc {
+private:
+ Function &F;
+ Module *M;
+  // This member stores the instrumentation state shared with the gen pass.
+ FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
+
+ // Return the auxiliary BB information.
+ UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+ return FuncInfo.getBBInfo(BB);
+ }
+
+ // The maximum count value in the profile. This is only used in PGO use
+ // compilation.
+ uint64_t ProgramMaxCount;
+
+  // Find the instrumented BBs and set their count values.
+ void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+
+ // Set the edge counter value for the unknown edge -- there should be only
+ // one unknown edge.
+ void setEdgeCount(DirectEdges &Edges, uint64_t Value);
+
+  // Return the FuncName string.
+ const std::string getFuncName() const { return FuncInfo.FuncName; }
+
+ // Set the hot/cold inline hints based on the count values.
+ // FIXME: This function should be removed once the functionality in
+ // the inliner is implemented.
+ void applyFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
+ if (ProgramMaxCount == 0)
+ return;
+ // Threshold of the hot functions.
+ const BranchProbability HotFunctionThreshold(1, 100);
+ // Threshold of the cold functions.
+ const BranchProbability ColdFunctionThreshold(2, 10000);
+ if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
+ F.addFnAttr(llvm::Attribute::InlineHint);
+ else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
+ F.addFnAttr(llvm::Attribute::Cold);
+ }
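+  // Illustrative numbers for the thresholds above: with ProgramMaxCount =
+  // 1,000,000, HotFunctionThreshold scales to 1,000,000 / 100 = 10,000
+  // (EntryCount >= 10,000 gives InlineHint) and ColdFunctionThreshold to
+  // 2 * 1,000,000 / 10,000 = 200 (MaxCount <= 200 gives Cold).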
+
+public:
+ PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr,
+ BlockFrequencyInfo *BFI = nullptr)
+ : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI) {}
+
+ // Read counts for the instrumented BB from profile.
+ bool readCounters(IndexedInstrProfReader *PGOReader);
+
+ // Populate the counts for all BBs.
+ void populateCounters();
+
+ // Set the branch weights based on the count values.
+ void setBranchWeights();
+};
+
+// Visit all the edges and assign count values to the instrumented edges
+// and BBs.
+void PGOUseFunc::setInstrumentedCounts(
+ const std::vector<uint64_t> &CountFromProfile) {
+
+ // Use a worklist as we will update the vector during the iteration.
+ std::vector<PGOUseEdge *> WorkList;
+ for (auto &E : FuncInfo.MST.AllEdges)
+ WorkList.push_back(E.get());
+
+ uint32_t I = 0;
+ for (auto &E : WorkList) {
+ BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
+ if (!InstrBB)
+ continue;
+ uint64_t CountValue = CountFromProfile[I++];
+ if (!E->Removed) {
+ getBBInfo(InstrBB).setBBInfoCount(CountValue);
+ E->setEdgeCount(CountValue);
+ continue;
+ }
+
+ // Need to add two new edges.
+ BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+ BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+ // Add new edge of SrcBB->InstrBB.
+ PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
+ NewEdge.setEdgeCount(CountValue);
+ // Add new edge of InstrBB->DestBB.
+ PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
+ NewEdge1.setEdgeCount(CountValue);
+ NewEdge1.InMST = true;
+ getBBInfo(InstrBB).setBBInfoCount(CountValue);
+ }
+}
+
+// Set the count value for the unknown edge. There should be one and only one
+// unknown edge in the Edges vector.
+void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
+ for (auto &E : Edges) {
+ if (E->CountValid)
+ continue;
+ E->setEdgeCount(Value);
+
+ getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+ getBBInfo(E->DestBB).UnknownCountInEdge--;
+ return;
+ }
+ llvm_unreachable("Cannot find the unknown count edge");
+}
+
+// Read the profile from ProfileFileName and assign the values to the
+// instrumented BBs and edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
+ auto &Ctx = M->getContext();
+ ErrorOr<InstrProfRecord> Result =
+ PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
+ if (std::error_code EC = Result.getError()) {
+ if (EC == instrprof_error::unknown_function)
+ NumOfPGOMissing++;
+ else if (EC == instrprof_error::hash_mismatch ||
+ EC == llvm::instrprof_error::malformed)
+ NumOfPGOMismatch++;
+
+ std::string Msg = EC.message() + std::string(" ") + F.getName().str();
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+ return false;
+ }
+ std::vector<uint64_t> &CountFromProfile = Result.get().Counts;
+
+ NumOfPGOFunc++;
+ DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+ uint64_t ValueSum = 0;
+ for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
+ DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
+ ValueSum += CountFromProfile[I];
+ }
+
+ DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
+
+ getBBInfo(nullptr).UnknownCountOutEdge = 2;
+ getBBInfo(nullptr).UnknownCountInEdge = 2;
+
+ setInstrumentedCounts(CountFromProfile);
+ ProgramMaxCount = PGOReader->getMaximumFunctionCount();
+ return true;
+}
+
+// Populate the counters from instrumented BBs to all BBs.
+// At the end of this operation, all BBs should have a valid count value.
+void PGOUseFunc::populateCounters() {
+ // First set up Count variable for all BBs.
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (E->Removed)
+ continue;
+
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+ UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+ UseBBInfo &DestInfo = getBBInfo(DestBB);
+ SrcInfo.OutEdges.push_back(E.get());
+ DestInfo.InEdges.push_back(E.get());
+ SrcInfo.UnknownCountOutEdge++;
+ DestInfo.UnknownCountInEdge++;
+
+ if (!E->CountValid)
+ continue;
+ DestInfo.UnknownCountInEdge--;
+ SrcInfo.UnknownCountOutEdge--;
+ }
+
+ bool Changes = true;
+ unsigned NumPasses = 0;
+ while (Changes) {
+ NumPasses++;
+ Changes = false;
+
+ // For efficient traversal, it's better to start from the end as most
+ // of the instrumented edges are at the end.
+ for (auto &BB : reverse(F)) {
+ UseBBInfo &Count = getBBInfo(&BB);
+ if (!Count.CountValid) {
+ if (Count.UnknownCountOutEdge == 0) {
+ Count.CountValue = sumEdgeCount(Count.OutEdges);
+ Count.CountValid = true;
+ Changes = true;
+ } else if (Count.UnknownCountInEdge == 0) {
+ Count.CountValue = sumEdgeCount(Count.InEdges);
+ Count.CountValid = true;
+ Changes = true;
+ }
+ }
+ if (Count.CountValid) {
+ if (Count.UnknownCountOutEdge == 1) {
+ uint64_t Total = Count.CountValue - sumEdgeCount(Count.OutEdges);
+ setEdgeCount(Count.OutEdges, Total);
+ Changes = true;
+ }
+ if (Count.UnknownCountInEdge == 1) {
+ uint64_t Total = Count.CountValue - sumEdgeCount(Count.InEdges);
+ setEdgeCount(Count.InEdges, Total);
+ Changes = true;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
+ // Assert every BB has a valid counter.
+ uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
+ uint64_t FuncMaxCount = FuncEntryCount;
+ for (auto &BB : F) {
+ assert(getBBInfo(&BB).CountValid && "BB count is not valid");
+ uint64_t Count = getBBInfo(&BB).CountValue;
+ if (Count > FuncMaxCount)
+ FuncMaxCount = Count;
+ }
+ applyFunctionAttributes(FuncEntryCount, FuncMaxCount);
+
+ DEBUG(FuncInfo.dumpInfo("after reading profile."));
+}
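+
+// Concretely, one sweep step (illustrative counts): a block whose two
+// in-edges are both known, 30 and 50, and whose single out-edge is unknown
+// first becomes valid with CountValue = 80; the same visit then assigns the
+// unknown out-edge 80 minus the sum of the known out-edge counts.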
+
+// Assign the scaled count values to the BB with multiple out edges.
+void PGOUseFunc::setBranchWeights() {
+ // Generate MD_prof metadata for every branch instruction.
+ DEBUG(dbgs() << "\nSetting branch weights.\n");
+ MDBuilder MDB(M->getContext());
+ for (auto &BB : F) {
+ TerminatorInst *TI = BB.getTerminator();
+ if (TI->getNumSuccessors() < 2)
+ continue;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ continue;
+ if (getBBInfo(&BB).CountValue == 0)
+ continue;
+
+    // We have a branching BB with a non-zero count.
+ const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+ unsigned Size = BBCountInfo.OutEdges.size();
+ SmallVector<unsigned, 2> EdgeCounts(Size, 0);
+ uint64_t MaxCount = 0;
+ for (unsigned s = 0; s < Size; s++) {
+ const PGOUseEdge *E = BBCountInfo.OutEdges[s];
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+      if (DestBB == nullptr)
+ continue;
+ unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ uint64_t EdgeCount = E->CountValue;
+ if (EdgeCount > MaxCount)
+ MaxCount = EdgeCount;
+ EdgeCounts[SuccNum] = EdgeCount;
+ }
+ assert(MaxCount > 0 && "Bad max count");
+ uint64_t Scale = calculateCountScale(MaxCount);
+ SmallVector<unsigned, 4> Weights;
+ for (const auto &ECI : EdgeCounts)
+ Weights.push_back(scaleBranchCount(ECI, Scale));
+
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ DEBUG(dbgs() << "Weight is: ";
+ for (const auto &W : Weights) { dbgs() << W << " "; }
+ dbgs() << "\n";);
+ }
+}
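+
+// The attached metadata has the usual branch_weights shape, e.g.
+// (illustrative) !{!"branch_weights", i32 90, i32 10} for a conditional
+// branch whose scaled out-edge counts were 90 and 10.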
+} // end anonymous namespace
+
+bool PGOInstrumentationGen::runOnModule(Module &M) {
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ BranchProbabilityInfo *BPI =
+ &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI());
+ BlockFrequencyInfo *BFI =
+ &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI());
+ instrumentOneFunc(F, &M, BPI, BFI);
+ }
+ return true;
+}
+
+static void setPGOCountOnFunc(PGOUseFunc &Func,
+ IndexedInstrProfReader *PGOReader) {
+ if (Func.readCounters(PGOReader)) {
+ Func.populateCounters();
+ Func.setBranchWeights();
+ }
+}
+
+bool PGOInstrumentationUse::runOnModule(Module &M) {
+ DEBUG(dbgs() << "Read in profile counters: ");
+ auto &Ctx = M.getContext();
+ // Read the counter array from file.
+ auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(ProfileFileName.data(), EC.message()));
+ return false;
+ }
+
+ PGOReader = std::move(ReaderOrErr.get());
+ if (!PGOReader) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
+ "Cannot get PGOReader"));
+ return false;
+ }
+
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ BranchProbabilityInfo *BPI =
+ &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI());
+ BlockFrequencyInfo *BFI =
+ &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI());
+ PGOUseFunc Func(F, &M, BPI, BFI);
+ setPGOCountOnFunc(Func, PGOReader.get());
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp
index 6b185a2b127b..abed465f102d 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp
@@ -18,8 +18,9 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -37,6 +38,8 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -44,6 +47,17 @@ using namespace llvm;
#define DEBUG_TYPE "safestack"
+enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP };
+
+static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage",
+ cl::Hidden, cl::init(ThreadLocalUSP),
+ cl::desc("Type of storage for the unsafe stack pointer"),
+ cl::values(clEnumValN(ThreadLocalUSP, "thread-local",
+ "Thread-local storage"),
+ clEnumValN(SingleThreadUSP, "single-thread",
+ "Non-thread-local storage"),
+ clEnumValEnd));
+
namespace llvm {
STATISTIC(NumFunctions, "Total number of functions");
@@ -54,118 +68,48 @@ STATISTIC(NumUnsafeStackRestorePointsFunctions,
STATISTIC(NumAllocas, "Total number of allocas");
STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas");
STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
+STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments");
STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
} // namespace llvm
namespace {
-/// Check whether a given alloca instruction (AI) should be put on the safe
-/// stack or not. The function analyzes all uses of AI and checks whether it is
-/// only accessed in a memory safe way (as decided statically).
-bool IsSafeStackAlloca(const AllocaInst *AI) {
- // Go through all uses of this alloca and check whether all accesses to the
- // allocated object are statically known to be memory safe and, hence, the
- // object can be placed on the safe stack.
-
- SmallPtrSet<const Value *, 16> Visited;
- SmallVector<const Instruction *, 8> WorkList;
- WorkList.push_back(AI);
+/// Rewrite an SCEV expression for a memory access address to an expression that
+/// represents the offset from the given alloca.
+///
+/// The implementation simply replaces all mentions of the alloca with zero.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+ const Value *AllocaPtr;
- // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
- while (!WorkList.empty()) {
- const Instruction *V = WorkList.pop_back_val();
- for (const Use &UI : V->uses()) {
- auto I = cast<const Instruction>(UI.getUser());
- assert(V == UI.get());
-
- switch (I->getOpcode()) {
- case Instruction::Load:
- // Loading from a pointer is safe.
- break;
- case Instruction::VAArg:
- // "va-arg" from a pointer is safe.
- break;
- case Instruction::Store:
- if (V == I->getOperand(0))
- // Stored the pointer - conservatively assume it may be unsafe.
- return false;
- // Storing to the pointee is safe.
- break;
-
- case Instruction::GetElementPtr:
- if (!cast<const GetElementPtrInst>(I)->hasAllConstantIndices())
- // GEP with non-constant indices can lead to memory errors.
- // This also applies to inbounds GEPs, as the inbounds attribute
- // represents an assumption that the address is in bounds, rather than
- // an assertion that it is.
- return false;
-
- // We assume that GEP on static alloca with constant indices is safe,
- // otherwise a compiler would detect it and warn during compilation.
-
- if (!isa<const ConstantInt>(AI->getArraySize()))
- // However, if the array size itself is not constant, the access
- // might still be unsafe at runtime.
- return false;
-
- /* fallthrough */
-
- case Instruction::BitCast:
- case Instruction::IntToPtr:
- case Instruction::PHI:
- case Instruction::PtrToInt:
- case Instruction::Select:
- // The object can be safe or not, depending on how the result of the
- // instruction is used.
- if (Visited.insert(I).second)
- WorkList.push_back(cast<const Instruction>(I));
- break;
-
- case Instruction::Call:
- case Instruction::Invoke: {
- // FIXME: add support for memset and memcpy intrinsics.
- ImmutableCallSite CS(I);
-
- // LLVM 'nocapture' attribute is only set for arguments whose address
- // is not stored, passed around, or used in any other non-trivial way.
- // We assume that passing a pointer to an object as a 'nocapture'
- // argument is safe.
- // FIXME: a more precise solution would require an interprocedural
- // analysis here, which would look at all uses of an argument inside
- // the function being called.
- ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
- for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
- if (A->get() == V && !CS.doesNotCapture(A - B))
- // The parameter is not marked 'nocapture' - unsafe.
- return false;
- continue;
- }
+public:
+ AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+ : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
- default:
- // The object is unsafe if it is used in any other way.
- return false;
- }
- }
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (Expr->getValue() == AllocaPtr)
+ return SE.getZero(Expr->getType());
+ return Expr;
}
+};
- // All uses of the alloca are safe, we can place it on the safe stack.
- return true;
-}
-
-/// The SafeStack pass splits the stack of each function into the
-/// safe stack, which is only accessed through memory safe dereferences
-/// (as determined statically), and the unsafe stack, which contains all
-/// local variables that are accessed in unsafe ways.
+/// The SafeStack pass splits the stack of each function into the safe
+/// stack, which is only accessed through memory safe dereferences (as
+/// determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in ways that we can't prove to
+/// be safe.
class SafeStack : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLoweringBase *TL;
const DataLayout *DL;
+ ScalarEvolution *SE;
Type *StackPtrTy;
Type *IntPtrTy;
Type *Int32Ty;
Type *Int8Ty;
- Constant *UnsafeStackPtr = nullptr;
+ Value *UnsafeStackPtr = nullptr;
/// Unsafe stack alignment. Each stack frame must ensure that the stack is
/// aligned to this value. We need to re-align the unsafe stack if the
@@ -175,26 +119,31 @@ class SafeStack : public FunctionPass {
/// might expect to appear on the stack on most common targets.
enum { StackAlignment = 16 };
- /// \brief Build a constant representing a pointer to the unsafe stack
- /// pointer.
- Constant *getOrCreateUnsafeStackPtr(Module &M);
+ /// \brief Build a value representing a pointer to the unsafe stack pointer.
+ Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
/// \brief Find all static allocas, dynamic allocas, return instructions and
/// stack restore points (exception unwind blocks and setjmp calls) in the
/// given function and append them to the respective vectors.
void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
SmallVectorImpl<ReturnInst *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints);
+ /// \brief Calculate the allocation size of a given alloca. Returns 0 if the
+  /// size cannot be statically determined.
+ uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
+
/// \brief Allocate space for all static allocas in \p StaticAllocas,
/// replace allocas with pointers into the unsafe stack and generate code to
/// restore the stack pointer before all return instructions in \p Returns.
///
/// \returns A pointer to the top of the unsafe stack after all unsafe static
/// allocas are allocated.
- Value *moveStaticAllocasToUnsafeStack(Function &F,
+ Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments,
ArrayRef<ReturnInst *> Returns);
/// \brief Generate code to restore the stack after all stack restore points
@@ -203,7 +152,7 @@ class SafeStack : public FunctionPass {
/// \returns A local variable in which to maintain the dynamic top of the
/// unsafe stack if needed.
AllocaInst *
- createStackRestorePoints(Function &F,
+ createStackRestorePoints(IRBuilder<> &IRB, Function &F,
ArrayRef<Instruction *> StackRestorePoints,
Value *StaticTop, bool NeedDynamicTop);
@@ -214,17 +163,26 @@ class SafeStack : public FunctionPass {
AllocaInst *DynamicTop,
ArrayRef<AllocaInst *> DynamicAllocas);
+ bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize);
+
+ bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr, uint64_t AllocaSize);
+ bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
+ uint64_t AllocaSize);
+
public:
static char ID; // Pass identification, replacement for typeid.
- SafeStack() : FunctionPass(ID), DL(nullptr) {
+ SafeStack(const TargetMachine *TM)
+ : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
initializeSafeStackPass(*PassRegistry::getPassRegistry());
}
+ SafeStack() : SafeStack(nullptr) {}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
- virtual bool doInitialization(Module &M) {
+ bool doInitialization(Module &M) override {
DL = &M.getDataLayout();
StackPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -235,51 +193,203 @@ public:
return false;
}
- bool runOnFunction(Function &F);
-
+ bool runOnFunction(Function &F) override;
}; // class SafeStack
-Constant *SafeStack::getOrCreateUnsafeStackPtr(Module &M) {
- // The unsafe stack pointer is stored in a global variable with a magic name.
- const char *kUnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
+ uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C)
+ return 0;
+ Size *= C->getZExtValue();
+ }
+ return Size;
+}
+
+bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
+ const Value *AllocaPtr, uint64_t AllocaSize) {
+ AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
+
+ uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
+ ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+ ConstantRange SizeRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
+ ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+ ConstantRange AllocaRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
+ bool Safe = AllocaRange.contains(AccessRange);
+
+ DEBUG(dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << " Access " << *Addr << "\n"
+ << " SCEV " << *Expr
+ << " U: " << SE->getUnsignedRange(Expr)
+ << ", S: " << SE->getSignedRange(Expr) << "\n"
+ << " Range " << AccessRange << "\n"
+ << " AllocaRange " << AllocaRange << "\n"
+ << " " << (Safe ? "safe" : "unsafe") << "\n");
+
+ return Safe;
+}
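+
+// Worked example (illustrative): 4-byte loads from "alloca + i", where SCEV
+// bounds i to [0,4), against an 8-byte alloca:
+//   AccessStartRange = [0,4), SizeRange = [0,4)
+//   AccessRange      = [0,4) + [0,4) = [0,7)
+//   AllocaRange      = [0,8), which contains [0,7) -> the access is safe.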
+
+bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr,
+ uint64_t AllocaSize) {
+ // All MemIntrinsics have destination address in Arg0 and size in Arg2.
+ if (MI->getRawDest() != U) return true;
+ const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+ // Non-constant size => unsafe. FIXME: try SCEV getRange.
+ if (!Len) return false;
+ return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize);
+}
+
+/// Check whether a given allocation can be kept on the safe
+/// stack or not. The function analyzes all uses of AllocaPtr and checks
+/// whether it is only accessed in a memory safe way (as decided statically).
+bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
+ // Go through all uses of this alloca and check whether all accesses to the
+ // allocated object are statically known to be memory safe and, hence, the
+ // object can be placed on the safe stack.
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(AllocaPtr);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load: {
+ if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr,
+ AllocaSize))
+ return false;
+ break;
+ }
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store: {
+ if (V == I->getOperand(0)) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n store of address: " << *I << "\n");
+ return false;
+ }
+
+ if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()),
+ AllocaPtr, AllocaSize))
+ return false;
+ break;
+ }
+ case Instruction::Ret: {
+ // Information leak.
+ return false;
+ }
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ ImmutableCallSite CS(I);
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+ }
+
+ if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe memintrinsic: " << *I
+ << "\n");
+ return false;
+ }
+ continue;
+ }
+ // LLVM 'nocapture' attribute is only set for arguments whose address
+ // is not stored, passed around, or used in any other non-trivial way.
+ // We assume that passing a pointer to an object as a 'nocapture
+ // readnone' argument is safe.
+ // FIXME: a more precise solution would require an interprocedural
+ // analysis here, which would look at all uses of an argument inside
+ // the function being called.
+ ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V)
+ if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
+ CS.doesNotAccessMemory()))) {
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe call: " << *I << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ default:
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+ }
+ }
+
+ // All uses of the alloca are safe, we can place it on the safe stack.
+ return true;
+}
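+
+// Source-level intuition for the cases above (sketch):
+//   char buf[16];
+//   buf[12] = 0;        // constant, provably in-bounds -> safe stack
+//   memset(buf, 0, n);  // non-constant length          -> unsafe stack
+//   return buf;         // address escapes via Ret      -> unsafe stack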
+
+Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) {
+ // Check if there is a target-specific location for the unsafe stack pointer.
+ if (TL)
+ if (Value *V = TL->getSafeStackPointerLocation(IRB))
+ return V;
+
+ // Otherwise, assume the target links with compiler-rt, which provides a
+ // thread-local variable with a magic name.
+ Module &M = *F.getParent();
+ const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
auto UnsafeStackPtr =
- dyn_cast_or_null<GlobalVariable>(M.getNamedValue(kUnsafeStackPtrVar));
+ dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar));
+
+ bool UseTLS = USPStorage == ThreadLocalUSP;
if (!UnsafeStackPtr) {
+ auto TLSModel = UseTLS ?
+ GlobalValue::InitialExecTLSModel :
+ GlobalValue::NotThreadLocal;
// The global variable is not defined yet, define it ourselves.
- // We use the initial-exec TLS model because we do not support the variable
- // living anywhere other than in the main executable.
+ // We use the initial-exec TLS model because we do not support the
+ // variable living anywhere other than in the main executable.
UnsafeStackPtr = new GlobalVariable(
- /*Module=*/M, /*Type=*/StackPtrTy,
- /*isConstant=*/false, /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Initializer=*/0, /*Name=*/kUnsafeStackPtrVar,
- /*InsertBefore=*/nullptr,
- /*ThreadLocalMode=*/GlobalValue::InitialExecTLSModel);
+ M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+ UnsafeStackPtrVar, nullptr, TLSModel);
} else {
// The variable exists, check its type and attributes.
- if (UnsafeStackPtr->getValueType() != StackPtrTy) {
- report_fatal_error(Twine(kUnsafeStackPtrVar) + " must have void* type");
- }
-
- if (!UnsafeStackPtr->isThreadLocal()) {
- report_fatal_error(Twine(kUnsafeStackPtrVar) + " must be thread-local");
- }
+ if (UnsafeStackPtr->getValueType() != StackPtrTy)
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+ if (UseTLS != UnsafeStackPtr->isThreadLocal())
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+ (UseTLS ? "" : "not ") + "be thread-local");
}
-
return UnsafeStackPtr;
}
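+
+// The fallback above assumes a runtime definition morally equivalent to this
+// sketch (compiler-rt's safestack runtime provides the thread-local one):
+//   __thread void *__safestack_unsafe_stack_ptr;  // thread-local mode
+//   void *__safestack_unsafe_stack_ptr;           // single-thread mode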
void SafeStack::findInsts(Function &F,
SmallVectorImpl<AllocaInst *> &StaticAllocas,
SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
SmallVectorImpl<ReturnInst *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints) {
- for (Instruction &I : inst_range(&F)) {
+ for (Instruction &I : instructions(&F)) {
if (auto AI = dyn_cast<AllocaInst>(&I)) {
++NumAllocas;
- if (IsSafeStackAlloca(AI))
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (IsSafeStackAlloca(AI, Size))
continue;
if (AI->isStaticAlloca()) {
@@ -304,19 +414,26 @@ void SafeStack::findInsts(Function &F,
"gcroot intrinsic not compatible with safestack attribute");
}
}
+ for (Argument &Arg : F.args()) {
+ if (!Arg.hasByValAttr())
+ continue;
+ uint64_t Size =
+ DL->getTypeStoreSize(Arg.getType()->getPointerElementType());
+ if (IsSafeStackAlloca(&Arg, Size))
+ continue;
+
+ ++NumUnsafeByValArguments;
+ ByValArguments.push_back(&Arg);
+ }
}
AllocaInst *
-SafeStack::createStackRestorePoints(Function &F,
+SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
ArrayRef<Instruction *> StackRestorePoints,
Value *StaticTop, bool NeedDynamicTop) {
if (StackRestorePoints.empty())
return nullptr;
- IRBuilder<> IRB(StaticTop
- ? cast<Instruction>(StaticTop)->getNextNode()
- : (Instruction *)F.getEntryBlock().getFirstInsertionPt());
-
// We need the current value of the shadow stack pointer to restore
// after longjmp or exception catching.
@@ -342,7 +459,7 @@ SafeStack::createStackRestorePoints(Function &F,
for (Instruction *I : StackRestorePoints) {
++NumUnsafeStackRestorePoints;
- IRB.SetInsertPoint(cast<Instruction>(I->getNextNode()));
+ IRB.SetInsertPoint(I->getNextNode());
Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop;
IRB.CreateStore(CurrentTop, UnsafeStackPtr);
}
@@ -350,14 +467,12 @@ SafeStack::createStackRestorePoints(Function &F,
return DynamicTop;
}
-Value *
-SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
- ArrayRef<AllocaInst *> StaticAllocas,
- ArrayRef<ReturnInst *> Returns) {
- if (StaticAllocas.empty())
+Value *SafeStack::moveStaticAllocasToUnsafeStack(
+ IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns) {
+ if (StaticAllocas.empty() && ByValArguments.empty())
return nullptr;
- IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
DIBuilder DIB(*F.getParent());
// We explicitly compute and set the unsafe stack layout for all unsafe
@@ -377,6 +492,13 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
// Compute maximum alignment among static objects on the unsafe stack.
unsigned MaxAlignment = 0;
+ for (Argument *Arg : ByValArguments) {
+ Type *Ty = Arg->getType()->getPointerElementType();
+ unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+ Arg->getParamAlignment());
+ if (Align > MaxAlignment)
+ MaxAlignment = Align;
+ }
for (AllocaInst *AI : StaticAllocas) {
Type *Ty = AI->getAllocatedType();
unsigned Align =
@@ -388,22 +510,51 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
if (MaxAlignment > StackAlignment) {
// Re-align the base pointer according to the max requested alignment.
assert(isPowerOf2_32(MaxAlignment));
- IRB.SetInsertPoint(cast<Instruction>(BasePointer->getNextNode()));
+ IRB.SetInsertPoint(BasePointer->getNextNode());
BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
ConstantInt::get(IntPtrTy, ~uint64_t(MaxAlignment - 1))),
StackPtrTy));
}
- // Allocate space for every unsafe static AllocaInst on the unsafe stack.
int64_t StaticOffset = 0; // Current stack top.
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+
+ for (Argument *Arg : ByValArguments) {
+ Type *Ty = Arg->getType()->getPointerElementType();
+
+ uint64_t Size = DL->getTypeStoreSize(Ty);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+ Arg->getParamAlignment());
+
+ // Add alignment.
+ // NOTE: we ensure that BasePointer itself is aligned to >= Align.
+ StaticOffset += Size;
+ StaticOffset = RoundUpToAlignment(StaticOffset, Align);
+
+ Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -StaticOffset));
+ Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
+ Arg->getName() + ".unsafe-byval");
+
+ // Replace alloc with the new location.
+ replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
+ /*Deref=*/true, -StaticOffset);
+ Arg->replaceAllUsesWith(NewArg);
+ IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
+ IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
+ }
+
+ // Allocate space for every unsafe static AllocaInst on the unsafe stack.
for (AllocaInst *AI : StaticAllocas) {
IRB.SetInsertPoint(AI);
- auto CArraySize = cast<ConstantInt>(AI->getArraySize());
Type *Ty = AI->getAllocatedType();
-
- uint64_t Size = DL->getTypeAllocSize(Ty) * CArraySize->getZExtValue();
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
@@ -423,7 +574,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
cast<Instruction>(NewAI)->takeName(AI);
// Replace alloc with the new location.
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -StaticOffset);
AI->replaceAllUsesWith(NewAI);
AI->eraseFromParent();
}
@@ -434,7 +585,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
StaticOffset = RoundUpToAlignment(StaticOffset, StackAlignment);
// Update shadow stack pointer in the function epilogue.
- IRB.SetInsertPoint(cast<Instruction>(BasePointer->getNextNode()));
+ IRB.SetInsertPoint(BasePointer->getNextNode());
Value *StaticTop =
IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -StaticOffset),
@@ -478,7 +629,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (DynamicTop)
IRB.CreateStore(NewTop, DynamicTop);
- Value *NewAI = IRB.CreateIntToPtr(SP, AI->getType());
+ Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType());
if (AI->hasName() && isa<Instruction>(NewAI))
NewAI->takeName(AI);
@@ -513,8 +664,6 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
}
bool SafeStack::runOnFunction(Function &F) {
- auto AA = &getAnalysis<AliasAnalysis>();
-
DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
if (!F.hasFnAttribute(Attribute::SafeStack)) {
@@ -529,6 +678,9 @@ bool SafeStack::runOnFunction(Function &F) {
return false;
}
+ TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr;
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
{
// Make sure the regular stack protector won't run on this function
// (safestack attribute takes precedence).
@@ -541,16 +693,11 @@ bool SafeStack::runOnFunction(Function &F) {
AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex, B));
}
- if (AA->onlyReadsMemory(&F)) {
- // XXX: we don't protect against information leak attacks for now.
- DEBUG(dbgs() << "[SafeStack] function only reads memory\n");
- return false;
- }
-
++NumFunctions;
SmallVector<AllocaInst *, 16> StaticAllocas;
SmallVector<AllocaInst *, 4> DynamicAllocas;
+ SmallVector<Argument *, 4> ByValArguments;
SmallVector<ReturnInst *, 4> Returns;
// Collect all points where stack gets unwound and needs to be restored
@@ -562,23 +709,26 @@ bool SafeStack::runOnFunction(Function &F) {
// Find all static and dynamic alloca instructions that must be moved to the
// unsafe stack, all return instructions and stack restore points.
- findInsts(F, StaticAllocas, DynamicAllocas, Returns, StackRestorePoints);
+ findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns,
+ StackRestorePoints);
if (StaticAllocas.empty() && DynamicAllocas.empty() &&
- StackRestorePoints.empty())
+ ByValArguments.empty() && StackRestorePoints.empty())
return false; // Nothing to do in this function.
- if (!StaticAllocas.empty() || !DynamicAllocas.empty())
+ if (!StaticAllocas.empty() || !DynamicAllocas.empty() ||
+ !ByValArguments.empty())
++NumUnsafeStackFunctions; // This function has the unsafe stack.
if (!StackRestorePoints.empty())
++NumUnsafeStackRestorePointsFunctions;
- if (!UnsafeStackPtr)
- UnsafeStackPtr = getOrCreateUnsafeStackPtr(*F.getParent());
+ IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
+ UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F);
// The top of the unsafe stack after all unsafe static allocas are allocated.
- Value *StaticTop = moveStaticAllocasToUnsafeStack(F, StaticAllocas, Returns);
+ Value *StaticTop = moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas,
+ ByValArguments, Returns);
// Safe stack object that stores the current unsafe stack top. It is updated
// as unsafe dynamic (non-constant-sized) allocas are allocated and freed.
@@ -587,7 +737,7 @@ bool SafeStack::runOnFunction(Function &F) {
// FIXME: a better alternative might be to store the unsafe stack pointer
// before setjmp / invoke instructions.
AllocaInst *DynamicTop = createStackRestorePoints(
- F, StackRestorePoints, StaticTop, !DynamicAllocas.empty());
+ IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty());
// Handle dynamic allocas.
moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop,
@@ -597,13 +747,14 @@ bool SafeStack::runOnFunction(Function &F) {
return true;
}
-} // end anonymous namespace
+} // anonymous namespace
char SafeStack::ID = 0;
-INITIALIZE_PASS_BEGIN(SafeStack, "safe-stack",
- "Safe Stack instrumentation pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(SafeStack, "safe-stack", "Safe Stack instrumentation pass",
- false, false)
+INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
+INITIALIZE_TM_PASS_END(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
-FunctionPass *llvm::createSafeStackPass() { return new SafeStack(); }
+FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) {
+ return new SafeStack(TM);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 7a5b4cb0178b..09de7a2cda2b 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -31,6 +31,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -59,6 +60,7 @@ static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16";
static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter";
static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block";
static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp";
+static const char *const kSanCovTraceSwitch = "__sanitizer_cov_trace_switch";
static const char *const kSanCovModuleCtorName = "sancov.module_ctor";
static const uint64_t kSanCtorAndDtorPriority = 2;
@@ -148,19 +150,25 @@ class SanitizerCoverageModule : public ModulePass {
void InjectCoverageForIndirectCalls(Function &F,
ArrayRef<Instruction *> IndirCalls);
void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+ void InjectTraceForSwitch(Function &F,
+ ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
void SetNoSanitizeMetadata(Instruction *I);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls);
unsigned NumberOfInstrumentedBlocks() {
- return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses();
+ return SanCovFunction->getNumUses() +
+ SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() +
+ SanCovTraceEnter->getNumUses();
}
Function *SanCovFunction;
Function *SanCovWithCheckFunction;
Function *SanCovIndirCallFunction;
Function *SanCovTraceEnter, *SanCovTraceBB;
Function *SanCovTraceCmpFunction;
+ Function *SanCovTraceSwitchFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy, *Int64Ty;
+ Type *IntptrTy, *Int64Ty, *Int64PtrTy;
+ Module *CurModule;
LLVMContext *C;
const DataLayout *DL;
@@ -177,11 +185,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
return false;
C = &(M.getContext());
DL = &M.getDataLayout();
+ CurModule = &M;
IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
Int64Ty = IRB.getInt64Ty();
SanCovFunction = checkSanitizerInterfaceFunction(
@@ -194,18 +204,19 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
SanCovTraceCmpFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr));
+ SanCovTraceSwitchFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kSanCovTraceSwitch, VoidTy, Int64Ty, Int64PtrTy, nullptr));
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
- if (Options.TraceBB) {
- SanCovTraceEnter = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr));
- SanCovTraceBB = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr));
- }
+ SanCovTraceEnter = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr));
+ SanCovTraceBB = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr));
// At this point we create a dummy array of guards because we don't
// know how many elements we will need.
@@ -280,11 +291,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (F.empty()) return false;
if (F.getName().find(".module_ctor") != std::string::npos)
return false; // Should not instrument sanitizer init functions.
+ // Don't instrument functions using SEH for now. Splitting basic blocks like
+ // we do for coverage breaks WinEHPrepare.
+ // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
+ if (F.hasPersonalityFn() &&
+ isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+ return false;
if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
SplitAllCriticalEdges(F);
SmallVector<Instruction*, 8> IndirCalls;
SmallVector<BasicBlock*, 16> AllBlocks;
SmallVector<Instruction*, 8> CmpTraceTargets;
+ SmallVector<Instruction*, 8> SwitchTraceTargets;
for (auto &BB : F) {
AllBlocks.push_back(&BB);
for (auto &Inst : BB) {
@@ -293,13 +311,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (CS && !CS.getCalledFunction())
IndirCalls.push_back(&Inst);
}
- if (Options.TraceCmp && isa<ICmpInst>(&Inst))
- CmpTraceTargets.push_back(&Inst);
+ if (Options.TraceCmp) {
+ if (isa<ICmpInst>(&Inst))
+ CmpTraceTargets.push_back(&Inst);
+ if (isa<SwitchInst>(&Inst))
+ SwitchTraceTargets.push_back(&Inst);
+ }
}
}
InjectCoverage(F, AllBlocks);
InjectCoverageForIndirectCalls(F, IndirCalls);
InjectTraceForCmp(F, CmpTraceTargets);
+ InjectTraceForSwitch(F, SwitchTraceTargets);
return true;
}
@@ -348,6 +371,45 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
}
}
+// For every switch statement we insert a call:
+// __sanitizer_cov_trace_switch(CondValue,
+// {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... })
+
+void SanitizerCoverageModule::InjectTraceForSwitch(
+ Function &F, ArrayRef<Instruction *> SwitchTraceTargets) {
+ for (auto I : SwitchTraceTargets) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ IRBuilder<> IRB(I);
+ SmallVector<Constant *, 16> Initializers;
+ Value *Cond = SI->getCondition();
+ if (Cond->getType()->getScalarSizeInBits() >
+ Int64Ty->getScalarSizeInBits())
+ continue;
+ Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases()));
+ Initializers.push_back(
+ ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits()));
+ if (Cond->getType()->getScalarSizeInBits() <
+ Int64Ty->getScalarSizeInBits())
+ Cond = IRB.CreateIntCast(Cond, Int64Ty, false);
+ for (auto It: SI->cases()) {
+ Constant *C = It.getCaseValue();
+ if (C->getType()->getScalarSizeInBits() <
+ Int64Ty->getScalarSizeInBits())
+ C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty);
+ Initializers.push_back(C);
+ }
+ ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size());
+ GlobalVariable *GV = new GlobalVariable(
+ *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage,
+ ConstantArray::get(ArrayOfInt64Ty, Initializers),
+ "__sancov_gen_cov_switch_values");
+ IRB.CreateCall(SanCovTraceSwitchFunction,
+ {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)});
+ }
+ }
+}
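
For reference, this is a minimal standalone sketch of a consumer of the table built above, assuming only the {NumCases, ValueSizeInBits, Case0Value, ...} layout documented in the comment; the function name and the reporting are hypothetical, not the actual compiler-rt handler.

#include <cstdint>
#include <cstdio>

// Hypothetical consumer of the __sancov_gen_cov_switch_values payload:
// Vals[0] = number of cases, Vals[1] = bit width of the condition,
// Vals[2..2+NumCases) = case values, zero-extended to 64 bits.
extern "C" void hypothetical_trace_switch(uint64_t Val, uint64_t *Vals) {
  uint64_t NumCases = Vals[0];
  uint64_t BitWidth = Vals[1];
  for (uint64_t i = 0; i < NumCases; ++i)
    if (Vals[2 + i] == Val)
      printf("%u-bit switch condition matched case %llu\n",
             (unsigned)BitWidth, (unsigned long long)i);
}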
+
+
void SanitizerCoverageModule::InjectTraceForCmp(
Function &F, ArrayRef<Instruction *> CmpTraceTargets) {
for (auto I : CmpTraceTargets) {
@@ -369,8 +431,7 @@ void SanitizerCoverageModule::InjectTraceForCmp(
void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
I->setMetadata(
- I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"),
- MDNode::get(*C, None));
+ I->getModule()->getMDKindID("nosanitize"), MDNode::get(*C, None));
}
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
@@ -382,34 +443,31 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
// locations.
if (isa<UnreachableInst>(BB.getTerminator()))
return;
- BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
- // Skip static allocas at the top of the entry block so they don't become
- // dynamic when we split the block. If we used our optimized stack layout,
- // then there will only be one alloca and it will come first.
- for (; IP != BE; ++IP) {
- AllocaInst *AI = dyn_cast<AllocaInst>(IP);
- if (!AI || !AI->isStaticAlloca())
- break;
- }
+ BasicBlock::iterator IP = BB.getFirstInsertionPt();
bool IsEntryBB = &BB == &F.getEntryBlock();
DebugLoc EntryLoc;
if (IsEntryBB) {
if (auto SP = getDISubprogram(&F))
EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ // Keep static allocas and llvm.localescape calls in the entry block. Even
+ // if we aren't splitting the block, it's nice for allocas to be before
+ // calls.
+ IP = PrepareToSplitEntryBlock(BB, IP);
} else {
EntryLoc = IP->getDebugLoc();
}
- IRBuilder<> IRB(IP);
+ IRBuilder<> IRB(&*IP);
IRB.SetCurrentDebugLocation(EntryLoc);
- SmallVector<Value *, 1> Indices;
Value *GuardP = IRB.CreateAdd(
IRB.CreatePointerCast(GuardArray, IntptrTy),
ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
- if (UseCalls) {
+ if (Options.TraceBB) {
+ IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
+ } else if (UseCalls) {
IRB.CreateCall(SanCovWithCheckFunction, GuardP);
} else {
LoadInst *Load = IRB.CreateLoad(GuardP);
@@ -418,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
SetNoSanitizeMetadata(Load);
Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
Instruction *Ins = SplitBlockAndInsertIfThen(
- Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
IRB.SetInsertPoint(Ins);
IRB.SetCurrentDebugLocation(EntryLoc);
// __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
@@ -427,7 +485,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
if (Options.Use8bitCounters) {
- IRB.SetInsertPoint(IP);
+ IRB.SetInsertPoint(&*IP);
Value *P = IRB.CreateAdd(
IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
@@ -438,13 +496,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
SetNoSanitizeMetadata(LI);
SetNoSanitizeMetadata(SI);
}
-
- if (Options.TraceBB) {
- // Experimental support for tracing.
- // Insert a callback with the same guard variable as used for coverage.
- IRB.SetInsertPoint(IP);
- IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
- }
}
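
The guard address above is plain byte arithmetic over the int32 guard array: slot 0 is reserved, and each instrumented block takes the next 4-byte slot. A standalone model of the computation, with names invented for illustration:

#include <cstdint>

// Model of the GuardP computation: 32-bit guard slots in a flat array,
// slot 0 reserved, so the Nth instrumented block uses byte offset
// (1 + N) * 4 from the start of the array.
int32_t *guardForBlock(int32_t *GuardArray, uintptr_t N) {
  uintptr_t Addr = reinterpret_cast<uintptr_t>(GuardArray) + (1 + N) * 4;
  return reinterpret_cast<int32_t *>(Addr);
}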
char SanitizerCoverageModule::ID = 0;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 1a46bbb86122..9331e1d2b3fd 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -142,37 +142,35 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr));
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
- const size_t ByteSize = 1 << i;
- const size_t BitSize = ByteSize * 8;
- SmallString<32> ReadName("__tsan_read" + itostr(ByteSize));
+ const unsigned ByteSize = 1U << i;
+ const unsigned BitSize = ByteSize * 8;
+ std::string ByteSizeStr = utostr(ByteSize);
+ std::string BitSizeStr = utostr(BitSize);
+ SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- SmallString<32> WriteName("__tsan_write" + itostr(ByteSize));
+ SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- SmallString<64> UnalignedReadName("__tsan_unaligned_read" +
- itostr(ByteSize));
+ SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
TsanUnalignedRead[i] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- SmallString<64> UnalignedWriteName("__tsan_unaligned_write" +
- itostr(ByteSize));
+ SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
TsanUnalignedWrite[i] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
- SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +
- "_load");
+ SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
- SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +
- "_store");
+ SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));
@@ -201,7 +199,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
}
- SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
+ SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
"_compare_exchange_val");
TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
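
Each loop iteration instantiates one family of callbacks per access size. Below is a standalone sketch that prints the names this loop appears to generate; kNumberOfAccessSizes = 5 (1 through 16 bytes) is an assumption, since its definition is not part of this hunk.

#include <cstdio>

int main() {
  const int kNumberOfAccessSizes = 5; // assumed: 1, 2, 4, 8, 16 bytes
  for (int i = 0; i < kNumberOfAccessSizes; ++i) {
    const unsigned ByteSize = 1U << i;
    const unsigned BitSize = ByteSize * 8;
    printf("__tsan_read%u  __tsan_write%u\n", ByteSize, ByteSize);
    printf("__tsan_unaligned_read%u  __tsan_unaligned_write%u\n", ByteSize,
           ByteSize);
    printf("__tsan_atomic%u_load  __tsan_atomic%u_store\n", BitSize, BitSize);
    printf("__tsan_atomic%u_compare_exchange_val\n", BitSize);
  }
}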
@@ -513,8 +511,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- const size_t ByteSize = 1 << Idx;
- const size_t BitSize = ByteSize * 8;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
@@ -527,8 +525,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- const size_t ByteSize = 1 << Idx;
- const size_t BitSize = ByteSize * 8;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
@@ -544,8 +542,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
if (!F)
return false;
- const size_t ByteSize = 1 << Idx;
- const size_t BitSize = ByteSize * 8;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
@@ -558,8 +556,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- const size_t ByteSize = 1 << Idx;
- const size_t BitSize = ByteSize * 8;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
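
In each of these helpers, `1 << Idx` inverts getMemoryAccessFuncIndex: the table index is the log2 of the access size in bytes. A standalone sketch of the assumed forward mapping (the real function derives the size from the pointee type via the DataLayout):

#include <cstdint>

// Assumed inverse of "ByteSize = 1 << Idx" above: map an access size in
// bytes to a callback-table index, or -1 for unsupported sizes.
int memoryAccessFuncIndex(uint64_t SizeInBytes) {
  for (int Idx = 0; Idx < 5; ++Idx) // 1, 2, 4, 8, 16 bytes
    if (SizeInBytes == (1ULL << Idx))
      return Idx;
  return -1;
}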
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp
deleted file mode 100644
index afb873a355a7..000000000000
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp
+++ /dev/null
@@ -1,673 +0,0 @@
-//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file defines several utility functions used by various ARC
-/// optimizations which are IMHO too big to be in a header file.
-///
-/// WARNING: This file knows about certain library functions. It recognizes them
-/// by name, and hardwires knowledge of their semantics.
-///
-/// WARNING: This file knows about how certain Objective-C library functions are
-/// used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#include "ObjCARC.h"
-#include "llvm/IR/Intrinsics.h"
-
-using namespace llvm;
-using namespace llvm::objcarc;
-
-raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
- const ARCInstKind Class) {
- switch (Class) {
- case ARCInstKind::Retain:
- return OS << "ARCInstKind::Retain";
- case ARCInstKind::RetainRV:
- return OS << "ARCInstKind::RetainRV";
- case ARCInstKind::RetainBlock:
- return OS << "ARCInstKind::RetainBlock";
- case ARCInstKind::Release:
- return OS << "ARCInstKind::Release";
- case ARCInstKind::Autorelease:
- return OS << "ARCInstKind::Autorelease";
- case ARCInstKind::AutoreleaseRV:
- return OS << "ARCInstKind::AutoreleaseRV";
- case ARCInstKind::AutoreleasepoolPush:
- return OS << "ARCInstKind::AutoreleasepoolPush";
- case ARCInstKind::AutoreleasepoolPop:
- return OS << "ARCInstKind::AutoreleasepoolPop";
- case ARCInstKind::NoopCast:
- return OS << "ARCInstKind::NoopCast";
- case ARCInstKind::FusedRetainAutorelease:
- return OS << "ARCInstKind::FusedRetainAutorelease";
- case ARCInstKind::FusedRetainAutoreleaseRV:
- return OS << "ARCInstKind::FusedRetainAutoreleaseRV";
- case ARCInstKind::LoadWeakRetained:
- return OS << "ARCInstKind::LoadWeakRetained";
- case ARCInstKind::StoreWeak:
- return OS << "ARCInstKind::StoreWeak";
- case ARCInstKind::InitWeak:
- return OS << "ARCInstKind::InitWeak";
- case ARCInstKind::LoadWeak:
- return OS << "ARCInstKind::LoadWeak";
- case ARCInstKind::MoveWeak:
- return OS << "ARCInstKind::MoveWeak";
- case ARCInstKind::CopyWeak:
- return OS << "ARCInstKind::CopyWeak";
- case ARCInstKind::DestroyWeak:
- return OS << "ARCInstKind::DestroyWeak";
- case ARCInstKind::StoreStrong:
- return OS << "ARCInstKind::StoreStrong";
- case ARCInstKind::CallOrUser:
- return OS << "ARCInstKind::CallOrUser";
- case ARCInstKind::Call:
- return OS << "ARCInstKind::Call";
- case ARCInstKind::User:
- return OS << "ARCInstKind::User";
- case ARCInstKind::IntrinsicUser:
- return OS << "ARCInstKind::IntrinsicUser";
- case ARCInstKind::None:
- return OS << "ARCInstKind::None";
- }
- llvm_unreachable("Unknown instruction class!");
-}
-
-ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-
- // No (mandatory) arguments.
- if (AI == AE)
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush)
- .Case("clang.arc.use", ARCInstKind::IntrinsicUser)
- .Default(ARCInstKind::CallOrUser);
-
- // One argument.
- const Argument *A0 = AI++;
- if (AI == AE)
- // Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_retain", ARCInstKind::Retain)
- .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
- .Case("objc_retainBlock", ARCInstKind::RetainBlock)
- .Case("objc_release", ARCInstKind::Release)
- .Case("objc_autorelease", ARCInstKind::Autorelease)
- .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
- .Case("objc_retainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
- .Case("objc_retain_autorelease",
- ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",
- ARCInstKind::FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", ARCInstKind::User)
- .Case("objc_sync_exit", ARCInstKind::User)
- .Default(ARCInstKind::CallOrUser);
-
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
- .Case("objc_loadWeak", ARCInstKind::LoadWeak)
- .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
- .Default(ARCInstKind::CallOrUser);
- }
-
- // Two arguments, first is i8**.
- const Argument *A1 = AI++;
- if (AI == AE)
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
- if (Pte->getElementType()->isIntegerTy(8))
- if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- Type *ETy1 = PTy1->getElementType();
- // Second argument is i8*
- if (ETy1->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_storeWeak", ARCInstKind::StoreWeak)
- .Case("objc_initWeak", ARCInstKind::InitWeak)
- .Case("objc_storeStrong", ARCInstKind::StoreStrong)
- .Default(ARCInstKind::CallOrUser);
- // Second argument is i8**.
- if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
- if (Pte1->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_moveWeak", ARCInstKind::MoveWeak)
- .Case("objc_copyWeak", ARCInstKind::CopyWeak)
- // Ignore annotation calls. This is important to stop the
- // optimizer from treating annotations as uses, which would
- // make the state of the pointers they are attempting to
- // elucidate incorrect.
- .Case("llvm.arc.annotation.topdown.bbstart",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.topdown.bbend",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.bottomup.bbstart",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.bottomup.bbend",
- ARCInstKind::None)
- .Default(ARCInstKind::CallOrUser);
- }
-
- // Anything else.
- return ARCInstKind::CallOrUser;
-}
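
The classification above keys on both the argument signature (none, i8*, i8**, and the two-argument forms) and the callee name. A trimmed standalone model of the name half of that dispatch, using std::map in place of StringSwitch and covering only a few of the entries:

#include <map>
#include <string>

enum class Kind { Retain, Release, Autorelease, CallOrUser };

// Model of the name-based half of the classification; the real code
// first checks that the argument types match (e.g. a single i8*).
Kind classifyByName(const std::string &Name) {
  static const std::map<std::string, Kind> Table = {
      {"objc_retain", Kind::Retain},
      {"objc_release", Kind::Release},
      {"objc_autorelease", Kind::Autorelease},
  };
  auto It = Table.find(Name);
  return It == Table.end() ? Kind::CallOrUser : It->second;
}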
-
-// A whitelist of intrinsics that we know do not use objc pointers or decrement
-// ref counts.
-static bool isInertIntrinsic(unsigned ID) {
- // TODO: Make this into a covered switch.
- switch (ID) {
- case Intrinsic::returnaddress:
- case Intrinsic::frameaddress:
- case Intrinsic::stacksave:
- case Intrinsic::stackrestore:
- case Intrinsic::vastart:
- case Intrinsic::vacopy:
- case Intrinsic::vaend:
- case Intrinsic::objectsize:
- case Intrinsic::prefetch:
- case Intrinsic::stackprotector:
- case Intrinsic::eh_return_i32:
- case Intrinsic::eh_return_i64:
- case Intrinsic::eh_typeid_for:
- case Intrinsic::eh_dwarf_cfa:
- case Intrinsic::eh_sjlj_lsda:
- case Intrinsic::eh_sjlj_functioncontext:
- case Intrinsic::init_trampoline:
- case Intrinsic::adjust_trampoline:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- // Don't let dbg info affect our results.
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- // Short cut: Some intrinsics obviously don't use ObjC pointers.
- return true;
- default:
- return false;
- }
-}
-
-// A whitelist of intrinsics that we know may use objc pointers but do not
-// decrement ref counts.
-static bool isUseOnlyIntrinsic(unsigned ID) {
- // We are conservative: even though intrinsics are unlikely to touch
- // reference counts, we whitelist them for safety.
- //
- // TODO: Expand this into a covered switch. There is a lot more here.
- switch (ID) {
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- return true;
- default:
- return false;
- }
-}
-
-/// \brief Determine what kind of construct V is.
-ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
- // Any instruction other than a bitcast or a gep with a pointer operand has a
- // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
- // to a subsequent use, rather than using it themselves, in this sense.
- // As a short cut, several other opcodes are known to have no pointer
- // operands of interest. And ret is never followed by a release, so it's
- // not interesting to examine.
- switch (I->getOpcode()) {
- case Instruction::Call: {
- const CallInst *CI = cast<CallInst>(I);
- // See if we have a function that we know something about.
- if (const Function *F = CI->getCalledFunction()) {
- ARCInstKind Class = GetFunctionClass(F);
- if (Class != ARCInstKind::CallOrUser)
- return Class;
- Intrinsic::ID ID = F->getIntrinsicID();
- if (isInertIntrinsic(ID))
- return ARCInstKind::None;
- if (isUseOnlyIntrinsic(ID))
- return ARCInstKind::User;
- }
-
- // Otherwise, be conservative.
- return GetCallSiteClass(CI);
- }
- case Instruction::Invoke:
- // Otherwise, be conservative.
- return GetCallSiteClass(cast<InvokeInst>(I));
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::Select:
- case Instruction::PHI:
- case Instruction::Ret:
- case Instruction::Br:
- case Instruction::Switch:
- case Instruction::IndirectBr:
- case Instruction::Alloca:
- case Instruction::VAArg:
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::FDiv:
- case Instruction::SRem:
- case Instruction::URem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::SExt:
- case Instruction::ZExt:
- case Instruction::Trunc:
- case Instruction::IntToPtr:
- case Instruction::FCmp:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::InsertElement:
- case Instruction::ExtractElement:
- case Instruction::ShuffleVector:
- case Instruction::ExtractValue:
- break;
- case Instruction::ICmp:
- // Comparing a pointer with null, or any other constant, isn't an
- // interesting use, because we don't care what the pointer points to, or
- // about the values of any other dynamic reference-counted pointers.
- if (IsPotentialRetainableObjPtr(I->getOperand(1)))
- return ARCInstKind::User;
- break;
- default:
- // For anything else, check all the operands.
- // Note that this includes both operands of a Store: while the first
- // operand isn't actually being dereferenced, it is being stored to
- // memory where we can no longer track who might read it and dereference
- // it, so we have to consider it potentially used.
- for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- if (IsPotentialRetainableObjPtr(*OI))
- return ARCInstKind::User;
- }
- }
-
- // Otherwise, it's totally inert for ARC purposes.
- return ARCInstKind::None;
-}
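
For call instructions, the logic above tries progressively weaker information sources: the known function class, then the intrinsic whitelists, then the conservative call-site class. A standalone model of that ordering, with the predicate results passed in as booleans:

enum class Kind { None, User, CallOrUser, Known /* any specific class */ };

// Model of the call-handling order in GetARCInstKind: a specific known
// function class wins; otherwise inert intrinsics map to None, use-only
// intrinsics to User, and everything else to the call-site class.
Kind classifyCall(Kind FuncClass, bool IsInert, bool IsUseOnly,
                  Kind CallSiteClass) {
  if (FuncClass != Kind::CallOrUser)
    return FuncClass;
  if (IsInert)
    return Kind::None;
  if (IsUseOnly)
    return Kind::User;
  return CallSiteClass;
}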
-
-/// \brief Test if the given class is a kind of user.
-bool llvm::objcarc::IsUser(ARCInstKind Class) {
- switch (Class) {
- case ARCInstKind::User:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::IntrinsicUser:
- return true;
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::RetainBlock:
- case ARCInstKind::Release:
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::NoopCast:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::Call:
- case ARCInstKind::None:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-/// \brief Test if the given class is objc_retain or equivalent.
-bool llvm::objcarc::IsRetain(ARCInstKind Class) {
- switch (Class) {
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- return true;
- // We treat retain block as not a retain, since it can copy its
- // block.
- case ARCInstKind::RetainBlock:
- case ARCInstKind::Release:
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::NoopCast:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- case ARCInstKind::User:
- case ARCInstKind::None:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-/// \brief Test if the given class is objc_autorelease or equivalent.
-bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
- switch (Class) {
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- return true;
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::RetainBlock:
- case ARCInstKind::Release:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::NoopCast:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- case ARCInstKind::User:
- case ARCInstKind::None:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-/// \brief Test if the given class represents instructions which return their
-/// argument verbatim.
-bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
- switch (Class) {
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::NoopCast:
- return true;
- case ARCInstKind::RetainBlock:
- case ARCInstKind::Release:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- case ARCInstKind::User:
- case ARCInstKind::None:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-/// \brief Test if the given class represents instructions which do nothing if
-/// passed a null pointer.
-bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
- switch (Class) {
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::Release:
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::RetainBlock:
- return true;
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- case ARCInstKind::User:
- case ARCInstKind::None:
- case ARCInstKind::NoopCast:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-/// \brief Test if the given class represents instructions which are always safe
-/// to mark with the "tail" keyword.
-bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
- // ARCInstKind::RetainBlock may be given a stack argument.
- switch (Class) {
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::AutoreleaseRV:
- return true;
- case ARCInstKind::Release:
- case ARCInstKind::Autorelease:
- case ARCInstKind::RetainBlock:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- case ARCInstKind::User:
- case ARCInstKind::None:
- case ARCInstKind::NoopCast:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-/// \brief Test if the given class represents instructions which are never safe
-/// to mark with the "tail" keyword.
-bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
- /// It is never safe to tail call objc_autorelease: tail calling enables
- /// fast autoreleasing, which can cause our object to be reclaimed from the
- /// autorelease pool prematurely, violating the semantics of __autoreleasing
- /// types in ARC.
- switch (Class) {
- case ARCInstKind::Autorelease:
- return true;
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::Release:
- case ARCInstKind::RetainBlock:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- case ARCInstKind::User:
- case ARCInstKind::None:
- case ARCInstKind::NoopCast:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-/// \brief Test if the given class represents instructions which are always safe
-/// to mark with the nounwind attribute.
-bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
- // objc_retainBlock is not nounwind because it calls user copy constructors
- // which could theoretically throw.
- switch (Class) {
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::Release:
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- return true;
- case ARCInstKind::RetainBlock:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- case ARCInstKind::User:
- case ARCInstKind::None:
- case ARCInstKind::NoopCast:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-/// Test whether the given instruction can autorelease any pointer or cause an
-/// autoreleasepool pop.
-///
-/// This means that it *could* interrupt the RV optimization.
-bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
- switch (Class) {
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- return true;
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::Release:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::RetainBlock:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::User:
- case ARCInstKind::None:
- case ARCInstKind::NoopCast:
- return false;
- }
- llvm_unreachable("covered switch isn't covered?");
-}
-
-bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
- switch (Kind) {
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::NoopCast:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- case ARCInstKind::IntrinsicUser:
- case ARCInstKind::User:
- case ARCInstKind::None:
- return false;
-
- // The cases below are conservative.
-
- // RetainBlock can result in user-defined copy constructors being called,
- // implying that releases may occur.
- case ARCInstKind::RetainBlock:
- case ARCInstKind::Release:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::AutoreleasepoolPop:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::StoreWeak:
- case ARCInstKind::InitWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::MoveWeak:
- case ARCInstKind::CopyWeak:
- case ARCInstKind::DestroyWeak:
- case ARCInstKind::StoreStrong:
- case ARCInstKind::CallOrUser:
- case ARCInstKind::Call:
- return true;
- }
-
- llvm_unreachable("covered switch isn't covered?");
-}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h
deleted file mode 100644
index 636c65c9b627..000000000000
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h
+++ /dev/null
@@ -1,123 +0,0 @@
-//===--- ARCInstKind.h - ARC instruction equivalence classes -*- C++ -*----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H
-#define LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H
-
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Function.h"
-
-namespace llvm {
-namespace objcarc {
-
-/// \enum ARCInstKind
-///
-/// \brief Equivalence classes of instructions in the ARC Model.
-///
-/// Since we do not have "instructions" to represent ARC concepts in LLVM IR,
-/// we instead operate on equivalence classes of instructions.
-///
-/// TODO: This should be split into two enums: a runtime entry point enum
-/// (possibly united with the ARCRuntimeEntrypoint class) and an enum that deals
-/// with effects of instructions in the ARC model (which would handle the notion
-/// of a User or CallOrUser).
-enum class ARCInstKind {
- Retain, ///< objc_retain
- RetainRV, ///< objc_retainAutoreleasedReturnValue
- RetainBlock, ///< objc_retainBlock
- Release, ///< objc_release
- Autorelease, ///< objc_autorelease
- AutoreleaseRV, ///< objc_autoreleaseReturnValue
- AutoreleasepoolPush, ///< objc_autoreleasePoolPush
- AutoreleasepoolPop, ///< objc_autoreleasePoolPop
- NoopCast, ///< objc_retainedObject, etc.
- FusedRetainAutorelease, ///< objc_retainAutorelease
- FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
- LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
- StoreWeak, ///< objc_storeWeak (primitive)
- InitWeak, ///< objc_initWeak (derived)
- LoadWeak, ///< objc_loadWeak (derived)
- MoveWeak, ///< objc_moveWeak (derived)
- CopyWeak, ///< objc_copyWeak (derived)
- DestroyWeak, ///< objc_destroyWeak (derived)
- StoreStrong, ///< objc_storeStrong (derived)
- IntrinsicUser, ///< clang.arc.use
- CallOrUser, ///< could call objc_release and/or "use" pointers
- Call, ///< could call objc_release
- User, ///< could "use" a pointer
- None ///< anything that is inert from an ARC perspective.
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const ARCInstKind Class);
-
-/// \brief Test if the given class is a kind of user.
-bool IsUser(ARCInstKind Class);
-
-/// \brief Test if the given class is objc_retain or equivalent.
-bool IsRetain(ARCInstKind Class);
-
-/// \brief Test if the given class is objc_autorelease or equivalent.
-bool IsAutorelease(ARCInstKind Class);
-
-/// \brief Test if the given class represents instructions which return their
-/// argument verbatim.
-bool IsForwarding(ARCInstKind Class);
-
-/// \brief Test if the given class represents instructions which do nothing if
-/// passed a null pointer.
-bool IsNoopOnNull(ARCInstKind Class);
-
-/// \brief Test if the given class represents instructions which are always safe
-/// to mark with the "tail" keyword.
-bool IsAlwaysTail(ARCInstKind Class);
-
-/// \brief Test if the given class represents instructions which are never safe
-/// to mark with the "tail" keyword.
-bool IsNeverTail(ARCInstKind Class);
-
-/// \brief Test if the given class represents instructions which are always safe
-/// to mark with the nounwind attribute.
-bool IsNoThrow(ARCInstKind Class);
-
-/// Test whether the given instruction can autorelease any pointer or cause an
-/// autoreleasepool pop.
-bool CanInterruptRV(ARCInstKind Class);
-
-/// \brief Determine if F is one of the special known Functions. If it isn't,
-/// return ARCInstKind::CallOrUser.
-ARCInstKind GetFunctionClass(const Function *F);
-
-/// \brief Determine which objc runtime call instruction class V belongs to.
-///
-/// This is similar to GetARCInstKind except that it only detects objc
-/// runtime calls. This allows it to be faster.
-///
-static inline ARCInstKind GetBasicARCInstKind(const Value *V) {
- if (const CallInst *CI = dyn_cast<CallInst>(V)) {
- if (const Function *F = CI->getCalledFunction())
- return GetFunctionClass(F);
- // Otherwise, be conservative.
- return ARCInstKind::CallOrUser;
- }
-
- // Otherwise, be conservative.
- return isa<InvokeInst>(V) ? ARCInstKind::CallOrUser : ARCInstKind::User;
-}
-
-/// Map V to its ARCInstKind equivalence class.
-ARCInstKind GetARCInstKind(const Value *V);
-
-/// Returns false if we can conservatively prove that no instruction mapped to
-/// this kind can decrement ref counts; returns true otherwise.
-bool CanDecrementRefCount(ARCInstKind Kind);
-
-} // end namespace objcarc
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 4edd02904b22..9d78e5ae3b9b 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -49,7 +49,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
assert(CS && "Only calls can alter reference counts!");
// See if AliasAnalysis can help us with the call.
- AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ FunctionModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
if (AliasAnalysis::onlyReadsMemory(MRB))
return false;
if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
@@ -226,7 +226,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
SmallPtrSetImpl<Instruction *> &DependingInsts,
SmallPtrSetImpl<const BasicBlock *> &Visited,
ProvenanceAnalysis &PA) {
- BasicBlock::iterator StartPos = StartInst;
+ BasicBlock::iterator StartPos = StartInst->getIterator();
SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
Worklist.push_back(std::make_pair(StartBB, StartPos));
@@ -252,7 +252,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
break;
}
- Instruction *Inst = --LocalStartPos;
+ Instruction *Inst = &*--LocalStartPos;
if (Depends(Flavor, Inst, Arg, PA)) {
DependingInsts.insert(Inst);
break;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 6ea038b8ba8c..d860723bb460 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -26,18 +26,10 @@ namespace llvm {
using namespace llvm;
using namespace llvm::objcarc;
-/// \brief A handy option to enable/disable all ARC Optimizations.
-bool llvm::objcarc::EnableARCOpts;
-static cl::opt<bool, true>
-EnableARCOptimizations("enable-objc-arc-opts",
- cl::desc("enable/disable all ARC Optimizations"),
- cl::location(EnableARCOpts),
- cl::init(true));
-
/// initializeObjCARCOptsPasses - Initialize all passes linked into the
/// ObjCARCOpts library.
void llvm::initializeObjCARCOpts(PassRegistry &Registry) {
- initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCAAWrapperPassPass(Registry);
initializeObjCARCAPElimPass(Registry);
initializeObjCARCExpandPass(Registry);
initializeObjCARCContractPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 7595e2db1a7a..5fd45b00af17 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -26,6 +26,8 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
@@ -34,7 +36,6 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "ARCInstKind.h"
namespace llvm {
class raw_ostream;
@@ -43,99 +44,6 @@ class raw_ostream;
namespace llvm {
namespace objcarc {
-/// \brief A handy option to enable/disable all ARC Optimizations.
-extern bool EnableARCOpts;
-
-/// \brief Test if the given module looks interesting to run ARC optimization
-/// on.
-static inline bool ModuleHasARC(const Module &M) {
- return
- M.getNamedValue("objc_retain") ||
- M.getNamedValue("objc_release") ||
- M.getNamedValue("objc_autorelease") ||
- M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
- M.getNamedValue("objc_retainBlock") ||
- M.getNamedValue("objc_autoreleaseReturnValue") ||
- M.getNamedValue("objc_autoreleasePoolPush") ||
- M.getNamedValue("objc_loadWeakRetained") ||
- M.getNamedValue("objc_loadWeak") ||
- M.getNamedValue("objc_destroyWeak") ||
- M.getNamedValue("objc_storeWeak") ||
- M.getNamedValue("objc_initWeak") ||
- M.getNamedValue("objc_moveWeak") ||
- M.getNamedValue("objc_copyWeak") ||
- M.getNamedValue("objc_retainedObject") ||
- M.getNamedValue("objc_unretainedObject") ||
- M.getNamedValue("objc_unretainedPointer") ||
- M.getNamedValue("clang.arc.use");
-}
-
-/// \brief This is a wrapper around getUnderlyingObject which also knows how to
-/// look through objc_retain and objc_autorelease calls, which we know to return
-/// their argument verbatim.
-static inline const Value *GetUnderlyingObjCPtr(const Value *V,
- const DataLayout &DL) {
- for (;;) {
- V = GetUnderlyingObject(V, DL);
- if (!IsForwarding(GetBasicARCInstKind(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
-
- return V;
-}
-
-/// The RCIdentity root of a value \p V is a dominating value U for which
-/// retaining or releasing U is equivalent to retaining or releasing V. In other
-/// words, ARC operations on \p V are equivalent to ARC operations on \p U.
-///
-/// We use this in the ARC optimizer to make it easier to match up ARC
-/// operations by always mapping ARC operations to RCIdentityRoots instead of
-/// pointers themselves.
-///
-/// The two ways that we see RCIdentical values in ObjC are via:
-///
-/// 1. PointerCasts
-/// 2. Forwarding Calls that return their argument verbatim.
-///
-/// Thus this function strips off pointer casts and forwarding calls. *NOTE*
-/// This implies that two RCIdentical values must alias.
-static inline const Value *GetRCIdentityRoot(const Value *V) {
- for (;;) {
- V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicARCInstKind(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
- return V;
-}
-
-/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just
-/// casts away the const of the result. For documentation on what an
-/// RCIdentityRoot is (and, by extension, what GetRCIdentityRoot does), see
-/// that function.
-static inline Value *GetRCIdentityRoot(Value *V) {
- return const_cast<Value *>(GetRCIdentityRoot((const Value *)V));
-}
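
The fixed-point walk above alternates two strip steps: peel pointer casts, then step through a forwarding call to its argument, until neither applies. A standalone model over a toy value graph (the real forwarding test is IsForwarding(GetBasicARCInstKind(V))):

struct Value {
  Value *CastSource = nullptr;   // set if this value is a pointer cast
  Value *ForwardedArg = nullptr; // set if this is a forwarding call
};

// Model of GetRCIdentityRoot: strip casts, then forwarding calls,
// until a fixed point is reached.
const Value *rcIdentityRoot(const Value *V) {
  for (;;) {
    while (V->CastSource) // stripPointerCasts()
      V = V->CastSource;
    if (!V->ForwardedArg) // not objc_retain / objc_autorelease / etc.
      return V;
    V = V->ForwardedArg;  // cast<CallInst>(V)->getArgOperand(0)
  }
}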
-
-/// \brief Assuming the given instruction is one of the special calls such as
-/// objc_retain or objc_release, return the RCIdentity root of the argument of
-/// the call.
-static inline Value *GetArgRCIdentityRoot(Value *Inst) {
- return GetRCIdentityRoot(cast<CallInst>(Inst)->getArgOperand(0));
-}
-
-static inline bool IsNullOrUndef(const Value *V) {
- return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
-}
-
-static inline bool IsNoopInstruction(const Instruction *I) {
- return isa<BitCastInst>(I) ||
- (isa<GetElementPtrInst>(I) &&
- cast<GetElementPtrInst>(I)->hasAllZeroIndices());
-}
-
-
/// \brief Erase the given instruction.
///
/// Many ObjC calls return their argument verbatim,
@@ -162,152 +70,6 @@ static inline void EraseInstruction(Instruction *CI) {
RecursivelyDeleteTriviallyDeadInstructions(OldArg);
}
-/// \brief Test whether the given value is possibly a retainable object pointer.
-static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
- // Pointers to static or stack storage are not valid retainable object
- // pointers.
- if (isa<Constant>(Op) || isa<AllocaInst>(Op))
- return false;
- // Special arguments can not be a valid retainable object pointer.
- if (const Argument *Arg = dyn_cast<Argument>(Op))
- if (Arg->hasByValAttr() ||
- Arg->hasInAllocaAttr() ||
- Arg->hasNestAttr() ||
- Arg->hasStructRetAttr())
- return false;
- // Only consider values with pointer types.
- //
- // It seems intuitive to exclude function pointer types as well, since
- // functions are never retainable object pointers; however, clang occasionally
- // bitcasts retainable object pointers to function-pointer type temporarily.
- PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty)
- return false;
- // Conservatively assume anything else is a potential retainable object
- // pointer.
- return true;
-}
-
-static inline bool IsPotentialRetainableObjPtr(const Value *Op,
- AliasAnalysis &AA) {
- // First make the rudimentary check.
- if (!IsPotentialRetainableObjPtr(Op))
- return false;
-
- // Objects in constant memory are not reference-counted.
- if (AA.pointsToConstantMemory(Op))
- return false;
-
- // Pointers in constant memory are not pointing to reference-counted objects.
- if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
- if (AA.pointsToConstantMemory(LI->getPointerOperand()))
- return false;
-
- // Otherwise assume the worst.
- return true;
-}
-
-/// \brief Helper for GetARCInstKind. Determines what kind of construct CS
-/// is.
-static inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) {
- for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- if (IsPotentialRetainableObjPtr(*I))
- return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser;
-
- return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call;
-}
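
GetCallSiteClass reduces to a two-by-two decision: does any argument look like a retainable object pointer, and does the call only read memory? A standalone model of that table:

enum class Kind { None, Call, User, CallOrUser };

// Model of GetCallSiteClass: a retainable-pointer argument makes the
// call a (potential) user; writing memory makes it a potential release.
Kind callSiteClass(bool HasRetainablePtrArg, bool OnlyReadsMemory) {
  if (HasRetainablePtrArg)
    return OnlyReadsMemory ? Kind::User : Kind::CallOrUser;
  return OnlyReadsMemory ? Kind::None : Kind::Call;
}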
-
-/// \brief Return true if this value refers to a distinct and identifiable
-/// object.
-///
-/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses
-/// special knowledge of ObjC conventions.
-static inline bool IsObjCIdentifiedObject(const Value *V) {
- // Assume that call results and arguments have their own "provenance".
- // Constants (including GlobalVariables) and Allocas are never
- // reference-counted.
- if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
- isa<Argument>(V) || isa<Constant>(V) ||
- isa<AllocaInst>(V))
- return true;
-
- if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
- const Value *Pointer =
- GetRCIdentityRoot(LI->getPointerOperand());
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
- // A constant pointer can't be pointing to an object on the heap. It may
- // be reference-counted, but it won't be deleted.
- if (GV->isConstant())
- return true;
- StringRef Name = GV->getName();
- // These special variables are known to hold values which are not
- // reference-counted pointers.
- if (Name.startswith("\01l_objc_msgSend_fixup_"))
- return true;
-
- StringRef Section = GV->getSection();
- if (Section.find("__message_refs") != StringRef::npos ||
- Section.find("__objc_classrefs") != StringRef::npos ||
- Section.find("__objc_superrefs") != StringRef::npos ||
- Section.find("__objc_methname") != StringRef::npos ||
- Section.find("__cstring") != StringRef::npos)
- return true;
- }
- }
-
- return false;
-}
-
-enum class ARCMDKindID {
- ImpreciseRelease,
- CopyOnEscape,
- NoObjCARCExceptions,
-};
-
-/// A cache of MDKinds used by various ARC optimizations.
-class ARCMDKindCache {
- Module *M;
-
- /// The Metadata Kind for clang.imprecise_release metadata.
- llvm::Optional<unsigned> ImpreciseReleaseMDKind;
-
- /// The Metadata Kind for clang.arc.copy_on_escape metadata.
- llvm::Optional<unsigned> CopyOnEscapeMDKind;
-
- /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
- llvm::Optional<unsigned> NoObjCARCExceptionsMDKind;
-
-public:
- void init(Module *Mod) {
- M = Mod;
- ImpreciseReleaseMDKind = NoneType::None;
- CopyOnEscapeMDKind = NoneType::None;
- NoObjCARCExceptionsMDKind = NoneType::None;
- }
-
- unsigned get(ARCMDKindID ID) {
- switch (ID) {
- case ARCMDKindID::ImpreciseRelease:
- if (!ImpreciseReleaseMDKind)
- ImpreciseReleaseMDKind =
- M->getContext().getMDKindID("clang.imprecise_release");
- return *ImpreciseReleaseMDKind;
- case ARCMDKindID::CopyOnEscape:
- if (!CopyOnEscapeMDKind)
- CopyOnEscapeMDKind =
- M->getContext().getMDKindID("clang.arc.copy_on_escape");
- return *CopyOnEscapeMDKind;
- case ARCMDKindID::NoObjCARCExceptions:
- if (!NoObjCARCExceptionsMDKind)
- NoObjCARCExceptionsMDKind =
- M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
- return *NoObjCARCExceptionsMDKind;
- }
- llvm_unreachable("Covered switch isn't covered?!");
- }
-};
-
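
Usage of the cache is one init() per module followed by lazy get() calls. A standalone model of the same lazy-resolution pattern; lookupKindID is a hypothetical stand-in for LLVMContext::getMDKindID:

#include <optional>
#include <string>

// Hypothetical stand-in for LLVMContext::getMDKindID.
unsigned lookupKindID(const std::string &Name) { return Name.size(); }

// Model of the pattern above: resolve on first use, then serve the
// cached ID on every later request.
unsigned getCachedKindID(std::optional<unsigned> &Slot,
                         const std::string &Name) {
  if (!Slot)
    Slot = lookupKindID(Name);
  return *Slot;
}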
} // end namespace objcarc
} // end namespace llvm
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index d318643a359a..969e77c1f888 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -72,12 +72,9 @@ bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
if (const Function *Callee = CS.getCalledFunction()) {
if (Callee->isDeclaration() || Callee->mayBeOverridden())
return true;
- for (Function::const_iterator I = Callee->begin(), E = Callee->end();
- I != E; ++I) {
- const BasicBlock *BB = I;
- for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
- J != F; ++J)
- if (ImmutableCallSite JCS = ImmutableCallSite(J))
+ for (const BasicBlock &BB : *Callee) {
+ for (const Instruction &I : BB)
+ if (ImmutableCallSite JCS = ImmutableCallSite(&I))
// This recursion depth limit is arbitrary. It's just large
// enough to cover known interesting testcases.
if (Depth < 3 &&
@@ -96,7 +93,7 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
Instruction *Push = nullptr;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
switch (GetBasicARCInstKind(Inst)) {
case ARCInstKind::AutoreleasepoolPush:
Push = Inst;
@@ -169,7 +166,7 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
if (std::next(F->begin()) != F->end())
continue;
// Ok, a single-block constructor function definition. Try to optimize it.
- Changed |= OptimizeBB(F->begin());
+ Changed |= OptimizeBB(&F->front());
}
return Changed;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
deleted file mode 100644
index 3893aab76b2a..000000000000
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file defines a simple ARC-aware AliasAnalysis using special knowledge
-/// of Objective C to enhance other optimization passes which rely on the Alias
-/// Analysis infrastructure.
-///
-/// WARNING: This file knows about certain library functions. It recognizes them
-/// by name, and hardwires knowledge of their semantics.
-///
-/// WARNING: This file knows about how certain Objective-C library functions are
-/// used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#include "ObjCARC.h"
-#include "ObjCARCAliasAnalysis.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/PassAnalysisSupport.h"
-#include "llvm/PassSupport.h"
-
-#define DEBUG_TYPE "objc-arc-aa"
-
-namespace llvm {
- class Function;
- class Value;
-}
-
-using namespace llvm;
-using namespace llvm::objcarc;
-
-// Register this pass...
-char ObjCARCAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
- "ObjC-ARC-Based Alias Analysis", false, true, false)
-
-ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
- return new ObjCARCAliasAnalysis();
-}
-
-bool ObjCARCAliasAnalysis::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
-}
-
-void
-ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
-}
-
-AliasResult ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- if (!EnableARCOpts)
- return AliasAnalysis::alias(LocA, LocB);
-
- // First, strip off no-ops, including ObjC-specific no-ops, and try making a
- // precise alias query.
- const Value *SA = GetRCIdentityRoot(LocA.Ptr);
- const Value *SB = GetRCIdentityRoot(LocB.Ptr);
- AliasResult Result =
- AliasAnalysis::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
- MemoryLocation(SB, LocB.Size, LocB.AATags));
- if (Result != MayAlias)
- return Result;
-
- // If that failed, climb to the underlying object, including climbing through
- // ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *UA = GetUnderlyingObjCPtr(SA, *DL);
- const Value *UB = GetUnderlyingObjCPtr(SB, *DL);
- if (UA != SA || UB != SB) {
- Result = AliasAnalysis::alias(MemoryLocation(UA), MemoryLocation(UB));
- // We can't use MustAlias or PartialAlias results here because
- // GetUnderlyingObjCPtr may return an offsetted pointer value.
- if (Result == NoAlias)
- return NoAlias;
- }
-
- // If that failed, fail. We don't need to chain here, since that's covered
- // by the earlier precise query.
- return MayAlias;
-}
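
The query above is a two-step refinement: first a precise query on the RCIdentity roots, then, if that still says MayAlias, an imprecise query on the underlying objects, where only NoAlias can be trusted because the underlying-object walk may step past pointer offsets. A standalone model of how the two answers combine:

enum class AR { NoAlias, MayAlias, PartialAlias, MustAlias };

// Model of the combination logic in ObjCARCAliasAnalysis::alias: the
// precise answer wins unless it is MayAlias; the imprecise answer is
// only usable when it is NoAlias.
AR combineAliasResults(AR Precise, bool Climbed, AR Imprecise) {
  if (Precise != AR::MayAlias)
    return Precise;
  if (Climbed && Imprecise == AR::NoAlias)
    return AR::NoAlias;
  return AR::MayAlias;
}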
-
-bool ObjCARCAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
- if (!EnableARCOpts)
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
-
- // First, strip off no-ops, including ObjC-specific no-ops, and try making
- // a precise alias query.
- const Value *S = GetRCIdentityRoot(Loc.Ptr);
- if (AliasAnalysis::pointsToConstantMemory(
- MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal))
- return true;
-
- // If that failed, climb to the underlying object, including climbing through
- // ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *U = GetUnderlyingObjCPtr(S, *DL);
- if (U != S)
- return AliasAnalysis::pointsToConstantMemory(MemoryLocation(U), OrLocal);
-
- // If that failed, fail. We don't need to chain here, since that's covered
- // by the earlier precise query.
- return false;
-}
-
-AliasAnalysis::ModRefBehavior
-ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
- // We have nothing to do. Just chain to the next AliasAnalysis.
- return AliasAnalysis::getModRefBehavior(CS);
-}
-
-AliasAnalysis::ModRefBehavior
-ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
- if (!EnableARCOpts)
- return AliasAnalysis::getModRefBehavior(F);
-
- switch (GetFunctionClass(F)) {
- case ARCInstKind::NoopCast:
- return DoesNotAccessMemory;
- default:
- break;
- }
-
- return AliasAnalysis::getModRefBehavior(F);
-}
-
-AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- if (!EnableARCOpts)
- return AliasAnalysis::getModRefInfo(CS, Loc);
-
- switch (GetBasicARCInstKind(CS.getInstruction())) {
- case ARCInstKind::Retain:
- case ARCInstKind::RetainRV:
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- case ARCInstKind::NoopCast:
- case ARCInstKind::AutoreleasepoolPush:
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
- // These functions don't access any memory visible to the compiler.
- // Note that this doesn't include objc_retainBlock, because it updates
- // pointers when it copies block data.
- return NoModRef;
- default:
- break;
- }
-
- return AliasAnalysis::getModRefInfo(CS, Loc);
-}
-
-AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
- // TODO: Theoretically we could check for dependencies between objc_* calls
- // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
- return AliasAnalysis::getModRefInfo(CS1, CS2);
-}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
deleted file mode 100644
index eecc82fe572c..000000000000
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ /dev/null
@@ -1,74 +0,0 @@
-//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- C++ -*-----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares a simple ARC-aware AliasAnalysis using special knowledge
-/// of Objective C to enhance other optimization passes which rely on the Alias
-/// Analysis infrastructure.
-///
-/// WARNING: This file knows about certain library functions. It recognizes them
-/// by name, and hardwires knowledge of their semantics.
-///
-/// WARNING: This file knows about how certain Objective-C library functions are
-/// used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Pass.h"
-
-namespace llvm {
-namespace objcarc {
-
- /// \brief This is a simple alias analysis implementation that uses knowledge
- /// of ARC constructs to answer queries.
- ///
- /// TODO: This class could be generalized to know about other ObjC-specific
- /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing
- /// even though their offsets are dynamic.
- class ObjCARCAliasAnalysis : public ImmutablePass,
- public AliasAnalysis {
- public:
- static char ID; // Class identification, replacement for typeinfo
- ObjCARCAliasAnalysis() : ImmutablePass(ID) {
- initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- private:
- bool doInitialization(Module &M) override;
-
- /// This method is used when a pass implements an analysis interface through
- /// multiple inheritance. If needed, it should override this to adjust the
- /// this pointer as needed for the specified pass info.
- void *getAdjustedAnalysisPointer(const void *PI) override {
- if (PI == &AliasAnalysis::ID)
- return static_cast<AliasAnalysis *>(this);
- return this;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override;
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
- ModRefBehavior getModRefBehavior(const Function *F) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override;
- };
-
-} // namespace objcarc
-} // namespace llvm
-
-#endif
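
The two deletions above remove the pass-based ObjC ARC alias analysis from lib/Transforms; the logic itself survives, re-homed under llvm/Analysis (the ObjCARCOpts.cpp hunks below switch to llvm/Analysis/ObjCARCAliasAnalysis.h and depend on ObjCARCAAWrapperPass). The deleted alias() embodies a two-tier query strategy worth restating. Below is a standalone sketch of that strategy with the LLVM types stubbed out — stripNoops and underlyingObject are stand-ins for GetRCIdentityRoot and GetUnderlyingObjCPtr, and the stub bodies are trivial placeholders, not the real implementations:

    enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

    struct Ptr {};                                   // opaque stand-in for llvm::Value
    static const Ptr *stripNoops(const Ptr *P) { return P; }        // GetRCIdentityRoot
    static const Ptr *underlyingObject(const Ptr *P) { return P; }  // GetUnderlyingObjCPtr
    static AliasResult preciseAlias(const Ptr *, const Ptr *) { return MayAlias; }
    static AliasResult impreciseAlias(const Ptr *, const Ptr *) { return MayAlias; }

    static AliasResult arcAlias(const Ptr *A, const Ptr *B) {
      // Tier 1: strip no-op casts (including ARC-specific ones) and make the
      // precise, size- and TBAA-aware query.
      const Ptr *SA = stripNoops(A), *SB = stripNoops(B);
      AliasResult R = preciseAlias(SA, SB);
      if (R != MayAlias)
        return R;
      // Tier 2: climb to the underlying objects and re-query imprecisely.
      // The climb can step over offsets, so only a NoAlias answer is
      // trustworthy; MustAlias/PartialAlias must be discarded here.
      const Ptr *UA = underlyingObject(SA), *UB = underlyingObject(SB);
      if ((UA != SA || UB != SB) && impreciseAlias(UA, UB) == NoAlias)
        return NoAlias;
      return MayAlias;
    }
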
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index baca76ba3f2a..1cdf5689f42a 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -119,9 +119,9 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
return false;
// Check that the call is next to the retain.
- BasicBlock::const_iterator I = Call;
- ++I;
- while (IsNoopInstruction(I)) ++I;
+ BasicBlock::const_iterator I = ++Call->getIterator();
+ while (IsNoopInstruction(&*I))
+ ++I;
if (&*I != Retain)
return false;
@@ -247,7 +247,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
// Ok, now we know we have not seen a store yet. See if Inst can write to
// our load location, if it can not, just ignore the instruction.
- if (!(AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod))
+ if (!(AA->getModRefInfo(Inst, Loc) & MRI_Mod))
continue;
Store = dyn_cast<StoreInst>(Inst);
@@ -282,9 +282,9 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
Instruction *Release,
ProvenanceAnalysis &PA) {
// Walk up from the Store to find the retain.
- BasicBlock::iterator I = Store;
+ BasicBlock::iterator I = Store->getIterator();
BasicBlock::iterator Begin = Store->getParent()->begin();
- while (I != Begin && GetBasicARCInstKind(I) != ARCInstKind::Retain) {
+ while (I != Begin && GetBasicARCInstKind(&*I) != ARCInstKind::Retain) {
Instruction *Inst = &*I;
// It is only safe to move the retain to the store if we can prove
@@ -294,7 +294,7 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
return nullptr;
--I;
}
- Instruction *Retain = I;
+ Instruction *Retain = &*I;
if (GetBasicARCInstKind(Retain) != ARCInstKind::Retain)
return nullptr;
if (GetArgRCIdentityRoot(Retain) != New)
@@ -429,7 +429,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
// insert it now.
if (!RetainRVMarker)
return false;
- BasicBlock::iterator BBI = Inst;
+ BasicBlock::iterator BBI = Inst->getIterator();
BasicBlock *InstParent = Inst->getParent();
// Step up to see if the call immediately precedes the RetainRV call.
@@ -440,11 +440,11 @@ bool ObjCARCContract::tryToPeepholeInstruction(
BasicBlock *Pred = InstParent->getSinglePredecessor();
if (!Pred)
goto decline_rv_optimization;
- BBI = Pred->getTerminator();
+ BBI = Pred->getTerminator()->getIterator();
break;
}
--BBI;
- } while (IsNoopInstruction(BBI));
+ } while (IsNoopInstruction(&*BBI));
if (&*BBI == GetArgRCIdentityRoot(Inst)) {
DEBUG(dbgs() << "Adding inline asm marker for "
@@ -511,10 +511,10 @@ bool ObjCARCContract::runOnFunction(Function &F) {
return false;
Changed = false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- PA.setAA(&getAnalysis<AliasAnalysis>());
+ PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n");
@@ -629,13 +629,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
char ObjCARCContract::ID = 0;
INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract",
"ObjC ARC contraction", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract",
"ObjC ARC contraction", false, false)
void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
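
The ObjCARCContract hunks above are typical of the alias-analysis migration running through this import: the old AliasAnalysis analysis group (INITIALIZE_AG_DEPENDENCY) becomes a dependency on the concrete AAResultsWrapperPass, and passes pull the aggregated AAResults out of it. A minimal sketch of the updated boilerplate — ExamplePass is hypothetical, for illustration only:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct ExamplePass : FunctionPass {   // hypothetical pass
      static char ID;
      ExamplePass() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        // Was: AU.addRequired<AliasAnalysis>() plus INITIALIZE_AG_DEPENDENCY.
        AU.addRequired<AAResultsWrapperPass>();
        AU.setPreservesCFG();
      }

      bool runOnFunction(Function &F) override {
        // Was: AA = &getAnalysis<AliasAnalysis>();
        AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        (void)AA; // query via AA.alias(...), AA.getModRefInfo(...), etc.
        return false;
      }
    };
    }
    char ExamplePass::ID = 0;
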
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9edbb17e8d1b..f0ee6e2be487 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -28,7 +28,6 @@
#include "ARCRuntimeEntryPoints.h"
#include "BlotMapVector.h"
#include "DependencyAnalysis.h"
-#include "ObjCARCAliasAnalysis.h"
#include "ProvenanceAnalysis.h"
#include "PtrState.h"
#include "llvm/ADT/DenseMap.h"
@@ -36,6 +35,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
@@ -482,7 +482,7 @@ namespace {
/// A flag indicating whether this optimization pass should run.
bool Run;
- /// Flags which determine whether each of the interesting runtine functions
+ /// Flags which determine whether each of the interesting runtime functions
/// is in fact used in the current function.
unsigned UsedInThisFunction;
@@ -556,7 +556,7 @@ namespace {
char ObjCARCOpt::ID = 0;
INITIALIZE_PASS_BEGIN(ObjCARCOpt,
"objc-arc", "ObjC ARC optimization", false, false)
-INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
INITIALIZE_PASS_END(ObjCARCOpt,
"objc-arc", "ObjC ARC optimization", false, false)
@@ -565,8 +565,8 @@ Pass *llvm::createObjCARCOptPass() {
}
void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ObjCARCAliasAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ObjCARCAAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
// ARC optimization doesn't currently split critical edges.
AU.setPreservesCFG();
}
@@ -581,16 +581,18 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
ImmutableCallSite CS(Arg);
if (const Instruction *Call = CS.getInstruction()) {
if (Call->getParent() == RetainRV->getParent()) {
- BasicBlock::const_iterator I = Call;
+ BasicBlock::const_iterator I(Call);
++I;
- while (IsNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(&*I))
+ ++I;
if (&*I == RetainRV)
return false;
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock *RetainRVParent = RetainRV->getParent();
if (II->getNormalDest() == RetainRVParent) {
BasicBlock::const_iterator I = RetainRVParent->begin();
- while (IsNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(&*I))
+ ++I;
if (&*I == RetainRV)
return false;
}
@@ -599,18 +601,21 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
// Check for being preceded by an objc_autoreleaseReturnValue on the same
// pointer. In this case, we can delete the pair.
- BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ BasicBlock::iterator I = RetainRV->getIterator(),
+ Begin = RetainRV->getParent()->begin();
if (I != Begin) {
- do --I; while (I != Begin && IsNoopInstruction(I));
- if (GetBasicARCInstKind(I) == ARCInstKind::AutoreleaseRV &&
- GetArgRCIdentityRoot(I) == Arg) {
+ do
+ --I;
+ while (I != Begin && IsNoopInstruction(&*I));
+ if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV &&
+ GetArgRCIdentityRoot(&*I) == Arg) {
Changed = true;
++NumPeeps;
DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
<< "Erasing " << *RetainRV << "\n");
- EraseInstruction(I);
+ EraseInstruction(&*I);
EraseInstruction(RetainRV);
return true;
}
@@ -1216,7 +1221,7 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
- Instruction *Inst = std::prev(I);
+ Instruction *Inst = &*std::prev(I);
// Invoke instructions are visited as part of their successors (below).
if (isa<InvokeInst>(Inst))
@@ -1264,7 +1269,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
Arg = GetArgRCIdentityRoot(Inst);
TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
NestingDetected |= S.InitTopDown(Class, Inst);
- // A retain can be a potential use; procede to the generic checking
+ // A retain can be a potential use; proceed to the generic checking
// code below.
break;
}
@@ -1342,12 +1347,10 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
<< "Performing Dataflow:\n");
// Visit all the instructions, top-down.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- Instruction *Inst = I;
+ for (Instruction &Inst : *BB) {
+ DEBUG(dbgs() << " Visiting " << Inst << "\n");
- DEBUG(dbgs() << " Visiting " << *Inst << "\n");
-
- NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
+ NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates);
}
DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n"
@@ -1413,16 +1416,15 @@ ComputePostOrders(Function &F,
// Functions may have many exits, and there also blocks which we treat
// as exits due to ignored edges.
SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *ExitBB = I;
- BBState &MyStates = BBStates[ExitBB];
+ for (BasicBlock &ExitBB : F) {
+ BBState &MyStates = BBStates[&ExitBB];
if (!MyStates.isExit())
continue;
MyStates.SetAsExit();
- PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
- Visited.insert(ExitBB);
+ PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin()));
+ Visited.insert(&ExitBB);
while (!PredStack.empty()) {
reverse_dfs_next_succ:
BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
@@ -1830,7 +1832,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
// analysis too, but that would want caching. A better approach would be to
// use the technique that EarlyCSE uses.
inst_iterator Current = std::prev(I);
- BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+ BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
for (BasicBlock::iterator B = CurrentBB->begin(),
J = Current.getInstructionIterator();
J != B; --J) {
@@ -2008,10 +2010,7 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
// Check that the call is a regular call.
ARCInstKind Class = GetBasicARCInstKind(Call);
- if (Class != ARCInstKind::CallOrUser && Class != ARCInstKind::Call)
- return false;
-
- return true;
+ return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call;
}
/// Find a dependent retain that precedes the given autorelease for which there
@@ -2081,9 +2080,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
SmallPtrSet<Instruction *, 4> DependingInstructions;
SmallPtrSet<const BasicBlock *, 4> Visited;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- BasicBlock *BB = FI;
- ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+ for (BasicBlock &BB: F) {
+ ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
@@ -2095,19 +2093,16 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
// Look for an ``autorelease'' instruction that is a predecessor of Ret and
// dependent on Arg such that there are no instructions dependent on Arg
// that need a positive ref count in between the autorelease and Ret.
- CallInst *Autorelease =
- FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
- DependingInstructions, Visited,
- PA);
+ CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath(
+ Arg, &BB, Ret, DependingInstructions, Visited, PA);
DependingInstructions.clear();
Visited.clear();
if (!Autorelease)
continue;
- CallInst *Retain =
- FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
- DependingInstructions, Visited, PA);
+ CallInst *Retain = FindPredecessorRetainWithSafePath(
+ Arg, &BB, Autorelease, DependingInstructions, Visited, PA);
DependingInstructions.clear();
Visited.clear();
@@ -2192,7 +2187,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>"
"\n");
- PA.setAA(&getAnalysis<AliasAnalysis>());
+ PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
#ifndef NDEBUG
if (AreStatisticsEnabled()) {
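
Most of the mechanical churn in the ObjCARCOpts.cpp hunks above follows a single rule from the ilist iterator cleanup: Instruction* no longer converts implicitly to or from BasicBlock::iterator, so both directions must be spelled out. A sketch of the two conversions (skipOne is a hypothetical helper):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static void skipOne(Instruction *Inst) {
      BasicBlock::iterator I = Inst->getIterator(); // was: iterator I = Inst;
      ++I;
      Instruction *Next = &*I;                      // was: Instruction *Next = I;
      (void)Next;
    }
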
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 0ac41d3ea326..1a12b659e5a3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -26,10 +26,10 @@
#define LLVM_LIB_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
namespace llvm {
class Value;
- class AliasAnalysis;
class DataLayout;
class PHINode;
class SelectInst;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 0be75af52014..c274e8182fb5 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -35,7 +35,7 @@ char PAEval::ID = 0;
PAEval::PAEval() : FunctionPass(ID) {}
void PAEval::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
}
static StringRef getName(Value *V) {
@@ -65,7 +65,7 @@ bool PAEval::runOnFunction(Function &F) {
}
ProvenanceAnalysis PA;
- PA.setAA(&getAnalysis<AliasAnalysis>());
+ PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
const DataLayout &DL = F.getParent()->getDataLayout();
for (Value *V1 : Values) {
@@ -89,6 +89,6 @@ FunctionPass *llvm::createPAEvalPass() { return new PAEval(); }
INITIALIZE_PASS_BEGIN(PAEval, "pa-eval",
"Evaluate ProvenanceAnalysis on all pairs", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(PAEval, "pa-eval",
"Evaluate ProvenanceAnalysis on all pairs", false, true)
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
index ae20e7e6d347..df64fa32f3f8 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
@@ -256,9 +256,9 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
// one of its successor blocks, since we can't insert code after it
// in its own block, and we don't want to split critical edges.
if (isa<InvokeInst>(Inst))
- InsertReverseInsertPt(BB->getFirstInsertionPt());
+ InsertReverseInsertPt(&*BB->getFirstInsertionPt());
else
- InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ InsertReverseInsertPt(&*++Inst->getIterator());
SetSeq(S_Use);
} else if (Seq == S_Release && IsUser(Class)) {
DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; "
@@ -268,9 +268,9 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
assert(!HasReverseInsertPts());
// As above; handle invoke specially.
if (isa<InvokeInst>(Inst))
- InsertReverseInsertPt(BB->getFirstInsertionPt());
+ InsertReverseInsertPt(&*BB->getFirstInsertionPt());
else
- InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ InsertReverseInsertPt(&*++Inst->getIterator());
}
break;
case S_Stop:
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
index e45e1ea96c53..9749e44822b2 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
@@ -17,8 +17,8 @@
#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
#define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
-#include "ARCInstKind.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
@@ -96,7 +96,7 @@ struct RRInfo {
};
/// \brief This class summarizes several per-pointer runtime properties which
-/// are propogated through the flow graph.
+/// are propagated through the flow graph.
class PtrState {
protected:
/// True if the reference count is known to be incremented.
@@ -172,7 +172,7 @@ struct BottomUpPtrState : PtrState {
bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I);
/// Return true if this set of releases can be paired with a release. Modifies
- /// state appropriately to reflect that the matching occured if it is
+ /// state appropriately to reflect that the matching occurred if it is
/// successful.
///
/// It is assumed that one has already checked that the RCIdentity of the
@@ -194,7 +194,7 @@ struct TopDownPtrState : PtrState {
/// Return true if this set of retains can be paired with the given
/// release. Modifies state appropriately to reflect that the matching
- /// occured.
+ /// occurred.
bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release);
void HandlePotentialUse(Instruction *Inst, const Value *Ptr,
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index d6fc91641588..590a52da6b19 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -1,4 +1,4 @@
-//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,52 +14,33 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "adce"
STATISTIC(NumRemoved, "Number of instructions removed");
-namespace {
-struct ADCE : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(ID) {
- initializeADCEPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function& F) override;
-
- void getAnalysisUsage(AnalysisUsage& AU) const override {
- AU.setPreservesCFG();
- }
-};
-}
-
-char ADCE::ID = 0;
-INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
-
-bool ADCE::runOnFunction(Function& F) {
- if (skipOptnoneFunction(F))
- return false;
-
+static bool aggressiveDCE(Function& F) {
SmallPtrSet<Instruction*, 128> Alive;
SmallVector<Instruction*, 128> Worklist;
// Collect the set of "root" instructions that are known live.
- for (Instruction &I : inst_range(F)) {
- if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- isa<LandingPadInst>(I) || I.mayHaveSideEffects()) {
+ for (Instruction &I : instructions(F)) {
+ if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || I.isEHPad() ||
+ I.mayHaveSideEffects()) {
Alive.insert(&I);
Worklist.push_back(&I);
}
@@ -79,7 +60,7 @@ bool ADCE::runOnFunction(Function& F) {
// which have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
if (!Alive.count(&I)) {
Worklist.push_back(&I);
I.dropAllReferences();
@@ -94,6 +75,34 @@ bool ADCE::runOnFunction(Function& F) {
return !Worklist.empty();
}
-FunctionPass *llvm::createAggressiveDCEPass() {
- return new ADCE();
+PreservedAnalyses ADCEPass::run(Function &F) {
+ if (aggressiveDCE(F))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
+
+namespace {
+struct ADCELegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCELegacyPass() : FunctionPass(ID) {
+ initializeADCELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function& F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+ return aggressiveDCE(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+}
+
+char ADCELegacyPass::ID = 0;
+INITIALIZE_PASS(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
+ false, false)
+
+FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); }
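
The ADCE rewrite above shows the template this import uses for porting passes to the new pass manager: the transform body becomes a free function, the new-PM class's run() maps its boolean result onto PreservedAnalyses, and a thin legacy wrapper keeps the old registration path working. Sketched in isolation below — names are hypothetical and the registration macros are elided:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    static bool runImpl(Function &F) {
      // ... shared transform body; returns true if it changed F ...
      return false;
    }

    // New-PM side: a plain class with run(); no INITIALIZE_PASS macro needed.
    struct ExampleDCEPass {                 // hypothetical, mirrors ADCEPass
      PreservedAnalyses run(Function &F) {
        return runImpl(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
      }
    };
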
diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 8918909f484a..4b721d38adba 100644
--- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -21,6 +21,8 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -54,13 +56,15 @@ struct AlignmentFromAssumptions : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
}
// For memory transfers, we need a common alignment for both the source and
@@ -84,7 +88,7 @@ INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
@@ -249,8 +253,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
// The mask must have some trailing ones (otherwise the condition is
// trivial and tells us nothing about the alignment of the left operand).
- unsigned TrailingOnes =
- MaskSCEV->getValue()->getValue().countTrailingOnes();
+ unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes();
if (!TrailingOnes)
return false;
@@ -270,7 +273,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
OffSCEV = nullptr;
if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
AAPtr = PToI->getPointerOperand();
- OffSCEV = SE->getConstant(Int64Ty, 0);
+ OffSCEV = SE->getZero(Int64Ty);
} else if (const SCEVAddExpr* AndLHSAddSCEV =
dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
// Try to find the ptrtoint; subtract it and the rest is the offset.
@@ -410,7 +413,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
bool AlignmentFromAssumptions::runOnFunction(Function &F) {
bool Changed = false;
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
NewDestAlignments.clear();
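
Two small ScalarEvolution API changes surface in the AlignmentFromAssumptions hunks: SCEVConstant now exposes its APInt directly via getAPInt(), and getZero(Ty) abbreviates getConstant(Ty, 0). A sketch of both, with a hypothetical helper wrapping the calls as they appear in extractAlignmentInfo:

    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/ScalarEvolutionExpressions.h"
    using namespace llvm;

    static const SCEV *maskInfo(ScalarEvolution &SE, const SCEVConstant *MaskSCEV,
                                Type *Int64Ty, unsigned &TrailingOnes) {
      // Was: MaskSCEV->getValue()->getValue().countTrailingOnes();
      TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes();
      // Was: SE.getConstant(Int64Ty, 0);
      return SE.getZero(Int64Ty);
    }
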
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
index 09c605e76737..cb9b8b6fffc8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -15,26 +15,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/DemandedBits.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
#define DEBUG_TYPE "bdce"
@@ -53,342 +45,42 @@ struct BDCE : public FunctionPass {
void getAnalysisUsage(AnalysisUsage& AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<DemandedBits>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
-
- void determineLiveOperandBits(const Instruction *UserI,
- const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2);
-
- AssumptionCache *AC;
- DominatorTree *DT;
};
}
char BDCE::ID = 0;
INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DemandedBits)
INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
false, false)
-static bool isAlwaysLive(Instruction *I) {
- return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- isa<LandingPadInst>(I) || I->mayHaveSideEffects();
-}
-
-void BDCE::determineLiveOperandBits(const Instruction *UserI,
- const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2) {
- unsigned BitWidth = AB.getBitWidth();
-
- // We're called once per operand, but for some instructions, we need to
- // compute known bits of both operands in order to determine the live bits of
- // either (when both operands are instructions themselves). We don't,
- // however, want to do this twice, so we cache the result in APInts that live
- // in the caller. For the two-relevant-operands case, both operand values are
- // provided here.
- auto ComputeKnownBits =
- [&](unsigned BitWidth, const Value *V1, const Value *V2) {
- const DataLayout &DL = I->getModule()->getDataLayout();
- KnownZero = APInt(BitWidth, 0);
- KnownOne = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
- AC, UserI, DT);
-
- if (V2) {
- KnownZero2 = APInt(BitWidth, 0);
- KnownOne2 = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
- 0, AC, UserI, DT);
- }
- };
-
- switch (UserI->getOpcode()) {
- default: break;
- case Instruction::Call:
- case Instruction::Invoke:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::bswap:
- // The alive bits of the input are the swapped alive bits of
- // the output.
- AB = AOut.byteSwap();
- break;
- case Intrinsic::ctlz:
- if (OperandNo == 0) {
- // We need some output bits, so we need all bits of the
- // input to the left of, and including, the leftmost bit
- // known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
- AB = APInt::getHighBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countLeadingZeros()+1));
- }
- break;
- case Intrinsic::cttz:
- if (OperandNo == 0) {
- // We need some output bits, so we need all bits of the
- // input to the right of, and including, the rightmost bit
- // known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
- AB = APInt::getLowBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countTrailingZeros()+1));
- }
- break;
- }
- break;
- case Instruction::Add:
- case Instruction::Sub:
- // Find the highest live output bit. We don't need any more input
- // bits than that (adds, and thus subtracts, ripple only to the
- // left).
- AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
- break;
- case Instruction::Shl:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.lshr(ShiftAmt);
-
- // If the shift is nuw/nsw, then the high bits are not dead
- // (because we've promised that they *must* be zero).
- const ShlOperator *S = cast<ShlOperator>(UserI);
- if (S->hasNoSignedWrap())
- AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
- else if (S->hasNoUnsignedWrap())
- AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::LShr:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.shl(ShiftAmt);
-
- // If the shift is exact, then the low bits are not dead
- // (they must be zero).
- if (cast<LShrOperator>(UserI)->isExact())
- AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::AShr:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.shl(ShiftAmt);
- // Because the high input bit is replicated into the
- // high-order bits of the result, if we need any of those
- // bits, then we must keep the highest input bit.
- if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
- .getBoolValue())
- AB.setBit(BitWidth-1);
-
- // If the shift is exact, then the low bits are not dead
- // (they must be zero).
- if (cast<AShrOperator>(UserI)->isExact())
- AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::And:
- AB = AOut;
-
- // For bits that are known zero, the corresponding bits in the
- // other operand are dead (unless they're both zero, in which
- // case they can't both be dead, so just mark the LHS bits as
- // dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownZero2;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownZero & ~KnownZero2);
- }
- break;
- case Instruction::Or:
- AB = AOut;
-
- // For bits that are known one, the corresponding bits in the
- // other operand are dead (unless they're both one, in which
- // case they can't both be dead, so just mark the LHS bits as
- // dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownOne2;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownOne & ~KnownOne2);
- }
- break;
- case Instruction::Xor:
- case Instruction::PHI:
- AB = AOut;
- break;
- case Instruction::Trunc:
- AB = AOut.zext(BitWidth);
- break;
- case Instruction::ZExt:
- AB = AOut.trunc(BitWidth);
- break;
- case Instruction::SExt:
- AB = AOut.trunc(BitWidth);
- // Because the high input bit is replicated into the
- // high-order bits of the result, if we need any of those
- // bits, then we must keep the highest input bit.
- if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
- AOut.getBitWidth() - BitWidth))
- .getBoolValue())
- AB.setBit(BitWidth-1);
- break;
- case Instruction::Select:
- if (OperandNo != 0)
- AB = AOut;
- break;
- }
-}
-
bool BDCE::runOnFunction(Function& F) {
if (skipOptnoneFunction(F))
return false;
+ DemandedBits &DB = getAnalysis<DemandedBits>();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- DenseMap<Instruction *, APInt> AliveBits;
SmallVector<Instruction*, 128> Worklist;
-
- // The set of visited instructions (non-integer-typed only).
- SmallPtrSet<Instruction*, 128> Visited;
-
- // Collect the set of "root" instructions that are known live.
- for (Instruction &I : inst_range(F)) {
- if (!isAlwaysLive(&I))
- continue;
-
- DEBUG(dbgs() << "BDCE: Root: " << I << "\n");
- // For integer-valued instructions, set up an initial empty set of alive
- // bits and add the instruction to the work list. For other instructions
- // add their operands to the work list (for integer values operands, mark
- // all bits as live).
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (!AliveBits.count(&I)) {
- AliveBits[&I] = APInt(IT->getBitWidth(), 0);
- Worklist.push_back(&I);
- }
-
- continue;
- }
-
- // Non-integer-typed instructions...
- for (Use &OI : I.operands()) {
- if (Instruction *J = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
- AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
- Worklist.push_back(J);
- }
- }
- // To save memory, we don't add I to the Visited set here. Instead, we
- // check isAlwaysLive on every instruction when searching for dead
- // instructions later (we need to check isAlwaysLive for the
- // integer-typed instructions anyway).
- }
-
- // Propagate liveness backwards to operands.
- while (!Worklist.empty()) {
- Instruction *UserI = Worklist.pop_back_val();
-
- DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
- APInt AOut;
- if (UserI->getType()->isIntegerTy()) {
- AOut = AliveBits[UserI];
- DEBUG(dbgs() << " Alive Out: " << AOut);
- }
- DEBUG(dbgs() << "\n");
-
- if (!UserI->getType()->isIntegerTy())
- Visited.insert(UserI);
-
- APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
- // Compute the set of alive bits for each operand. These are anded into the
- // existing set, if any, and if that changes the set of alive bits, the
- // operand is added to the work-list.
- for (Use &OI : UserI->operands()) {
- if (Instruction *I = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
- unsigned BitWidth = IT->getBitWidth();
- APInt AB = APInt::getAllOnesValue(BitWidth);
- if (UserI->getType()->isIntegerTy() && !AOut &&
- !isAlwaysLive(UserI)) {
- AB = APInt(BitWidth, 0);
- } else {
- // If all bits of the output are dead, then all bits of the input
- // Bits of each operand that are used to compute alive bits of the
- // output are alive, all others are dead.
- determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
- KnownZero, KnownOne,
- KnownZero2, KnownOne2);
- }
-
- // If we've added to the set of alive bits (or the operand has not
- // been previously visited), then re-queue the operand to be visited
- // again.
- APInt ABPrev(BitWidth, 0);
- auto ABI = AliveBits.find(I);
- if (ABI != AliveBits.end())
- ABPrev = ABI->second;
-
- APInt ABNew = AB | ABPrev;
- if (ABNew != ABPrev || ABI == AliveBits.end()) {
- AliveBits[I] = std::move(ABNew);
- Worklist.push_back(I);
- }
- } else if (!Visited.count(I)) {
- Worklist.push_back(I);
- }
- }
- }
- }
-
bool Changed = false;
- // The inverse of the live set is the dead set. These are those instructions
- // which have no side effects and do not influence the control flow or return
- // value of the function, and may therefore be deleted safely.
- // NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : inst_range(F)) {
- // For live instructions that have all dead bits, first make them dead by
- // replacing all uses with something else. Then, if they don't need to
- // remain live (because they have side effects, etc.) we can remove them.
- if (I.getType()->isIntegerTy()) {
- auto ABI = AliveBits.find(&I);
- if (ABI != AliveBits.end()) {
- if (ABI->second.getBoolValue())
- continue;
-
- DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
- // FIXME: In theory we could substitute undef here instead of zero.
- // This should be reconsidered once we settle on the semantics of
- // undef, poison, etc.
- Value *Zero = ConstantInt::get(I.getType(), 0);
- ++NumSimplified;
- I.replaceAllUsesWith(Zero);
- Changed = true;
- }
- } else if (Visited.count(&I)) {
- continue;
+ for (Instruction &I : instructions(F)) {
+ if (I.getType()->isIntegerTy() &&
+ !DB.getDemandedBits(&I).getBoolValue()) {
+ // For live instructions that have all dead bits, first make them dead by
+ // replacing all uses with something else. Then, if they don't need to
+ // remain live (because they have side effects, etc.) we can remove them.
+ DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+ // FIXME: In theory we could substitute undef here instead of zero.
+ // This should be reconsidered once we settle on the semantics of
+ // undef, poison, etc.
+ Value *Zero = ConstantInt::get(I.getType(), 0);
+ ++NumSimplified;
+ I.replaceAllUsesWith(Zero);
+ Changed = true;
}
-
- if (isAlwaysLive(&I))
+ if (!DB.isInstructionDead(&I))
continue;
Worklist.push_back(&I);
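
The BDCE rewrite above deletes the pass-local bit tracking wholesale and consumes the new DemandedBits analysis instead, reducing the pass to two queries: "are any bits of this value demanded?" and "is this instruction dead?". A sketch of those queries on their own — countBDCECandidates is a hypothetical helper, assuming DB was obtained via getAnalysis<DemandedBits>() as in the patch:

    #include "llvm/Analysis/DemandedBits.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstIterator.h"
    using namespace llvm;

    static unsigned countBDCECandidates(Function &F, DemandedBits &DB) {
      unsigned N = 0;
      for (Instruction &I : instructions(F)) {
        // All-zero demanded bits: the value can be replaced wholesale
        // (BDCE substitutes zero); a dead instruction is simply deleted.
        if (I.getType()->isIntegerTy() && !DB.getDemandedBits(&I).getBoolValue())
          ++N;
        else if (DB.isInstructionDead(&I))
          ++N;
      }
      return N;
    }
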
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 4288742dd3eb..84f7f5fff5b5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -223,10 +223,10 @@ Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
}
// The simple and common case. This also includes constant expressions.
- if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst))
+ if (!isa<PHINode>(Inst) && !Inst->isEHPad())
return Inst;
- // We can't insert directly before a phi node or landing pad. Insert before
+ // We can't insert directly before a phi node or an eh pad. Insert before
// the terminator of the incoming or dominating block.
assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!");
if (Idx != ~0U && isa<PHINode>(Inst))
@@ -365,9 +365,9 @@ void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
/// into an instruction itself.
void ConstantHoisting::collectConstantCandidates(Function &Fn) {
ConstCandMapType ConstCandMap;
- for (Function::iterator BB : Fn)
- for (BasicBlock::iterator Inst : *BB)
- collectConstantCandidates(ConstCandMap, Inst);
+ for (BasicBlock &BB : Fn)
+ for (Instruction &Inst : BB)
+ collectConstantCandidates(ConstCandMap, &Inst);
}
/// \brief Find the base constant within the given range and rebase all other
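
In the ConstantHoisting hunks above, the isa<LandingPadInst> checks widen to Instruction::isEHPad(), which also covers the pads added for Windows exception handling (catchpad, cleanuppad, catchswitch) — the same generalization ADCE's root collection received earlier. A one-line sketch of the resulting predicate (the helper name is hypothetical):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Materialization points must not land before PHIs or any EH pad.
    static bool mustSkipWhenInserting(const Instruction *I) {
      return isa<PHINode>(I) || I->isEHPad();
    }
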
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 79624b2e4c47..686bd4071104 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/CFG.h"
@@ -32,6 +33,7 @@ STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
+STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
namespace {
@@ -43,6 +45,11 @@ namespace {
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
bool processSwitch(SwitchInst *SI);
+ bool processCallSite(CallSite CS);
+
+ /// Return a constant value for V usable at At and everything it
+ /// dominates. If no such Constant can be found, return nullptr.
+ Constant *getConstantAt(Value *V, Instruction *At);
public:
static char ID;
@@ -54,6 +61,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
@@ -178,44 +186,33 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
return true;
}
-/// processCmp - If the value of this comparison could be determined locally,
-/// constant propagation would already have figured it out. Instead, walk
-/// the predecessors and statically evaluate the comparison based on information
-/// available on that edge. If a given static evaluation is true on ALL
-/// incoming edges, then it's true universally and we can simplify the compare.
+/// processCmp - See if LazyValueInfo's ability to exploit edge conditions,
+/// or range information is sufficient to prove this comparison. Even for
+/// local conditions, this can sometimes prove conditions instcombine can't by
+/// exploiting range information.
bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
Value *Op0 = C->getOperand(0);
- if (isa<Instruction>(Op0) &&
- cast<Instruction>(Op0)->getParent() == C->getParent())
- return false;
-
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return false;
- pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
- if (PI == PE) return false;
+ // As a policy choice, we choose not to waste compile time on anything where
+ // the comparison is testing local values. While LVI can sometimes reason
+ // about such cases, it's not its primary purpose. We do make sure to do
+ // the block local query for uses from terminator instructions, but that's
+ // handled in the code for each terminator.
+ auto *I = dyn_cast<Instruction>(Op0);
+ if (I && I->getParent() == C->getParent())
+ return false;
- LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
- C->getOperand(0), Op1, *PI,
- C->getParent(), C);
+ LazyValueInfo::Tristate Result =
+ LVI->getPredicateAt(C->getPredicate(), Op0, Op1, C);
if (Result == LazyValueInfo::Unknown) return false;
- ++PI;
- while (PI != PE) {
- LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
- C->getOperand(0), Op1, *PI,
- C->getParent(), C);
- if (Res != Result) return false;
- ++PI;
- }
-
++NumCmps;
-
if (Result == LazyValueInfo::True)
C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
else
C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
-
C->eraseFromParent();
return true;
@@ -307,6 +304,59 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
return Changed;
}
+/// processCallSite - Infer nonnull attributes for the arguments at the
+/// specified callsite.
+bool CorrelatedValuePropagation::processCallSite(CallSite CS) {
+ SmallVector<unsigned, 4> Indices;
+ unsigned ArgNo = 0;
+
+ for (Value *V : CS.args()) {
+ PointerType *Type = dyn_cast<PointerType>(V->getType());
+
+ if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+ LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
+ ConstantPointerNull::get(Type),
+ CS.getInstruction()) == LazyValueInfo::False)
+ Indices.push_back(ArgNo + 1);
+ ArgNo++;
+ }
+
+ assert(ArgNo == CS.arg_size() && "sanity check");
+
+ if (Indices.empty())
+ return false;
+
+ AttributeSet AS = CS.getAttributes();
+ LLVMContext &Ctx = CS.getInstruction()->getContext();
+ AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
+ CS.setAttributes(AS);
+
+ return true;
+}
+
+Constant *CorrelatedValuePropagation::getConstantAt(Value *V, Instruction *At) {
+ if (Constant *C = LVI->getConstant(V, At->getParent(), At))
+ return C;
+
+ // TODO: The following really should be sunk inside LVI's core algorithm, or
+ // at least the outer shims around such.
+ auto *C = dyn_cast<CmpInst>(V);
+ if (!C) return nullptr;
+
+ Value *Op0 = C->getOperand(0);
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return nullptr;
+
+ LazyValueInfo::Tristate Result =
+ LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
+ if (Result == LazyValueInfo::Unknown)
+ return nullptr;
+
+ return (Result == LazyValueInfo::True) ?
+ ConstantInt::getTrue(C->getContext()) :
+ ConstantInt::getFalse(C->getContext());
+}
+
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
@@ -318,7 +368,7 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
bool BBChanged = false;
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
- Instruction *II = BI++;
+ Instruction *II = &*BI++;
switch (II->getOpcode()) {
case Instruction::Select:
BBChanged |= processSelect(cast<SelectInst>(II));
@@ -334,6 +384,10 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
case Instruction::Store:
BBChanged |= processMemAccess(II);
break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ BBChanged |= processCallSite(CallSite(II));
+ break;
}
}
@@ -342,7 +396,21 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
case Instruction::Switch:
BBChanged |= processSwitch(cast<SwitchInst>(Term));
break;
+ case Instruction::Ret: {
+ auto *RI = cast<ReturnInst>(Term);
+ // Try to determine the return value if we can. This is mainly here to
+ // simplify the writing of unit tests, but also helps to enable IPO by
+ // constant folding the return values of callees.
+ auto *RetVal = RI->getReturnValue();
+ if (!RetVal) break; // handle "ret void"
+ if (isa<Constant>(RetVal)) break; // nothing to do
+ if (auto *C = getConstantAt(RetVal, RI)) {
+ ++NumReturns;
+ RI->replaceUsesOfWith(RetVal, C);
+ BBChanged = true;
+ }
}
+ };
FnChanged |= BBChanged;
}
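
The new processCallSite above hinges on a single LazyValueInfo query: if "V == null" is provably false at the call site, V is nonnull there and the argument can be decorated accordingly. A sketch isolating that query — provablyNonNullAt is a hypothetical helper, not part of the patch:

    #include "llvm/Analysis/LazyValueInfo.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool provablyNonNullAt(LazyValueInfo &LVI, Value *V, Instruction *At) {
      auto *PT = dyn_cast<PointerType>(V->getType());
      if (!PT)
        return false;
      // LVI proves the comparison false at At => V cannot be null there.
      return LVI.getPredicateAt(ICmpInst::ICMP_EQ, V,
                                ConstantPointerNull::get(PT), At) ==
             LazyValueInfo::False;
    }
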
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 3b262a23091f..b67c3c7742fd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
@@ -46,7 +47,7 @@ namespace {
TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
- Instruction *Inst = DI++;
+ Instruction *Inst = &*DI++;
if (isInstructionTriviallyDead(Inst, TLI)) {
Inst->eraseFromParent();
Changed = true;
@@ -92,6 +93,34 @@ namespace {
char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
+static bool DCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ WorkList.insert(OpI);
+ }
+
+ I->eraseFromParent();
+ ++DCEEliminated;
+ return true;
+ }
+ return false;
+}
+
bool DCE::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
@@ -99,39 +128,24 @@ bool DCE::runOnFunction(Function &F) {
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
- // Start out with all of the instructions in the worklist...
- std::vector<Instruction*> WorkList;
- for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
- WorkList.push_back(&*i);
-
- // Loop over the worklist finding instructions that are dead. If they are
- // dead make them drop all of their uses, making other instructions
- // potentially dead, and work until the worklist is empty.
- //
bool MadeChange = false;
+ SmallSetVector<Instruction *, 16> WorkList;
+ // Iterate over the original function, only adding insts to the worklist
+ // if they actually need to be revisited. This avoids having to pre-init
+ // the worklist with the entire function's worth of instructions.
+ for (inst_iterator FI = inst_begin(F), FE = inst_end(F); FI != FE;) {
+ Instruction *I = &*FI;
+ ++FI;
+
+ // We're visiting this instruction now, so make sure it's not in the
+ // worklist from an earlier visit.
+ if (!WorkList.count(I))
+ MadeChange |= DCEInstruction(I, WorkList, TLI);
+ }
+
while (!WorkList.empty()) {
- Instruction *I = WorkList.back();
- WorkList.pop_back();
-
- if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead.
- // Loop over all of the values that the instruction uses, if there are
- // instructions being used, add them to the worklist, because they might
- // go dead after this one is removed.
- //
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *Used = dyn_cast<Instruction>(*OI))
- WorkList.push_back(Used);
-
- // Remove the instruction.
- I->eraseFromParent();
-
- // Remove the instruction from the worklist if it still exists in it.
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
- WorkList.end());
-
- MadeChange = true;
- ++DCEEliminated;
- }
+ Instruction *I = WorkList.pop_back_val();
+ MadeChange |= DCEInstruction(I, WorkList, TLI);
}
return MadeChange;
}
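
The DCE rewrite above swaps a std::vector worklist — which needed a linear std::remove pass to purge erased instructions — for a SmallSetVector, giving de-duplicated insertion and constant-time membership tests. A toy sketch of the container behavior the pass now leans on (values are arbitrary):

    #include "llvm/ADT/SetVector.h"

    static int demo() {
      llvm::SmallSetVector<int, 16> WorkList;
      WorkList.insert(1);
      WorkList.insert(2);
      WorkList.insert(1);                     // duplicate: rejected, queued once
      bool Queued = WorkList.count(2);        // O(1) membership test -> true
      int Next = WorkList.pop_back_val();     // LIFO: yields 2
      return Queued + Next + WorkList.size(); // 1 + 2 + 1 == 4
    }
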
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c50558434da2..36ad0a5f7b91 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -40,6 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "dse"
+STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
@@ -59,23 +61,24 @@ namespace {
if (skipOptnoneFunction(F))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = AA->getTargetLibraryInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (BasicBlock &I : F)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
- if (DT->isReachableFromEntry(I))
- Changed |= runOnBasicBlock(*I);
+ if (DT->isReachableFromEntry(&I))
+ Changed |= runOnBasicBlock(I);
AA = nullptr; MD = nullptr; DT = nullptr;
return Changed;
}
bool runOnBasicBlock(BasicBlock &BB);
+ bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
@@ -85,10 +88,11 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
};
@@ -97,8 +101,10 @@ namespace {
char DSE::ID = 0;
INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
@@ -115,7 +121,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
- const TargetLibraryInfo *TLI,
+ const TargetLibraryInfo &TLI,
SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -140,7 +146,7 @@ static void DeleteDeadInstruction(Instruction *I,
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI, TLI))
+ if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
@@ -153,7 +159,7 @@ static void DeleteDeadInstruction(Instruction *I,
/// hasMemoryWrite - Does this instruction write some memory? This only returns
/// true for things that we can analyze with other helpers below.
-static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -170,20 +176,20 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
}
if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
- if (TLI && TLI->has(LibFunc::strcpy) &&
- F->getName() == TLI->getName(LibFunc::strcpy)) {
+ if (TLI.has(LibFunc::strcpy) &&
+ F->getName() == TLI.getName(LibFunc::strcpy)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strncpy) &&
- F->getName() == TLI->getName(LibFunc::strncpy)) {
+ if (TLI.has(LibFunc::strncpy) &&
+ F->getName() == TLI.getName(LibFunc::strncpy)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strcat) &&
- F->getName() == TLI->getName(LibFunc::strcat)) {
+ if (TLI.has(LibFunc::strcat) &&
+ F->getName() == TLI.getName(LibFunc::strcat)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strncat) &&
- F->getName() == TLI->getName(LibFunc::strncat)) {
+ if (TLI.has(LibFunc::strncat) &&
+ F->getName() == TLI.getName(LibFunc::strncat)) {
return true;
}
}
@@ -224,9 +230,9 @@ static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
/// instruction if any.
-static MemoryLocation getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
- assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
- "Unknown instruction case");
+static MemoryLocation getLocForRead(Instruction *Inst,
+ const TargetLibraryInfo &TLI) {
+ assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case");
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
@@ -313,9 +319,9 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo &TLI) {
uint64_t Size;
- if (getObjectSize(V, Size, DL, TLI))
+ if (getObjectSize(V, Size, DL, &TLI))
return Size;
return MemoryLocation::UnknownSize;
}
@@ -336,7 +342,7 @@ namespace {
static OverwriteResult isOverwrite(const MemoryLocation &Later,
const MemoryLocation &Earlier,
const DataLayout &DL,
- const TargetLibraryInfo *TLI,
+ const TargetLibraryInfo &TLI,
int64_t &EarlierOff, int64_t &LaterOff) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -442,10 +448,12 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
/// because the DSE inducing instruction may be a self-read.
static bool isPossibleSelfRead(Instruction *Inst,
const MemoryLocation &InstStoreLoc,
- Instruction *DepWrite, AliasAnalysis &AA) {
+ Instruction *DepWrite,
+ const TargetLibraryInfo &TLI,
+ AliasAnalysis &AA) {
// Self reads can only happen for instructions that read memory. Get the
// location read.
- MemoryLocation InstReadLoc = getLocForRead(Inst, AA);
+ MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
@@ -459,7 +467,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Here we don't know if A/B may alias, but we do know that B/B are must
// aliases, so removing the first memcpy is safe (assuming it writes <= #
// bytes as the second one).
- MemoryLocation DepReadLoc = getLocForRead(DepWrite, AA);
+ MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
return false;
@@ -475,11 +483,12 @@ static bool isPossibleSelfRead(Instruction *Inst,
//===----------------------------------------------------------------------===//
bool DSE::runOnBasicBlock(BasicBlock &BB) {
+ const DataLayout &DL = BB.getModule()->getDataLayout();
bool MadeChange = false;
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
- Instruction *Inst = BBI++;
+ Instruction *Inst = &*BBI++;
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(Inst, TLI)) {
@@ -488,42 +497,68 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
// If we find something that writes memory, get its memory dependence.
- if (!hasMemoryWrite(Inst, TLI))
- continue;
-
- MemDepResult InstDep = MD->getDependency(Inst);
-
- // Ignore any store where we can't find a local dependence.
- // FIXME: cross-block DSE would be fun. :)
- if (!InstDep.isDef() && !InstDep.isClobber())
+ if (!hasMemoryWrite(Inst, *TLI))
continue;
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+
+ auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(&*BBI);
+
+ DeleteDeadInstruction(DeadInst, *MD, *TLI);
+
+ if (!NextInst) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ ++NumRedundantStores;
+ MadeChange = true;
+ };
+
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad && isRemovable(SI)) {
+ isRemovable(SI) &&
+ MemoryIsNotModifiedBetween(DepLoad, SI)) {
+
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
<< "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
+ RemoveDeadInstAndUpdateBBI(SI);
+ continue;
+ }
+ }
- DeleteDeadInstruction(SI, *MD, TLI);
+ // Remove null stores into the calloc'ed objects
+ Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
- if (!NextInst) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
- ++NumFastStores;
- MadeChange = true;
+ if (StoredConstant && StoredConstant->isNullValue() &&
+ isRemovable(SI)) {
+ Instruction *UnderlyingPointer = dyn_cast<Instruction>(
+ GetUnderlyingObject(SI->getPointerOperand(), DL));
+
+ if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
+ MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) {
+ DEBUG(dbgs()
+ << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
+ << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
+
+ RemoveDeadInstAndUpdateBBI(SI);
continue;
}
}
}
+ MemDepResult InstDep = MD->getDependency(Inst);
+
+ // Ignore any store where we can't find a local dependence.
+ // FIXME: cross-block DSE would be fun. :)
+ if (!InstDep.isDef() && !InstDep.isClobber())
+ continue;
+
// Figure out what location is being stored to.
MemoryLocation Loc = getLocForWrite(Inst, *AA);
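The new calloc case is easiest to see in source form. A minimal C++ illustration (not taken from the patch) of the kind of store this hunk lets DSE delete, given the MemoryIsNotModifiedBetween check defined later in this file:

    #include <cstdlib>

    int *make_table(size_t n) {
      // calloc returns zero-initialized memory, so the explicit null store
      // below writes a value that is already there; once DSE proves nothing
      // modifies the object between the calloc and the store, it deletes it.
      int *p = static_cast<int *>(calloc(n, sizeof(int)));
      if (p)
        p[0] = 0; // removable null store into a calloc'ed object
      return p;
    }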
@@ -549,24 +584,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
if (isRemovable(DepWrite) &&
- !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
- const DataLayout &DL = BB.getModule()->getDataLayout();
OverwriteResult OR =
- isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(),
- DepWriteOffset, InstWriteOffset);
+ isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD, TLI);
+ DeleteDeadInstruction(DepWrite, *MD, *TLI);
++NumFastStores;
MadeChange = true;
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
- BBI = Inst;
+ BBI = Inst->getIterator();
if (BBI != BB.begin())
--BBI;
break;
@@ -609,10 +642,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (DepWrite == &BB.front()) break;
// Can't look past this instruction if it might read 'Loc'.
- if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
break;
- InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
+ InstDep = MD->getPointerDependencyFrom(Loc, false,
+ DepWrite->getIterator(), &BB);
}
}
@@ -624,6 +658,64 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
return MadeChange;
}
+/// Returns true if the memory accessed by the second instruction is not
+/// modified between the first and the second instruction.
+/// Precondition: Second instruction must be dominated by the first
+/// instruction.
+bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI,
+ Instruction *SecondI) {
+ SmallVector<BasicBlock *, 16> WorkList;
+ SmallPtrSet<BasicBlock *, 8> Visited;
+ BasicBlock::iterator FirstBBI(FirstI);
+ ++FirstBBI;
+ BasicBlock::iterator SecondBBI(SecondI);
+ BasicBlock *FirstBB = FirstI->getParent();
+ BasicBlock *SecondBB = SecondI->getParent();
+ MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+
+ // Start checking the store-block.
+ WorkList.push_back(SecondBB);
+ bool isFirstBlock = true;
+
+ // Check all blocks going backward until we reach the load-block.
+ while (!WorkList.empty()) {
+ BasicBlock *B = WorkList.pop_back_val();
+
+ // Ignore instructions before FirstI if this is FirstBB.
+ BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
+
+ BasicBlock::iterator EI;
+ if (isFirstBlock) {
+ // Ignore instructions after SecondI if this is the first visit of SecondBB.
+ assert(B == SecondBB && "first block is not the store block");
+ EI = SecondBBI;
+ isFirstBlock = false;
+ } else {
+ // It's not SecondBB or (in case of a loop) a revisit of SecondBB; in
+ // either case we also have to look at instructions after SecondI.
+ EI = B->end();
+ }
+ for (; BI != EI; ++BI) {
+ Instruction *I = &*BI;
+ if (I->mayWriteToMemory() && I != SecondI) {
+ auto Res = AA->getModRefInfo(I, MemLoc);
+ if (Res != MRI_NoModRef)
+ return false;
+ }
+ }
+ if (B != FirstBB) {
+ assert(B != &FirstBB->getParent()->getEntryBlock() &&
+ "Should not hit the entry block because SI must be dominated by LI");
+ for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
+ if (!Visited.insert(*PredI).second)
+ continue;
+ WorkList.push_back(*PredI);
+ }
+ }
+ }
+ return true;
+}
+
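As a self-contained sketch of the backward walk implemented above, the same worklist shape over a toy CFG in plain C++ (toy types, not LLVM's; negative instruction ids below stand in for mayWriteToMemory/getModRefInfo):

    #include <unordered_set>
    #include <vector>

    struct Block {
      std::vector<int> Insts;      // toy instructions; negative ids "may write"
      std::vector<Block *> Preds;  // predecessor blocks
    };

    // Toy analogue of MemoryIsNotModifiedBetween: walk backward from the
    // block of the second instruction toward the block of the first one,
    // failing if any instruction in between may write memory.
    static bool notModifiedBetween(Block *FirstBB, int FirstIdx,
                                   Block *SecondBB, int SecondIdx) {
      std::vector<Block *> WorkList{SecondBB};
      std::unordered_set<Block *> Visited;
      bool IsFirstBlock = true;
      while (!WorkList.empty()) {
        Block *B = WorkList.back();
        WorkList.pop_back();
        // Skip instructions before the first point in FirstBB, and after
        // the second point on the first visit of SecondBB; scan the rest.
        int Begin = (B == FirstBB) ? FirstIdx + 1 : 0;
        int End = IsFirstBlock ? SecondIdx : static_cast<int>(B->Insts.size());
        IsFirstBlock = false;
        for (int I = Begin; I < End; ++I)
          if (B->Insts[I] < 0)
            return false; // a potential clobber between the two points
        if (B != FirstBB) // keep walking toward FirstBB
          for (Block *P : B->Preds)
            if (Visited.insert(P).second)
              WorkList.push_back(P);
      }
      return true;
    }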
/// Find all blocks that will unconditionally lead to the block BB and append
/// them to F.
static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
@@ -655,10 +747,11 @@ bool DSE::HandleFree(CallInst *F) {
Instruction *InstPt = BB->getTerminator();
if (BB == F->getParent()) InstPt = F;
- MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
+ MemDepResult Dep =
+ MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB);
while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
- if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
+ if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency))
break;
Value *DepPointer =
@@ -668,10 +761,10 @@ bool DSE::HandleFree(CallInst *F) {
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
break;
- Instruction *Next = std::next(BasicBlock::iterator(Dependency));
+ auto Next = ++Dependency->getIterator();
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD, TLI);
+ DeleteDeadInstruction(Dependency, *MD, *TLI);
++NumFastStores;
MadeChange = true;
@@ -704,23 +797,22 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
SmallSetVector<Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
- BasicBlock *Entry = BB.getParent()->begin();
- for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
- if (isa<AllocaInst>(I))
- DeadStackObjects.insert(I);
+ BasicBlock &Entry = BB.getParent()->front();
+ for (Instruction &I : Entry) {
+ if (isa<AllocaInst>(&I))
+ DeadStackObjects.insert(&I);
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
- DeadStackObjects.insert(I);
+ else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
+ DeadStackObjects.insert(&I);
}
// Treat byval or inalloca arguments the same, stores to them are dead at the
// end of the function.
- for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
- AE = BB.getParent()->arg_end(); AI != AE; ++AI)
- if (AI->hasByValOrInAllocaAttr())
- DeadStackObjects.insert(AI);
+ for (Argument &AI : BB.getParent()->args())
+ if (AI.hasByValOrInAllocaAttr())
+ DeadStackObjects.insert(&AI);
const DataLayout &DL = BB.getModule()->getDataLayout();
@@ -729,10 +821,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store, check to see if it points into a dead stack value.
- if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
+ if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
- GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL);
+ GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
@@ -744,7 +836,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
if (AllDead) {
- Instruction *Dead = BBI++;
+ Instruction *Dead = &*BBI++;
DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Objects: ";
@@ -757,7 +849,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
+ DeleteDeadInstruction(Dead, *MD, *TLI, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -765,9 +857,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
// Remove any dead non-memory-mutating instructions.
- if (isInstructionTriviallyDead(BBI, TLI)) {
- Instruction *Inst = BBI++;
- DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
+ if (isInstructionTriviallyDead(&*BBI, TLI)) {
+ Instruction *Inst = &*BBI++;
+ DeleteDeadInstruction(Inst, *MD, *TLI, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -776,15 +868,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (isa<AllocaInst>(BBI)) {
// Remove allocas from the list of dead stack objects; there can't be
// any references before the definition.
- DeadStackObjects.remove(BBI);
+ DeadStackObjects.remove(&*BBI);
continue;
}
- if (auto CS = CallSite(BBI)) {
+ if (auto CS = CallSite(&*BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
- if (isAllocLikeFn(BBI, TLI))
- DeadStackObjects.remove(BBI);
+ if (isAllocLikeFn(&*BBI, TLI))
+ DeadStackObjects.remove(&*BBI);
// If this call does not access memory, it can't be loading any of our
// pointers.
@@ -795,10 +887,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// the call is live.
DeadStackObjects.remove_if([&](Value *I) {
// See if the call site touches the value.
- AliasAnalysis::ModRefResult A = AA->getModRefInfo(
- CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
+ ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI));
- return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ return A == MRI_ModRef || A == MRI_Ref;
});
// If all of the allocas were clobbered by the call then we're not going
@@ -864,8 +955,7 @@ void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
// Remove objects that could alias LoadedLoc.
DeadStackObjects.remove_if([&](Value *I) {
// See if the loaded location could alias the stack location.
- MemoryLocation StackLoc(I,
- getPointerSize(I, DL, AA->getTargetLibraryInfo()));
+ MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
return !AA->isNoAlias(StackLoc, LoadedLoc);
});
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 029b44c2ea80..7ef062e71ff3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -263,7 +264,6 @@ namespace {
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
- Function &F;
const TargetLibraryInfo &TLI;
const TargetTransformInfo &TTI;
DominatorTree &DT;
@@ -281,20 +281,37 @@ public:
/// that dominated values can succeed in their lookup.
ScopedHTType AvailableValues;
- /// \brief A scoped hash table of the current values of loads.
+ /// A scoped hash table of the current values of previously encountered memory
+ /// locations.
///
- /// This allows us to get efficient access to dominating loads when we have
- /// a fully redundant load. In addition to the most recent load, we keep
- /// track of a generation count of the read, which is compared against the
- /// current generation count. The current generation count is incremented
+ /// This allows us to get efficient access to dominating loads or stores when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is incremented
/// after every possibly writing memory operation, which ensures that we only
- /// CSE loads with other loads that have no intervening store.
- typedef RecyclingAllocator<
- BumpPtrAllocator,
- ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+ /// CSE loads with other loads that have no intervening store. Ordering
+ /// events (such as fences or atomic instructions) increment the generation
+ /// count as well; essentially, we model these as writes to all possible
+ /// locations. Note that atomic and/or volatile loads and stores can be
+ /// present in the table; it is the responsibility of the consumer to inspect
+ /// the atomicity/volatility if needed.
+ struct LoadValue {
+ Value *Data;
+ unsigned Generation;
+ int MatchingId;
+ bool IsAtomic;
+ LoadValue()
+ : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+ LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+ bool IsAtomic)
+ : Data(Data), Generation(Generation), MatchingId(MatchingId),
+ IsAtomic(IsAtomic) {}
+ };
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value *, LoadValue>>
LoadMapAllocator;
- typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
- DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+ typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+ LoadMapAllocator> LoadHTType;
LoadHTType AvailableLoads;
/// \brief A scoped hash table of the current values of read-only call
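A minimal sketch of the generation-count discipline this table implements, using plain STL containers and hypothetical names (the real table is scoped per dominator-tree node):

    #include <string>
    #include <unordered_map>

    // Simplified model of AvailableLoads: a recorded value is only
    // reusable while the generation stored with it still matches.
    struct GenerationTable {
      struct LoadValue { int Data; unsigned Generation; };
      unsigned CurrentGeneration = 0;
      std::unordered_map<std::string, LoadValue> Table;

      void noteMayWrite() { ++CurrentGeneration; } // store, fence, call, ...

      void noteLoad(const std::string &Ptr, int V) {
        Table[Ptr] = {V, CurrentGeneration};
      }

      bool lookup(const std::string &Ptr, int &Out) const {
        auto It = Table.find(Ptr);
        if (It == Table.end() || It->second.Generation != CurrentGeneration)
          return false; // stale: something may have written memory since
        Out = It->second.Data;
        return true;
      }
    };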
@@ -308,10 +325,9 @@ public:
unsigned CurrentGeneration;
/// \brief Set up the EarlyCSE runner for a particular function.
- EarlyCSE(Function &F, const TargetLibraryInfo &TLI,
- const TargetTransformInfo &TTI, DominatorTree &DT,
- AssumptionCache &AC)
- : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
+ EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
+ DominatorTree &DT, AssumptionCache &AC)
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
bool run();
@@ -382,57 +398,91 @@ private:
class ParseMemoryInst {
public:
ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
- : Load(false), Store(false), Vol(false), MayReadFromMemory(false),
- MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) {
- MayReadFromMemory = Inst->mayReadFromMemory();
- MayWriteToMemory = Inst->mayWriteToMemory();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- MemIntrinsicInfo Info;
- if (!TTI.getTgtMemIntrinsic(II, Info))
- return;
- if (Info.NumMemRefs == 1) {
- Store = Info.WriteMem;
- Load = Info.ReadMem;
- MatchingId = Info.MatchingId;
- MayReadFromMemory = Info.ReadMem;
- MayWriteToMemory = Info.WriteMem;
- Vol = Info.Vol;
- Ptr = Info.PtrVal;
- }
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Load = true;
- Vol = !LI->isSimple();
- Ptr = LI->getPointerOperand();
+ : IsTargetMemInst(false), Inst(Inst) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
+ IsTargetMemInst = true;
+ }
+ bool isLoad() const {
+ if (IsTargetMemInst) return Info.ReadMem;
+ return isa<LoadInst>(Inst);
+ }
+ bool isStore() const {
+ if (IsTargetMemInst) return Info.WriteMem;
+ return isa<StoreInst>(Inst);
+ }
+ bool isAtomic() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ return Inst->isAtomic();
+ }
+ bool isUnordered() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return true;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isUnordered();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->isUnordered();
+ }
+ // Conservative answer
+ return !Inst->isAtomic();
+ }
+
+ bool isVolatile() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isVolatile();
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- Store = true;
- Vol = !SI->isSimple();
- Ptr = SI->getPointerOperand();
+ return SI->isVolatile();
}
+ // Conservative answer
+ return true;
}
- bool isLoad() { return Load; }
- bool isStore() { return Store; }
- bool isVolatile() { return Vol; }
- bool isMatchingMemLoc(const ParseMemoryInst &Inst) {
- return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId;
+
+ bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
+ return (getPointerOperand() == Inst.getPointerOperand() &&
+ getMatchingId() == Inst.getMatchingId());
}
- bool isValid() { return Ptr != nullptr; }
- int getMatchingId() { return MatchingId; }
- Value *getPtr() { return Ptr; }
- bool mayReadFromMemory() { return MayReadFromMemory; }
- bool mayWriteToMemory() { return MayWriteToMemory; }
+ bool isValid() const { return getPointerOperand() != nullptr; }
- private:
- bool Load;
- bool Store;
- bool Vol;
- bool MayReadFromMemory;
- bool MayWriteToMemory;
// For regular (non-intrinsic) loads/stores, this is set to -1. For
// intrinsic loads/stores, the id is retrieved from the corresponding
// field in the MemIntrinsicInfo structure. That field contains
// non-negative values only.
- int MatchingId;
- Value *Ptr;
+ int getMatchingId() const {
+ if (IsTargetMemInst) return Info.MatchingId;
+ return -1;
+ }
+ Value *getPointerOperand() const {
+ if (IsTargetMemInst) return Info.PtrVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->getPointerOperand();
+ }
+ return nullptr;
+ }
+ bool mayReadFromMemory() const {
+ if (IsTargetMemInst) return Info.ReadMem;
+ return Inst->mayReadFromMemory();
+ }
+ bool mayWriteToMemory() const {
+ if (IsTargetMemInst) return Info.WriteMem;
+ return Inst->mayWriteToMemory();
+ }
+
+ private:
+ bool IsTargetMemInst;
+ MemIntrinsicInfo Info;
+ Instruction *Inst;
};
bool processNode(DomTreeNode *Node);
@@ -497,7 +547,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(Inst, &TLI)) {
@@ -548,24 +598,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
ParseMemoryInst MemInst(Inst, TTI);
// If this is a non-volatile load, process it.
if (MemInst.isValid() && MemInst.isLoad()) {
- // Ignore volatile loads.
- if (MemInst.isVolatile()) {
+ // Conservatively, we can't peek past the ordering implied by this
+ // operation, but we can still add this load to our set of available values.
+ if (MemInst.isVolatile() || !MemInst.isUnordered()) {
LastStore = nullptr;
- // Don't CSE across synchronization boundaries.
- if (Inst->mayWriteToMemory())
- ++CurrentGeneration;
- continue;
+ ++CurrentGeneration;
}
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
- std::pair<Value *, unsigned> InVal =
- AvailableLoads.lookup(MemInst.getPtr());
- if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+ if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing loads with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered() &&
+ // We can't replace an atomic load with one which isn't also atomic.
+ InVal.IsAtomic >= MemInst.isAtomic()) {
+ Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
- << " to: " << *InVal.first << '\n');
+ << " to: " << *InVal.Data << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
Inst->eraseFromParent();
@@ -576,8 +628,10 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, remember that we have this instruction.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
+ AvailableLoads.insert(
+ MemInst.getPointerOperand(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
LastStore = nullptr;
continue;
}
@@ -613,6 +667,44 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // A release fence requires that all stores complete before it, but does
+ // not prevent the reordering of following loads 'before' the fence. As a
+ // result, we don't need to consider it as writing to memory and don't need
+ // to advance the generation. We do need to prevent DSE across the fence,
+ // but that's handled above.
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ if (FI->getOrdering() == Release) {
+ assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
+ continue;
+ }
+
+ // Write-back DSE - If we write back the same value we just loaded from
+ // the same location and haven't passed any intervening writes or ordering
+ // operations, we can remove the store. The primary benefit is in allowing
+ // the available-load table to remain valid and value-forward past the
+ // point where the store originally was.
+ if (MemInst.isValid() && MemInst.isStore()) {
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+ if (InVal.Data &&
+ InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+ InVal.Generation == CurrentGeneration &&
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing stores with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered()) {
+ assert((!LastStore ||
+ ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+ MemInst.getPointerOperand()) &&
+ "can't have an intervening store!");
+ DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ // We can avoid incrementing the generation count since we were able
+ // to eliminate this store.
+ continue;
+ }
+ }
+
// Okay, this isn't something we can CSE at all. Check to see if it is
// something that could modify memory. If so, our available memory values
// cannot be used so bump the generation count.
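The write-back case this hunk handles is the following source pattern; a toy C++ illustration, not the pass itself:

    void writeback(int *p) {
      int x = *p; // recorded in AvailableLoads at the current generation
      // ... no intervening writes or ordering operations ...
      *p = x;     // stores back the value just loaded; the store is dead and
                  // the AvailableLoads entry for p stays valid past this point
    }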
@@ -622,8 +714,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
+ // At the moment, we don't remove ordered stores, but do remove
+ // unordered atomic stores. There's no special requirement (for
+ // unordered atomics) about removing atomic stores only in favor of
+ // other atomic stores since we're going to execute the non-atomic
+ // one anyway and the atomic one might never have become visible.
if (LastStore) {
ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+ assert(LastStoreMemInst.isUnordered() &&
+ !LastStoreMemInst.isVolatile() &&
+ "Violated invariant");
if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << *Inst << '\n');
@@ -640,12 +740,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
-
- // Remember that this was the last store we saw for DSE.
- if (!MemInst.isVolatile())
+ AvailableLoads.insert(
+ MemInst.getPointerOperand(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
+
+ // Remember that this was the last unordered store we saw for DSE. We
+ // don't yet handle DSE on ordered or volatile stores since we don't
+ // have a good way to model the ordering requirement for following
+ // passes once the store is removed. We could insert a fence, but
+ // since fences are slightly stronger than stores in their ordering,
+ // it's not clear this is a profitable transform. Another option would
+ // be to merge the ordering with that of the post dominating store.
+ if (MemInst.isUnordered() && !MemInst.isVolatile())
LastStore = Inst;
+ else
+ LastStore = nullptr;
}
}
}
@@ -714,7 +824,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
auto &AC = AM->getResult<AssumptionAnalysis>(F);
- EarlyCSE CSE(F, TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -751,7 +861,7 @@ public:
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- EarlyCSE CSE(F, TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC);
return CSE.run();
}
@@ -761,6 +871,7 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
};
diff --git a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 0430c1898c8d..185cdbdda378 100644
--- a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -30,7 +30,7 @@ public:
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
}
private:
@@ -41,7 +41,7 @@ private:
char FlattenCFGPass::ID = 0;
INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
false)
@@ -59,7 +59,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end();) {
- if (FlattenCFG(BBIt++, AA)) {
+ if (FlattenCFG(&*BBIt++, AA)) {
LocalChange = true;
}
}
@@ -69,7 +69,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
}
bool FlattenCFGPass::runOnFunction(Function &F) {
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverChanged = false;
// iterativelyFlattenCFG can make some blocks dead.
while (iterativelyFlattenCFG(F, AA)) {
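The same mechanical migration from the AliasAnalysis analysis group to AAResultsWrapperPass recurs in each pass touched by this patch. A skeleton of the new pattern, assuming the usual legacy-pass boilerplate is in place; ExamplePass is hypothetical, the analysis calls are the ones visible in the hunks above:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/GlobalsModRef.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    namespace {
    struct ExamplePass : public FunctionPass {
      static char ID;
      ExamplePass() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<AAResultsWrapperPass>();  // was: addRequired<AliasAnalysis>()
        AU.addPreserved<GlobalsAAWrapperPass>(); // keep globals AA valid
      }

      bool runOnFunction(Function &F) override {
        // was: AA = &getAnalysis<AliasAnalysis>();
        AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
        (void)AA; // queries like AA->getModRefInfo(I, Loc) now return MRI_* flags
        return false;
      }
    };
    }
    char ExamplePass::ID = 0;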
diff --git a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
index c9314229c38b..7f5d78656b50 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
@@ -41,7 +43,7 @@ using namespace llvm;
// integer domain inputs, produce an integer output; fadd, for example.
//
// If a non-mappable instruction is seen, this entire def-use graph is marked
-// as non-transformable. If we see an instruction that converts from the
+// as non-transformable. If we see an instruction that converts from the
// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
/// The largest integer type worth dealing with.
@@ -60,6 +62,7 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
@@ -82,7 +85,9 @@ namespace {
}
char Float2Int::ID = 0;
-INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
+INITIALIZE_PASS_BEGIN(Float2Int, "float2int", "Float to int", false, false)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(Float2Int, "float2int", "Float to int", false, false)
// Given a FCmp predicate, return a matching ICmp predicate if one
// exists, otherwise return BAD_ICMP_PREDICATE.
@@ -125,7 +130,9 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
// Find the roots - instructions that convert from the FP domain to
// integer domain.
void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
+ if (isa<VectorType>(I.getType()))
+ continue;
switch (I.getOpcode()) {
default: break;
case Instruction::FPToUI:
@@ -133,7 +140,7 @@ void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
Roots.insert(&I);
break;
case Instruction::FCmp:
- if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+ if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
CmpInst::BAD_ICMP_PREDICATE)
Roots.insert(&I);
break;
@@ -176,7 +183,7 @@ ConstantRange Float2Int::validateRange(ConstantRange R) {
// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
// defs before their uses. Calculate the real range info.
-// Breadth-first walk of the use-def graph; determine the set of nodes
+// Breadth-first walk of the use-def graph; determine the set of nodes
// we care about and eagerly determine if some of them are poisonous.
void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
@@ -222,14 +229,14 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
seen(I, unknownRange());
break;
}
-
+
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
// Unify def-use chains if they interfere.
ECs.unionSets(I, OI);
- if (SeenInsts.find(I)->second != badRange())
+ if (SeenInsts.find(I)->second != badRange())
Worklist.push_back(OI);
- } else if (!isa<ConstantFP>(O)) {
+ } else if (!isa<ConstantFP>(O)) {
// Not an instruction or ConstantFP? we can't do anything.
seen(I, badRange());
}
@@ -240,11 +247,11 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
// Walk forwards down the list of seen instructions, so we visit defs before
// uses.
void Float2Int::walkForwards() {
- for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
- if (It->second != unknownRange())
+ for (auto &It : make_range(SeenInsts.rbegin(), SeenInsts.rend())) {
+ if (It.second != unknownRange())
continue;
- Instruction *I = It->first;
+ Instruction *I = It.first;
std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
switch (I->getOpcode()) {
// FIXME: Handle select and phi nodes.
@@ -299,7 +306,7 @@ void Float2Int::walkForwards() {
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
assert(SeenInsts.find(OI) != SeenInsts.end() &&
- "def not seen before use!");
+ "def not seen before use!");
OpRanges.push_back(SeenInsts.find(OI)->second);
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
// Work out if the floating point number can be losslessly represented
@@ -314,11 +321,11 @@ void Float2Int::walkForwards() {
APFloat F = CF->getValueAPF();
// First, weed out obviously incorrect values. Non-finite numbers
- // can't be represented and neither can negative zero, unless
+ // can't be represented and neither can negative zero, unless
// we're in fast math mode.
if (!F.isFinite() ||
(F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
- !I->hasNoSignedZeros())) {
+ !I->hasNoSignedZeros())) {
seen(I, badRange());
Abort = true;
break;
@@ -345,7 +352,7 @@ void Float2Int::walkForwards() {
// Reduce the operands' ranges to a single range and return.
if (!Abort)
- seen(I, Op(OpRanges));
+ seen(I, Op(OpRanges));
}
}
@@ -395,7 +402,7 @@ bool Float2Int::validateAndTransform() {
R.isFullSet() || R.isSignWrappedSet())
continue;
assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
-
+
// The number of bits required is the maximum of the upper and
// lower limits, plus one so it can be signed.
unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
@@ -505,9 +512,8 @@ Value *Float2Int::convert(Instruction *I, Type *ToTy) {
// Perform dead code elimination on the instructions we just modified.
void Float2Int::cleanup() {
- for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend();
- I != E; ++I)
- I->first->eraseFromParent();
+ for (auto &I : make_range(ConvertedInsts.rbegin(), ConvertedInsts.rend()))
+ I.first->eraseFromParent();
}
bool Float2Int::runOnFunction(Function &F) {
@@ -534,7 +540,4 @@ bool Float2Int::runOnFunction(Function &F) {
return Modified;
}
-FunctionPass *llvm::createFloat2IntPass() {
- return new Float2Int();
-}
-
+FunctionPass *llvm::createFloat2IntPass() { return new Float2Int(); }
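walkForwards() and cleanup() above switch from manual reverse iterators to make_range. A plain-C++ stand-in showing the same idiom (SimpleRange is a local stand-in for llvm::make_range's return type):

    #include <map>
    #include <vector>

    template <typename It> struct SimpleRange {
      It B, E;
      It begin() const { return B; }
      It end() const { return E; }
    };
    template <typename It> SimpleRange<It> make_range(It B, It E) {
      return {B, E};
    }

    inline void visitNewestFirst(const std::map<int, int> &Seen,
                                 std::vector<int> &Order) {
      // Iterate in reverse with a range-for: element access goes through a
      // reference (KV.first) rather than an iterator (It->first).
      for (const auto &KV : make_range(Seen.rbegin(), Seen.rend()))
        Order.push_back(KV.first);
    }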
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 89a0d0af93be..a028b8c444ba 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -128,6 +129,7 @@ namespace {
uint32_t lookup(Value *V) const;
uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
Value *LHS, Value *RHS);
+ bool exists(Value *V) const;
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
@@ -388,6 +390,9 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
}
}
+/// Returns true if a value number exists for the specified value.
+bool ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
+
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
@@ -608,6 +613,10 @@ namespace {
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
BumpPtrAllocator TableAllocator;
+ // Block-local map of equivalent values to their leader; it does not
+ // propagate to any successors. Entries added mid-block are applied
+ // to the remaining instructions in the block.
+ SmallMapVector<llvm::Value *, llvm::Constant *, 4> ReplaceWithConstMap;
SmallVector<Instruction*, 8> InstrsToErase;
typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
@@ -689,16 +698,17 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
- // Helper fuctions of redundant load elimination
+ // Helper functions of redundant load elimination
bool processLoad(LoadInst *L);
bool processNonLocalLoad(LoadInst *L);
+ bool processAssumeIntrinsic(IntrinsicInst *II);
void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks);
@@ -719,7 +729,9 @@ namespace {
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
- bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
+ bool replaceOperandsWithConsts(Instruction *I) const;
+ bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
+ bool DominatesByEdge);
bool processFoldableCondBr(BranchInst *BI);
void addDeadBlock(BasicBlock *BB);
void assignValNumForDeadCode();
@@ -738,7 +750,8 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1290,8 +1303,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ for (const AvailableValueInBlock &AV : ValuesPerBlock) {
BasicBlock *BB = AV.BB;
if (SSAUpdate.HasValueForBlock(BB))
@@ -1301,24 +1313,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
}
// Perform PHI construction.
- Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
-
- // If new PHI nodes were created, notify alias analysis.
- if (V->getType()->getScalarType()->isPointerTy()) {
- AliasAnalysis *AA = gvn.getAliasAnalysis();
-
- // Scan the new PHIs and inform alias analysis that we've added potentially
- // escaping uses to any values that are operands to these PHIs.
- for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
- PHINode *P = NewPHIs[i];
- for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
- unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
- AA->addEscapingUse(P->getOperandUse(jj));
- }
- }
- }
-
- return V;
+ return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
}
Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
@@ -1518,9 +1513,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// that we only have to insert *one* load (which means we're basically moving
// the load, not inserting a new one).
- SmallPtrSet<BasicBlock *, 4> Blockers;
- for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
- Blockers.insert(UnavailableBlocks[i]);
+ SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
+ UnavailableBlocks.end());
// Let's find the first basic block with more than one predecessor. Walk
// backwards through predecessors if needed.
@@ -1550,15 +1544,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// available.
MapVector<BasicBlock *, Value *> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
- for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
- FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+ for (const AvailableValueInBlock &AV : ValuesPerBlock)
+ FullyAvailableBlocks[AV.BB] = true;
+ for (BasicBlock *UnavailableBB : UnavailableBlocks)
+ FullyAvailableBlocks[UnavailableBB] = false;
SmallVector<BasicBlock *, 4> CriticalEdgePred;
- for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
- PI != E; ++PI) {
- BasicBlock *Pred = *PI;
+ for (BasicBlock *Pred : predecessors(LoadBB)) {
+ // If any predecessor block is an EH pad that does not allow non-PHI
+ // instructions before the terminator, we can't PRE the load.
+ if (Pred->getTerminator()->isEHPad()) {
+ DEBUG(dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
continue;
}
@@ -1570,9 +1571,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- if (LoadBB->isLandingPad()) {
+ if (LoadBB->isEHPad()) {
DEBUG(dbgs()
- << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
+ << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
<< Pred->getName() << "': " << *LI << '\n');
return false;
}
@@ -1655,12 +1656,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
<< *NewInsts.back() << '\n');
// Assign value numbers to the new instructions.
- for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+ for (Instruction *I : NewInsts) {
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
// marking a value as AVAIL-IN, which isn't what we intend.
- VN.lookup_or_add(NewInsts[i]);
+ VN.lookup_or_add(I);
}
for (const auto &PredLoad : PredLoads) {
@@ -1677,6 +1678,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (Tags)
NewLoad->setAAMetadata(Tags);
+ if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load))
+ NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
+ if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group))
+ NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
+
// Transfer DebugLoc.
NewLoad->setDebugLoc(LI->getDebugLoc());
@@ -1704,6 +1710,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Non-local speculations are not allowed under ASan.
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
+ return false;
+
// Step 1: Find the non-local dependencies of the load.
LoadDepVect Deps;
MD->getNonLocalPointerDependency(LI, Deps);
@@ -1777,6 +1787,63 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}
+bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
+ assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume &&
+ "This function can only be called with llvm.assume intrinsic");
+ Value *V = IntrinsicI->getArgOperand(0);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
+ if (Cond->isZero()) {
+ Type *Int8Ty = Type::getInt8Ty(V->getContext());
+ // Insert a new store-to-null instruction before the intrinsic to indicate
+ // that this code is not reachable. FIXME: We could insert an unreachable
+ // instruction directly because we can modify the CFG.
+ new StoreInst(UndefValue::get(Int8Ty),
+ Constant::getNullValue(Int8Ty->getPointerTo()),
+ IntrinsicI);
+ }
+ markInstructionForDeletion(IntrinsicI);
+ return false;
+ }
+
+ Constant *True = ConstantInt::getTrue(V->getContext());
+ bool Changed = false;
+
+ for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
+ BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
+
+ // This property is only true in dominated successors; propagateEquality
+ // will check dominance for us.
+ Changed |= propagateEquality(V, True, Edge, false);
+ }
+
+ // We can replace the assumed value with true, which covers cases like this:
+ // call void @llvm.assume(i1 %cmp)
+ // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
+ ReplaceWithConstMap[V] = True;
+
+ // If one operand of a *cmp *eq is a constant, adding it to the map will cover this:
+ // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
+ // call void @llvm.assume(i1 %cmp)
+ // ret float %0 ; will change it to ret float 3.000000e+00
+ if (auto *CmpI = dyn_cast<CmpInst>(V)) {
+ if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
+ CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
+ (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
+ CmpI->getFastMathFlags().noNaNs())) {
+ Value *CmpLHS = CmpI->getOperand(0);
+ Value *CmpRHS = CmpI->getOperand(1);
+ if (isa<Constant>(CmpLHS))
+ std::swap(CmpLHS, CmpRHS);
+ auto *RHSConst = dyn_cast<Constant>(CmpRHS);
+
+ // If only one operand is constant.
+ if (RHSConst != nullptr && !isa<Constant>(CmpLHS))
+ ReplaceWithConstMap[CmpLHS] = RHSConst;
+ }
+ }
+ return Changed;
+}
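A toy model of what the new ReplaceWithConstMap buys within a single block: after assume(Var == Lit), later uses of Var in that block fold to Lit. Names here are hypothetical; plain C++, not GVN's types:

    #include <map>
    #include <string>

    struct BlockState {
      std::map<std::string, int> ReplaceWithConst; // cleared per block

      // Analogue of processAssumeIntrinsic for 'assume(Var == Lit)'.
      void onAssumeEq(const std::string &Var, int Lit) {
        ReplaceWithConst[Var] = Lit;
      }

      // Analogue of replaceOperandsWithConsts for a single variable use.
      bool fold(const std::string &Var, int &Out) const {
        auto It = ReplaceWithConst.find(Var);
        if (It == ReplaceWithConst.end())
          return false;
        Out = It->second;
        return true;
      }
    };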
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
@@ -1789,7 +1856,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
// FIXME: If both the original and replacement value are part of the
// same control-flow region (meaning that the execution of one
- // guarentees the executation of the other), then we can combine the
+ // guarantees the execution of the other), then we can combine the
// noalias scopes here and do better than the general conservative
// answer used in combineMetadata().
@@ -1797,13 +1864,10 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// regions, and so we need a conservative combination of the noalias
// scopes.
static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- };
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group};
combineMetadata(ReplInst, I, KnownIDs);
}
}
@@ -1890,10 +1954,8 @@ bool GVN::processLoad(LoadInst *L) {
++NumGVNLoad;
return true;
}
- }
- // If the value isn't available, don't do anything!
- if (Dep.isClobber()) {
+ // If the value isn't available, don't do anything!
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
@@ -2049,11 +2111,31 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
+// Tries to replace an instruction's operands with constants, using
+// information from ReplaceWithConstMap.
+bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
+ bool Changed = false;
+ for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
+ Value *Operand = Instr->getOperand(OpNum);
+ auto it = ReplaceWithConstMap.find(Operand);
+ if (it != ReplaceWithConstMap.end()) {
+ assert(!isa<Constant>(Operand) &&
+ "Replacing constants with constants is invalid");
+ DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second
+ << " in instruction " << *Instr << '\n');
+ Instr->setOperand(OpNum, it->second);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
/// The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-bool GVN::propagateEquality(Value *LHS, Value *RHS,
- const BasicBlockEdge &Root) {
+/// If DominatesByEdge is false, dominance is checked against Root.End instead.
+bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
+ bool DominatesByEdge) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
@@ -2065,11 +2147,13 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
LHS = Item.first; RHS = Item.second;
- if (LHS == RHS) continue;
+ if (LHS == RHS)
+ continue;
assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
// Don't try to propagate equalities between constants.
- if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
+ if (isa<Constant>(LHS) && isa<Constant>(RHS))
+ continue;
// Prefer a constant on the right-hand side, or an Argument if no constants.
if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
@@ -2108,7 +2192,11 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
// LHS always has at least one use that is not dominated by Root, this will
// never do anything if LHS has only one use.
if (!LHS->hasOneUse()) {
- unsigned NumReplacements = replaceDominatedUsesWith(LHS, RHS, *DT, Root);
+ unsigned NumReplacements =
+ DominatesByEdge
+ ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
+ : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getEnd());
+
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2180,7 +2268,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
- replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root);
+ DominatesByEdge
+ ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
+ : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
+ Root.getEnd());
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2220,6 +2311,10 @@ bool GVN::processInstruction(Instruction *I) {
return true;
}
+ if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I))
+ if (IntrinsicI->getIntrinsicID() == Intrinsic::assume)
+ return processAssumeIntrinsic(IntrinsicI);
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (processLoad(LI))
return true;
@@ -2250,11 +2345,11 @@ bool GVN::processInstruction(Instruction *I) {
Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
BasicBlockEdge TrueE(Parent, TrueSucc);
- Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
BasicBlockEdge FalseE(Parent, FalseSucc);
- Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
return Changed;
}
@@ -2276,7 +2371,7 @@ bool GVN::processInstruction(Instruction *I) {
// If there is only a single edge, propagate the case value into it.
if (SwitchEdges.lookup(Dst) == 1) {
BasicBlockEdge E(Parent, Dst);
- Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E, true);
}
}
return Changed;
@@ -2284,7 +2379,8 @@ bool GVN::processInstruction(Instruction *I) {
// Instructions with void type don't return a value, so there's
// no point in trying to find redundancies in them.
- if (I->getType()->isVoidTy()) return false;
+ if (I->getType()->isVoidTy())
+ return false;
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
@@ -2306,17 +2402,21 @@ bool GVN::processInstruction(Instruction *I) {
// Perform fast-path value-number based elimination of values inherited from
// dominators.
- Value *repl = findLeader(I->getParent(), Num);
- if (!repl) {
+ Value *Repl = findLeader(I->getParent(), Num);
+ if (!Repl) {
// Failure, just remember this instance for future use.
addToLeaderTable(Num, I, I->getParent());
return false;
+ } else if (Repl == I) {
+ // If I was the result of a shortcut PRE, it might already be in the table
+ // and the best replacement for itself. Nothing to do.
+ return false;
}
// Remove it!
- patchAndReplaceAllUsesWith(I, repl);
- if (MD && repl->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(repl);
+ patchAndReplaceAllUsesWith(I, Repl);
+ if (MD && Repl->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(Repl);
markInstructionForDeletion(I);
return true;
}
@@ -2331,7 +2431,7 @@ bool GVN::runOnFunction(Function& F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+ VN.setAliasAnalysis(&getAnalysis<AAResultsWrapperPass>().getAAResults());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2341,10 +2441,10 @@ bool GVN::runOnFunction(Function& F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = FI++;
+ BasicBlock *BB = &*FI++;
- bool removedBlock = MergeBlockIntoPredecessor(
- BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD);
+ bool removedBlock =
+ MergeBlockIntoPredecessor(BB, DT, /* LoopInfo */ nullptr, MD);
if (removedBlock) ++NumGVNBlocks;
Changed |= removedBlock;
@@ -2382,7 +2482,6 @@ bool GVN::runOnFunction(Function& F) {
return Changed;
}
-
bool GVN::processBlock(BasicBlock *BB) {
// FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
// (and incrementing BI before processing an instruction).
@@ -2391,11 +2490,16 @@ bool GVN::processBlock(BasicBlock *BB) {
if (DeadBlocks.count(BB))
return false;
+  // Clear the map before every BB because it is only valid for a single BB.
+ ReplaceWithConstMap.clear();
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- ChangedFunction |= processInstruction(BI);
+ if (!ReplaceWithConstMap.empty())
+ ChangedFunction |= replaceOperandsWithConsts(&*BI);
+ ChangedFunction |= processInstruction(&*BI);
+
if (InstrsToErase.empty()) {
++BI;
continue;
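// [Editor's sketch] How a per-block map like ReplaceWithConstMap is consumed;
// Val and the free function below are assumptions for illustration only.
#include <unordered_map>

struct Val;                                        // stand-in for a Value
using ConstMap = std::unordered_map<Val *, Val *>; // value -> known constant

// Rewrites operands through the map; the caller clears the map at each new
// block, since the recorded equalities only hold within that block.
bool replaceOperandsWithConsts(Val **Ops, unsigned NumOps, const ConstMap &M) {
  bool Changed = false;
  for (unsigned I = 0; I != NumOps; ++I) {
    auto It = M.find(Ops[I]);
    if (It != M.end()) {
      Ops[I] = It->second;
      Changed = true;
    }
  }
  return Changed;
}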
@@ -2439,7 +2543,14 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Value *Op = Instr->getOperand(i);
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
continue;
-
+    // This could be a newly inserted instruction, in which case we won't
+    // find a value number, and should give up before we hurt ourselves.
+    // FIXME: Rewrite the infrastructure to make it easier to value number
+    // and process newly inserted instructions.
+ if (!VN.exists(Op)) {
+ success = false;
+ break;
+ }
if (Value *V = findLeader(Pred, VN.lookup(Op))) {
Instr->setOperand(i, V);
} else {
@@ -2499,9 +2610,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
BasicBlock *CurrentBlock = CurInst->getParent();
predMap.clear();
- for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(CurrentBlock)) {
// We're not interested in PRE where the block is its
// own predecessor, or in blocks with predecessors
// that are not reachable.
@@ -2570,7 +2679,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// Create a PHI to make the value available in this block.
PHINode *Phi =
PHINode::Create(CurInst->getType(), predMap.size(),
- CurInst->getName() + ".pre-phi", CurrentBlock->begin());
+ CurInst->getName() + ".pre-phi", &CurrentBlock->front());
for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
if (Value *V = predMap[i].first)
Phi->addIncoming(V, predMap[i].second);
@@ -2582,18 +2691,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
addToLeaderTable(ValNo, Phi, CurrentBlock);
Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
- if (Phi->getType()->getScalarType()->isPointerTy()) {
- // Because we have added a PHI-use of the pointer value, it has now
- // "escaped" from alias analysis' perspective. We need to inform
- // AA of this.
- for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) {
- unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
- VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
- }
-
- if (MD)
- MD->invalidateCachedPointerInfo(Phi);
- }
+ if (MD && Phi->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
@@ -2616,15 +2715,15 @@ bool GVN::performPRE(Function &F) {
if (CurrentBlock == &F.getEntryBlock())
continue;
- // Don't perform PRE on a landing pad.
- if (CurrentBlock->isLandingPad())
+ // Don't perform PRE on an EH pad.
+ if (CurrentBlock->isEHPad())
continue;
for (BasicBlock::iterator BI = CurrentBlock->begin(),
BE = CurrentBlock->end();
BI != BE;) {
- Instruction *CurInst = BI++;
- Changed = performScalarPRE(CurInst);
+ Instruction *CurInst = &*BI++;
+ Changed |= performScalarPRE(CurInst);
}
}
@@ -2637,8 +2736,8 @@ bool GVN::performPRE(Function &F) {
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted on the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
- BasicBlock *BB = SplitCriticalEdge(
- Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
+ BasicBlock *BB =
+ SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
if (MD)
MD->invalidateCachedPredecessors();
return BB;
@@ -2652,7 +2751,7 @@ bool GVN::splitCriticalEdges() {
do {
std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
SplitCriticalEdge(Edge.first, Edge.second,
- CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
+ CriticalEdgeSplittingOptions(DT));
} while (!toSplit.empty());
if (MD) MD->invalidateCachedPredecessors();
return true;
@@ -2728,17 +2827,14 @@ void GVN::addDeadBlock(BasicBlock *BB) {
DeadBlocks.insert(Dom.begin(), Dom.end());
// Figure out the dominance-frontier(D).
- for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
- E = Dom.end(); I != E; I++) {
- BasicBlock *B = *I;
- for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
- BasicBlock *S = *SI;
+ for (BasicBlock *B : Dom) {
+ for (BasicBlock *S : successors(B)) {
if (DeadBlocks.count(S))
continue;
bool AllPredDead = true;
- for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
- if (!DeadBlocks.count(*PI)) {
+ for (BasicBlock *P : predecessors(S))
+ if (!DeadBlocks.count(P)) {
AllPredDead = false;
break;
}
@@ -2766,10 +2862,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
continue;
SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
- for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
- PE = Preds.end(); PI != PE; PI++) {
- BasicBlock *P = *PI;
-
+ for (BasicBlock *P : Preds) {
if (!DeadBlocks.count(P))
continue;
@@ -2794,7 +2887,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// R be the target of the dead out-coming edge.
// 1) Identify the set of dead blocks implied by the branch's dead outcoming
// edge. The result of this step will be {X| X is dominated by R}
-// 2) Identify those blocks which haves at least one dead prodecessor. The
+// 2) Identify those blocks which have at least one dead predecessor. The
// result of this step will be dominance-frontier(R).
// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
// dead blocks with "UndefVal" in the hope that these PHIs will be optimized away.
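// [Editor's sketch] Step 2) above on a toy CFG: blocks are ints, Succs is an
// adjacency list, and Dead is the dominated set from step 1). A block enters
// DF(R) when it is live but has at least one dead predecessor; step 3) then
// rewrites its PHI operands coming from dead blocks to undef.
#include <set>
#include <vector>

std::set<int> dominanceFrontier(const std::vector<std::vector<int>> &Succs,
                                const std::set<int> &Dead) {
  std::set<int> DF;
  for (int B : Dead)
    for (int S : Succs[B])
      if (!Dead.count(S)) // live successor of a dead block
        DF.insert(S);
  return DF;
}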
@@ -2829,14 +2922,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
// instructions, it makes more sense just to "fabricate" a val-number for the
// dead code than to check whether the instruction involved is dead or not.
void GVN::assignValNumForDeadCode() {
- for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
- E = DeadBlocks.end(); I != E; I++) {
- BasicBlock *BB = *I;
- for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
- II != EE; II++) {
- Instruction *Inst = &*II;
- unsigned ValNum = VN.lookup_or_add(Inst);
- addToLeaderTable(ValNum, Inst, BB);
+ for (BasicBlock *BB : DeadBlocks) {
+ for (Instruction &Inst : *BB) {
+ unsigned ValNum = VN.lookup_or_add(&Inst);
+ addToLeaderTable(ValNum, &Inst, BB);
}
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 2a954d9961f2..ec5e15f0b8f8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -28,9 +28,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -48,6 +50,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
@@ -83,64 +86,62 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
namespace {
struct RewritePhi;
-}
-namespace {
- class IndVarSimplify : public LoopPass {
- LoopInfo *LI;
- ScalarEvolution *SE;
- DominatorTree *DT;
- TargetLibraryInfo *TLI;
- const TargetTransformInfo *TTI;
-
- SmallVector<WeakVH, 16> DeadInsts;
- bool Changed;
- public:
-
- static char ID; // Pass identification, replacement for typeid
- IndVarSimplify()
- : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
- initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
- }
+class IndVarSimplify : public LoopPass {
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreservedID(LCSSAID);
- AU.setPreservesCFG();
- }
+ SmallVector<WeakVH, 16> DeadInsts;
+ bool Changed;
+public:
- private:
- void releaseMemory() override {
- DeadInsts.clear();
- }
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify()
+ : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
- bool isValidRewrite(Value *FromVal, Value *ToVal);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.setPreservesCFG();
+ }
- void HandleFloatingPointIV(Loop *L, PHINode *PH);
- void RewriteNonIntegerIVs(Loop *L);
+private:
+ void releaseMemory() override {
+ DeadInsts.clear();
+ }
- void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
+ bool isValidRewrite(Value *FromVal, Value *ToVal);
- bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+ void handleFloatingPointIV(Loop *L, PHINode *PH);
+ void rewriteNonIntegerIVs(Loop *L);
- Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar, SCEVExpander &Rewriter);
+ void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
- void SinkUnusedInvariants(Loop *L);
+ bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
+ void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
- Instruction *InsertPt, Type *Ty,
- bool &IsHighCostExpansion);
- };
+ Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
+
+ void sinkUnusedInvariants(Loop *L);
+
+ Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
+ Instruction *InsertPt, Type *Ty);
+};
}
char IndVarSimplify::ID = 0;
@@ -148,7 +149,7 @@ INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
@@ -158,10 +159,10 @@ Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
}
-/// isValidRewrite - Return true if the SCEV expansion generated by the
-/// rewriter can replace the original value. SCEV guarantees that it
-/// produces the same value, but the way it is produced may be illegal IR.
-/// Ideally, this function will only be called for verification.
+/// Return true if the SCEV expansion generated by the rewriter can replace the
+/// original value. SCEV guarantees that it produces the same value, but the way
+/// it is produced may be illegal IR. Ideally, this function will only be
+/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// If an SCEV expression subsumed multiple pointers, its expansion could
// reassociate the GEP changing the base pointer. This is illegal because the
@@ -175,10 +176,10 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// because it understands lcssa phis while SCEV does not.
Value *FromPtr = FromVal;
Value *ToPtr = ToVal;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+ if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
FromPtr = GEP->getPointerOperand();
}
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+ if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
ToPtr = GEP->getPointerOperand();
}
if (FromPtr != FromVal || ToPtr != ToVal) {
@@ -215,7 +216,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
- DominatorTree *DT) {
+ DominatorTree *DT, LoopInfo *LI) {
PHINode *PHI = dyn_cast<PHINode>(User);
if (!PHI)
return User;
@@ -234,17 +235,28 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
InsertPt = InsertBB->getTerminator();
}
assert(InsertPt && "Missing phi operand");
- assert((!isa<Instruction>(Def) ||
- DT->dominates(cast<Instruction>(Def), InsertPt)) &&
- "def does not dominate all uses");
- return InsertPt;
+
+ auto *DefI = dyn_cast<Instruction>(Def);
+ if (!DefI)
+ return InsertPt;
+
+ assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
+
+ auto *L = LI->getLoopFor(DefI->getParent());
+ assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
+
+ for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
+ if (LI->getLoopFor(DTN->getBlock()) == L)
+ return DTN->getBlock()->getTerminator();
+
+ llvm_unreachable("DefI dominates InsertPt!");
}
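// [Editor's sketch] The idom walk added above, in miniature: starting from
// the tentative insertion block, climb immediate dominators until reaching a
// block in the same innermost loop as the definition. Node and LoopId are
// assumed stand-ins for the dominator-tree node and LoopInfo query.
struct Node {
  Node *IDom; // immediate dominator, null at the root
  int LoopId; // id of the innermost containing loop (0 = none)
};

Node *hoistToDefLoop(Node *InsertBlock, int DefLoopId) {
  for (Node *N = InsertBlock; N; N = N->IDom)
    if (N->LoopId == DefLoopId)
      return N;   // insert at this block's terminator
  return nullptr; // unreachable when the def dominates the insertion point
}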
//===----------------------------------------------------------------------===//
-// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//
-/// ConvertToSInt - Convert APF to an integer, if possible.
+/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
bool isExact = false;
// See if we can convert this to an int64_t
@@ -256,8 +268,8 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
return true;
}
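// [Editor's sketch] The exactness rule ConvertToSInt enforces, shown with
// plain doubles instead of APFloat: accept only values in int64_t range that
// round-trip through the conversion unchanged.
#include <cmath>
#include <cstdint>

bool convertToSIntExact(double D, int64_t &Out) {
  if (!std::isfinite(D) || D < -9223372036854775808.0 ||
      D >= 9223372036854775808.0) // outside int64_t range (or NaN/inf)
    return false;
  Out = static_cast<int64_t>(D);
  return static_cast<double>(Out) == D; // true only if no rounding occurred
}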
-/// HandleFloatingPointIV - If the loop has floating induction variable
-/// then insert corresponding integer induction variable if possible.
+/// If the loop has a floating-point induction variable, then insert a
+/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
/// bar(i)
@@ -265,13 +277,12 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
/// for(int i = 0; i < 10000; ++i)
/// bar((double)i);
///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
unsigned BackEdge = IncomingEdge^1;
// Check incoming value.
- ConstantFP *InitValueVal =
- dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
+ auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
int64_t InitValue;
if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
@@ -279,8 +290,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
- BinaryOperator *Incr =
- dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
// If this is not an add of the PHI with a constantfp, or if the constant fp
@@ -456,14 +466,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// platforms.
if (WeakPH) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstInsertionPt());
+ &*PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
Changed = true;
}
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
// First step. Check to see if there are any floating-point recurrences.
// If there are, change them into integer recurrences, permitting analysis by
// the SCEV routines.
@@ -477,7 +487,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
- HandleFloatingPointIV(L, PN);
+ handleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
// may not have been able to compute a trip count. Now that we've done some
@@ -488,7 +498,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
namespace {
// Collect information about PHI nodes which can be transformed in
-// RewriteLoopExitValues.
+// rewriteLoopExitValues.
struct RewritePhi {
PHINode *PN;
unsigned Ith; // Ith incoming value.
@@ -501,70 +511,37 @@ struct RewritePhi {
};
}
-Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
+Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
Loop *L, Instruction *InsertPt,
- Type *ResultTy,
- bool &IsHighCostExpansion) {
- using namespace llvm::PatternMatch;
-
- if (!Rewriter.isHighCostExpansion(S, L)) {
- IsHighCostExpansion = false;
- return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
- }
-
+ Type *ResultTy) {
// Before expanding S into an expensive LLVM expression, see if we can use an
- // already existing value as the expansion for S. There is potential to make
- // this significantly smarter, but this simple heuristic already gets some
- // interesting cases.
-
- SmallVector<BasicBlock *, 4> Latches;
- L->getLoopLatches(Latches);
-
- for (BasicBlock *BB : Latches) {
- ICmpInst::Predicate Pred;
- Instruction *LHS, *RHS;
- BasicBlock *TrueBB, *FalseBB;
-
- if (!match(BB->getTerminator(),
- m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
- TrueBB, FalseBB)))
- continue;
-
- if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) {
- IsHighCostExpansion = false;
- return LHS;
- }
-
- if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) {
- IsHighCostExpansion = false;
- return RHS;
- }
- }
+ // already existing value as the expansion for S.
+ if (Value *ExistingValue = Rewriter.findExistingExpansion(S, InsertPt, L))
+ if (ExistingValue->getType() == ResultTy)
+ return ExistingValue;
// We didn't find anything, fall back to using SCEVExpander.
- assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!");
- IsHighCostExpansion = true;
return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}
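// [Editor's note] findExistingExpansion, used above, roughly asks whether the
// function already contains IR computing S that is usable at InsertPt; only
// on a miss does the pass pay for fresh instructions from expandCodeFor.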
//===----------------------------------------------------------------------===//
-// RewriteLoopExitValues - Optimize IV users outside the loop.
+// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//
-/// RewriteLoopExitValues - Check to see if this loop has a computable
-/// loop-invariant execution count. If so, this means that we can compute the
-/// final value of any expressions that are recurrent in the loop, and
-/// substitute the exit values from the loop into any instructions outside of
-/// the loop that use the final values of the current expressions.
+/// Check to see if this loop has a computable loop-invariant execution count.
+/// If so, this means that we can compute the final value of any expressions
+/// that are recurrent in the loop, and substitute the exit values from the loop
+/// into any instructions outside of the loop that use the final values of the
+/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
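// [Editor's illustration, not from the source] The classic shape this
// rewrites, given a computable trip count %n:
//
//   loop:
//     %i      = phi i32 [ 0, %preheader ], [ %i.next, %loop ]
//     %i.next = add nsw i32 %i, 1
//     %c      = icmp ne i32 %i.next, %n
//     br i1 %c, label %loop, label %exit
//   exit:
//     %lcssa  = phi i32 [ %i.next, %loop ]   ; exit value is simply %n
//
// Users of %lcssa can take %n directly, after which the IV may become dead.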
-void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
- // Verify the input to the pass in already in LCSSA form.
- assert(L->isLCSSAForm(*DT));
+void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+ // Check a pre-condition.
+ assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -679,9 +656,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
continue;
}
- bool HighCost = false;
- Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst,
- PN->getType(), HighCost);
+ bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
+ Value *ExitVal =
+ expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n");
@@ -698,7 +675,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
}
}
- bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet);
+ bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
// Transformation.
for (const RewritePhi &Phi : RewritePhiSet) {
@@ -735,10 +712,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
Rewriter.clearInsertPoint();
}
-/// CanLoopBeDeleted - Check whether it is possible to delete the loop after
-/// rewriting exit value. If it is possible, ignore ReplaceExitValue and
-/// do rewriting aggressively.
-bool IndVarSimplify::CanLoopBeDeleted(
+/// Check whether it is possible to delete the loop after rewriting exit
+/// value. If it is possible, ignore ReplaceExitValue and do rewriting
+/// aggressively.
+bool IndVarSimplify::canLoopBeDeleted(
Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
BasicBlock *Preheader = L->getLoopPreheader();
@@ -782,14 +759,9 @@ bool IndVarSimplify::CanLoopBeDeleted(
++BI;
}
- for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
- LI != LE; ++LI) {
- for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE;
- ++BI) {
- if (BI->mayHaveSideEffects())
- return false;
- }
- }
+ for (auto *BB : L->blocks())
+ if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
+ return false;
return true;
}
@@ -799,22 +771,19 @@ bool IndVarSimplify::CanLoopBeDeleted(
//===----------------------------------------------------------------------===//
namespace {
- // Collect information about induction variables that are used by sign/zero
- // extend operations. This information is recorded by CollectExtend and
- // provides the input to WidenIV.
- struct WideIVInfo {
- PHINode *NarrowIV;
- Type *WidestNativeType; // Widest integer type created [sz]ext
- bool IsSigned; // Was a sext user seen before a zext?
-
- WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
- IsSigned(false) {}
- };
+// Collect information about induction variables that are used by sign/zero
+// extend operations. This information is recorded by CollectExtend and provides
+// the input to WidenIV.
+struct WideIVInfo {
+ PHINode *NarrowIV = nullptr;
+ Type *WidestNativeType = nullptr; // Widest integer type created [sz]ext
+ bool IsSigned = false; // Was a sext user seen before a zext?
+};
}
-/// visitCast - Update information about the induction variable that is
-/// extended by this sign or zero extend operation. This is used to determine
-/// the final width of the IV before actually widening it.
+/// Update information about the induction variable that is extended by this
+/// sign or zero extend operation. This is used to determine the final width of
+/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
@@ -855,24 +824,29 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
namespace {
-/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
-/// WideIV that computes the same value as the Narrow IV def. This avoids
-/// caching Use* pointers.
+/// Record a link in the Narrow IV def-use chain along with the WideIV that
+/// computes the same value as the Narrow IV def. This avoids caching Use*
+/// pointers.
struct NarrowIVDefUse {
- Instruction *NarrowDef;
- Instruction *NarrowUse;
- Instruction *WideDef;
-
- NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
-
- NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
- NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
+ Instruction *NarrowDef = nullptr;
+ Instruction *NarrowUse = nullptr;
+ Instruction *WideDef = nullptr;
+
+ // True if the narrow def is never negative. Tracking this information lets
+ // us use a sign extension instead of a zero extension or vice versa, when
+ // profitable and legal.
+ bool NeverNegative = false;
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
+ bool NeverNegative)
+ : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
+ NeverNegative(NeverNegative) {}
};
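// [Editor's note] Why NeverNegative lets the two extensions be interchanged:
// for a non-negative narrow value, zext and sext agree; for a negative one
// they do not, so the flag must hold before swapping them. Verified in plain
// C++:
#include <cstdint>
static_assert(int32_t(uint8_t(42)) == int32_t(int8_t(42)),
              "non-negative i8: zext == sext");
static_assert(int32_t(uint8_t(-6)) != int32_t(int8_t(-6)),
              "negative i8: zext (250) != sext (-6)");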
-/// WidenIV - The goal of this transform is to remove sign and zero extends
-/// without creating any new induction variables. To do this, it creates a new
-/// phi of the wider type and redirects all users, either removing extends or
-/// inserting truncs whenever we stop propagating the type.
+/// The goal of this transform is to remove sign and zero extends without
+/// creating any new induction variables. To do this, it creates a new phi of
+/// the wider type and redirects all users, either removing extends or inserting
+/// truncs whenever we stop propagating the type.
///
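// [Editor's illustration] The before/after shape, assuming a 32-bit IV whose
// sign extension feeds 64-bit users:
//
//   before:  %iv  = phi i32 ...          after:  %iv.wide = phi i64 ...
//            %ext = sext i32 %iv to i64          (users of %ext retargeted to
//            use(%ext)                            %iv.wide; a trunc is emitted
//                                                 only where an i32 is still
//                                                 genuinely required)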
class WidenIV {
// Parameters
@@ -913,32 +887,35 @@ public:
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
- PHINode *CreateWideIV(SCEVExpander &Rewriter);
+ PHINode *createWideIV(SCEVExpander &Rewriter);
protected:
- Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
- Instruction *Use);
+ Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
- Instruction *CloneIVUser(NarrowIVDefUse DU);
+ Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
+ Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR);
+ Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
- const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+ const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
- const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
+ const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
- const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const;
- Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
+ Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
- bool WidenLoopCompare(NarrowIVDefUse DU);
+ bool widenLoopCompare(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
-/// isLoopInvariant - Perform a quick domtree based check for loop invariance
-/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
-/// gratuitous for this purpose.
+/// Perform a quick domtree based check for loop invariance assuming that V is
+/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
+/// purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst)
@@ -947,8 +924,8 @@ static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
return DT->properlyDominates(Inst->getParent(), L->getHeader());
}
-Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
- Instruction *Use) {
+Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
+ bool IsSigned, Instruction *Use) {
// Set the debug location and conservative insertion point.
IRBuilder<> Builder(Use);
// Hoist the insertion point into loop preheaders as far as possible.
@@ -961,10 +938,11 @@ Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
Builder.CreateZExt(NarrowOper, WideType);
}
-/// CloneIVUser - Instantiate a wide operation to replace a narrow
-/// operation. This only needs to handle operations that can evaluation to
-/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
-Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
+/// Instantiate a wide operation to replace a narrow operation. This only needs
+/// to handle operations that can evaluate to SCEVAddRec. It can safely return
+/// 0 for any operation we decide not to clone.
+Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
@@ -973,40 +951,140 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::Sub:
+ return cloneArithmeticIVUser(DU, WideAR);
+
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
-
- // Replace NarrowDef operands with WideDef. Otherwise, we don't know
- // anything about the narrow operand yet so must insert a [sz]ext. It is
- // probably loop invariant and will be folded or hoisted. If it actually
- // comes from a widened IV, it should be removed during a future call to
- // WidenIVUse.
- Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
- getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
- Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
- getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
-
- BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
- BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
- LHS, RHS,
- NarrowBO->getName());
- IRBuilder<> Builder(DU.NarrowUse);
- Builder.Insert(WideBO);
- if (const OverflowingBinaryOperator *OBO =
- dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
- if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
- if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ return cloneBitwiseIVUser(DU);
+ }
+}
+
+Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
+ // about the narrow operand yet so must insert a [sz]ext. It is probably loop
+ // invariant and will be folded or hoisted. If it actually comes from a
+ // widened IV, it should be removed during a future call to widenIVUse.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ IsSigned, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ IsSigned, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
+
+Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
+
+ // We're trying to find X such that
+ //
+ // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
+ //
+ // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
+ // and check using SCEV if any of them are correct.
+
+ // Returns true if extending NonIVNarrowDef according to `SignExt` is a
+ // correct solution to X.
+ auto GuessNonIVOperand = [&](bool SignExt) {
+ const SCEV *WideLHS;
+ const SCEV *WideRHS;
+
+ auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
+ if (SignExt)
+ return SE->getSignExtendExpr(S, Ty);
+ return SE->getZeroExtendExpr(S, Ty);
+ };
+
+ if (IVOpIdx == 0) {
+ WideLHS = SE->getSCEV(WideDef);
+ const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
+ WideRHS = GetExtend(NarrowRHS, WideType);
+ } else {
+ const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
+ WideLHS = GetExtend(NarrowLHS, WideType);
+ WideRHS = SE->getSCEV(WideDef);
+ }
+
+ // WideUse is "WideDef `op.wide` X" as described in the comment.
+ const SCEV *WideUse = nullptr;
+
+ switch (NarrowUse->getOpcode()) {
+ default:
+ llvm_unreachable("No other possibility!");
+
+ case Instruction::Add:
+ WideUse = SE->getAddExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::Mul:
+ WideUse = SE->getMulExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::UDiv:
+ WideUse = SE->getUDivExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::Sub:
+ WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
+ break;
}
- return WideBO;
+
+ return WideUse == WideAR;
+ };
+
+ bool SignExtend = IsSigned;
+ if (!GuessNonIVOperand(SignExtend)) {
+ SignExtend = !SignExtend;
+ if (!GuessNonIVOperand(SignExtend))
+ return nullptr;
}
+
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ SignExtend, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ SignExtend, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
}
-const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const {
if (OpCode == Instruction::Add)
return SE->getAddExpr(LHS, RHS);
@@ -1022,7 +1100,7 @@ const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.
-const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
const unsigned OpCode = DU.NarrowUse->getOpcode();
@@ -1062,19 +1140,18 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
if (ExtendOperIdx == 0)
std::swap(lhs, rhs);
const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(GetSCEVByOpCode(lhs, rhs, OpCode));
+ dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
if (!AddRec || AddRec->getLoop() != L)
return nullptr;
return AddRec;
}
-/// GetWideRecurrence - Is this instruction potentially interesting for further
-/// simplification after widening it's type? In other words, can the
-/// extend be safely hoisted out of the loop with SCEV reducing the value to a
-/// recurrence on the same loop. If so, return the sign or zero extended
-/// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+/// Is this instruction potentially interesting for further simplification after
+/// widening its type? In other words, can the extend be safely hoisted out of
+/// the loop, with SCEV reducing the value to a recurrence on the same loop? If
+/// so, return the sign or zero extended recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
return nullptr;
@@ -1097,10 +1174,11 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
/// This IV user cannot be widen. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
-static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
+static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
<< " for user " << *DU.NarrowUse << "\n");
- IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ IRBuilder<> Builder(
+ getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
@@ -1108,13 +1186,27 @@ static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
-bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
+bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
if (!Cmp)
return false;
- // Sign of IV user and compare must match.
- if (IsSigned != CmpInst::isSigned(Cmp->getPredicate()))
+ // We can legally widen the comparison in the following two cases:
+ //
+ // - The signedness of the IV extension and comparison match
+ //
+  //  - The narrow IV is never negative (and thus its sign extension is equal
+ // to its zero extension). For instance, let's say we're zero extending
+ // %narrow for the following use
+ //
+ // icmp slt i32 %narrow, %val ... (A)
+ //
+  //    and %narrow is never negative.  Then
+ //
+ // (A) == icmp slt i32 sext(%narrow), sext(%val)
+ // == icmp slt i32 zext(%narrow), sext(%val)
+
+ if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
return false;
Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
@@ -1123,20 +1215,21 @@ bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
// Widen the compare instruction.
- IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ IRBuilder<> Builder(
+ getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
// Widen the other operand of the compare, if necessary.
if (CastWidth < IVWidth) {
- Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
+ Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
}
return true;
}
-/// WidenIVUse - Determine whether an individual user of the narrow IV can be
-/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+/// Determine whether an individual user of the narrow IV can be widened. If so,
+/// return the wide clone of the user.
+Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
@@ -1145,13 +1238,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// After SimplifyCFG most loop exit targets have a single predecessor.
// Otherwise fall back to a truncate within the loop.
if (UsePhi->getNumOperands() != 1)
- truncateIVUse(DU, DT);
+ truncateIVUse(DU, DT, LI);
else {
PHINode *WidePhi =
PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
UsePhi);
WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
- IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
+ IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
UsePhi->replaceAllUsesWith(Trunc);
DeadInsts.emplace_back(UsePhi);
@@ -1200,20 +1293,20 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
}
// Does this user itself evaluate to a recurrence after widening?
- const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
+ const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse);
if (!WideAddRec)
- WideAddRec = GetExtendedOperandRecurrence(DU);
+ WideAddRec = getExtendedOperandRecurrence(DU);
if (!WideAddRec) {
// If use is a loop condition, try to promote the condition instead of
// truncating the IV first.
- if (WidenLoopCompare(DU))
+ if (widenLoopCompare(DU))
return nullptr;
  // This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- truncateIVUse(DU, DT);
+ truncateIVUse(DU, DT, LI);
return nullptr;
}
// Assume block terminators cannot evaluate to a recurrence. We can't
@@ -1228,7 +1321,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
- WideUse = CloneIVUser(DU);
+ WideUse = cloneIVUser(DU, WideAddRec);
if (!WideUse)
return nullptr;
}
@@ -1248,9 +1341,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
return WideUse;
}
-/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+/// Add eligible users of NarrowDef to NarrowIVUsers.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+ bool NeverNegative =
+ SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
+ SE->getConstant(NarrowSCEV->getType(), 0));
for (User *U : NarrowDef->users()) {
Instruction *NarrowUser = cast<Instruction>(U);
@@ -1258,21 +1355,21 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
if (!Widened.insert(NarrowUser).second)
continue;
- NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
+ NarrowIVUsers.push_back(
+ NarrowIVDefUse(NarrowDef, NarrowUser, WideDef, NeverNegative));
}
}
-/// CreateWideIV - Process a single induction variable. First use the
-/// SCEVExpander to create a wide induction variable that evaluates to the same
-/// recurrence as the original narrow IV. Then use a worklist to forward
-/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
-/// interesting IV users, the narrow IV will be isolated for removal by
-/// DeleteDeadPHIs.
+/// Process a single induction variable. First use the SCEVExpander to create a
+/// wide induction variable that evaluates to the same recurrence as the
+/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
+/// def-use chain. After widenIVUse has processed all interesting IV users, the
+/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
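// [Editor's sketch] The worklist discipline described above, in the abstract;
// Edge stands in for NarrowIVDefUse and widenOne for widenIVUse:
#include <vector>

template <typename Edge, typename WidenFn>
void drainWorklist(std::vector<Edge> &Worklist, WidenFn widenOne) {
  while (!Worklist.empty()) {
    Edge DU = Worklist.back();
    Worklist.pop_back();
    // Processing one def-use edge may push that use's own users; deleting
    // the isolated narrow defs is deferred so SCEV stays valid (see above).
    widenOne(DU, Worklist);
  }
}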
-PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
@@ -1302,11 +1399,11 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// either find an existing phi or materialize a new one. Either way, we
// expect a well-formed cyclic phi-with-increments. i.e. any operand not part
// of the phi-SCC dominates the loop entry.
- Instruction *InsertPt = L->getHeader()->begin();
+ Instruction *InsertPt = &L->getHeader()->front();
WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
// Remembering the WideIV increment generated by SCEVExpander allows
- // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // widenIVUse to reuse it when widening the narrow IV's increment. We don't
// employ a general reuse mechanism because the call above is the only call to
// SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
@@ -1329,13 +1426,13 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *WideUse = WidenIVUse(DU, Rewriter);
+ Instruction *WideUse = widenIVUse(DU, Rewriter);
// Follow all def-use edges from the previous narrow use.
if (WideUse)
pushNarrowIVUsers(DU.NarrowUse, WideUse);
- // WidenIVUse may have removed the def-use edge.
+ // widenIVUse may have removed the def-use edge.
if (DU.NarrowDef->use_empty())
DeadInsts.emplace_back(DU.NarrowDef);
}
@@ -1352,38 +1449,38 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
//===----------------------------------------------------------------------===//
namespace {
- class IndVarSimplifyVisitor : public IVVisitor {
- ScalarEvolution *SE;
- const TargetTransformInfo *TTI;
- PHINode *IVPhi;
-
- public:
- WideIVInfo WI;
-
- IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
- const TargetTransformInfo *TTI,
- const DominatorTree *DTree)
- : SE(SCEV), TTI(TTI), IVPhi(IV) {
- DT = DTree;
- WI.NarrowIV = IVPhi;
- if (ReduceLiveIVs)
- setSplitOverflowIntrinsics();
- }
+class IndVarSimplifyVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const TargetTransformInfo *TTI;
+ PHINode *IVPhi;
- // Implement the interface used by simplifyUsersOfIV.
- void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
- };
+public:
+ WideIVInfo WI;
+
+ IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
+ const TargetTransformInfo *TTI,
+ const DominatorTree *DTree)
+ : SE(SCEV), TTI(TTI), IVPhi(IV) {
+ DT = DTree;
+ WI.NarrowIV = IVPhi;
+ if (ReduceLiveIVs)
+ setSplitOverflowIntrinsics();
+ }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
+};
}
-/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
-/// users. Each successive simplification may push more users which may
-/// themselves be candidates for simplification.
+/// Iteratively perform simplification on a worklist of IV users. Each
+/// successive simplification may push more users which may themselves be
+/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
///
-void IndVarSimplify::SimplifyAndExtend(Loop *L,
+void IndVarSimplify::simplifyAndExtend(Loop *L,
SCEVExpander &Rewriter,
- LPPassManager &LPM) {
+ LoopInfo *LI) {
SmallVector<WideIVInfo, 8> WideIVs;
SmallVector<PHINode*, 8> LoopPhis;
@@ -1400,14 +1497,14 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
// extension. The first time SCEV attempts to normalize sign/zero extension,
// the result becomes final. So for the most predictable results, we delay
  // evaluation of sign/zero extensions until needed, and avoid running
- // other SCEV based analysis prior to SimplifyAndExtend.
+ // other SCEV based analysis prior to simplifyAndExtend.
do {
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
- Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
+ Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor);
if (Visitor.WI.WidestNativeType) {
WideIVs.push_back(Visitor.WI);
@@ -1416,7 +1513,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
for (; !WideIVs.empty(); WideIVs.pop_back()) {
WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
- if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
}
@@ -1425,12 +1522,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
}
//===----------------------------------------------------------------------===//
-// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
+/// Return true if this loop's backedge taken count expression can be safely and
+/// cheaply expanded into an instruction sequence that can be used by
+/// linearFunctionTestReplace.
///
/// TODO: This fails for pointer-type loop counters with greater than one byte
/// strides, consequently preventing LFTR from running. For the purpose of LFTR
@@ -1461,8 +1558,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
return true;
}
-/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
-/// invariant value to the phi.
+/// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
@@ -1513,8 +1609,8 @@ static ICmpInst *getLoopTest(Loop *L) {
return dyn_cast<ICmpInst>(BI->getCondition());
}
-/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
-/// that the current exit test is already sufficiently canonical.
+/// linearFunctionTestReplace policy. Return true unless we can show that the
+/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Do LFTR to simplify the exit condition to an ICMP.
ICmpInst *Cond = getLoopTest(L);
@@ -1574,10 +1670,10 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
return false;
// Optimistically handle other instructions.
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
- if (!Visited.insert(*OI).second)
+ for (Value *Op : I->operands()) {
+ if (!Visited.insert(Op).second)
continue;
- if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
+ if (!hasConcreteDefImpl(Op, Visited, Depth+1))
return false;
}
return true;
@@ -1594,8 +1690,8 @@ static bool hasConcreteDef(Value *V) {
return hasConcreteDefImpl(V, Visited, 0);
}
-/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
-/// be rewritten) loop exit test.
+/// Return true if this IV has any uses other than the (soon to be rewritten)
+/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
@@ -1608,7 +1704,7 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
return true;
}
-/// FindLoopCounter - Find an affine IV in canonical form.
+/// Find an affine IV in canonical form.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
@@ -1702,8 +1798,8 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
return BestPhi;
}
-/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
-/// holds the RHS of the new loop test.
+/// Help linearFunctionTestReplace by generating a value that holds the RHS of
+/// the new loop test.
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
SCEVExpander &Rewriter, ScalarEvolution *SE) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
@@ -1785,13 +1881,13 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
}
}
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable. This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
+/// This method rewrites the exit condition of the loop to be a canonical !=
+/// comparison against the incremented loop induction variable. This pass is
+/// able to rewrite the exit tests of any loop where the SCEV analysis can
+/// determine a loop-invariant trip count of the loop, which is actually a much
+/// broader range than just linear tests.
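// [Editor's illustration] Schematically, LFTR turns a latch such as
//   br i1 (icmp slt i32 %iv, %n), label %loop, label %exit
// into
//   br i1 (icmp ne i32 %iv.next, %limit), label %loop, label %exit
// where %limit is the value genLoopLimit materializes from the SCEV trip
// count.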
Value *IndVarSimplify::
-LinearFunctionTestReplace(Loop *L,
+linearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
@@ -1809,7 +1905,7 @@ LinearFunctionTestReplace(Loop *L,
// This addition may overflow, which is valid as long as the comparison is
// truncated to BackedgeTakenCount->getType().
IVCount = SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
+ SE->getOne(BackedgeTakenCount->getType()));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
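  // [Editor's note] Concretely: an i8 BackedgeTakenCount of 255 (a body that
  // executes 256 times) wraps to an IVCount of 0 here; the constant-count
  // handling below accounts for exactly this overflow.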
@@ -1847,8 +1943,8 @@ LinearFunctionTestReplace(Loop *L,
const SCEV *ARStep = AR->getStepRecurrence(*SE);
// For constant IVCount, avoid truncation.
if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
- const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue();
- APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue();
+ const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
+ APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
// Note that the post-inc value of BackedgeTakenCount may have overflowed
// above such that IVCount is now zero.
if (IVCount != BackedgeTakenCount && Count == 0) {
@@ -1886,21 +1982,21 @@ LinearFunctionTestReplace(Loop *L,
}
//===----------------------------------------------------------------------===//
-// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+// sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
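// [Editor's illustration] e.g. a preheader computation whose only user sits
// past the loop:
//   preheader:  %a = add i32 %x, %y     ; unused inside the loop
//   exit:       call void @use(i32 %a)
// Sinking %a into %exit keeps it from being live across the entire loop.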
-void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
- Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
- BasicBlock::iterator I = Preheader->getTerminator();
+ Instruction *InsertPt = &*ExitBlock->getFirstInsertionPt();
+ BasicBlock::iterator I(Preheader->getTerminator());
while (I != Preheader->begin()) {
--I;
// New instructions were inserted at the end of the preheader.
@@ -1920,8 +2016,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
if (isa<DbgInfoIntrinsic>(I))
continue;
- // Skip landingpad instructions.
- if (isa<LandingPadInst>(I))
+    // Skip EH pad instructions.
+ if (I->isEHPad())
continue;
// Don't sink alloca: we never want to sink static alloca's out of the
@@ -1953,7 +2049,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
continue;
// Otherwise, sink it to the exit block.
- Instruction *ToMove = I;
+ Instruction *ToMove = &*I;
bool Done = false;
if (I != Preheader->begin()) {
@@ -1994,7 +2090,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
@@ -2007,7 +2103,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If there are any floating-point recurrences, attempt to
// transform them to use integer recurrences.
- RewriteNonIntegerIVs(L);
+ rewriteNonIntegerIVs(L);
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
@@ -2024,7 +2120,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
Rewriter.disableCanonicalMode();
- SimplifyAndExtend(L, Rewriter, LPM);
+ simplifyAndExtend(L, Rewriter, LI);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -2034,7 +2130,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
//
if (ReplaceExitValue != NeverRepl &&
!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- RewriteLoopExitValues(L, Rewriter);
+ rewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
@@ -2054,7 +2150,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// explicitly check any assumptions made by SCEV. Brittle.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
if (!AR || AR->getLoop()->getLoopPreheader())
- (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
Rewriter);
}
}
@@ -2074,13 +2170,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Loop-invariant instructions in the preheader that aren't used in the
// loop may be sunk below the loop to reduce register pressure.
- SinkUnusedInvariants(L);
+ sinkUnusedInvariants(L);
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
+
// Check a post-condition.
- assert(L->isLCSSAForm(*DT) &&
- "Indvars did not leave the loop in lcssa form!");
+ assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
// Verify that LFTR, and any other change have not interfered with SCEV's
// ability to compute trip count.
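A minimal sketch, assuming a valid `SE` and `L`, of the two ScalarEvolution API moves the IndVarSimplify hunks above depend on: `getOne` as shorthand for a constant-one SCEV, and `SCEVConstant::getAPInt`, which collapses the old `getValue()->getValue()` chain. This is illustrative and not part of the patch:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

static void tripCountSketch(ScalarEvolution *SE, Loop *L) {
  const SCEV *BTC = SE->getBackedgeTakenCount(L);
  // Trip count = backedge-taken count + 1; the add may wrap, which is fine
  // as long as the exit comparison is evaluated in BTC's type.
  const SCEV *IVCount = SE->getAddExpr(BTC, SE->getOne(BTC->getType()));
  if (const auto *C = dyn_cast<SCEVConstant>(IVCount)) {
    const APInt &Count = C->getAPInt(); // replaces getValue()->getValue()
    (void)Count;
  }
}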
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index cbdacad8f28b..dea61f6ff3d7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -214,8 +214,8 @@ public:
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<BranchProbabilityInfo>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -224,8 +224,15 @@ public:
char InductiveRangeCheckElimination::ID = 0;
}
-INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
- "Inductive range check elimination", false, false)
+INITIALIZE_PASS_BEGIN(InductiveRangeCheckElimination, "irce",
+ "Inductive range check elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(InductiveRangeCheckElimination, "irce",
+ "Inductive range check elimination", false, false)
const char *InductiveRangeCheck::rangeCheckKindToStr(
InductiveRangeCheck::RangeCheckKind RCK) {
@@ -1044,9 +1051,9 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
- &F, BBInsertLocation);
+ &F, &*BBInsertLocation);
RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
- BBInsertLocation);
+ &*BBInsertLocation);
BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
bool Increasing = LS.IndVarIncreasing;
@@ -1399,8 +1406,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
LLVMContext &Context = Preheader->getContext();
InductiveRangeCheck::AllocatorTy IRCAlloc;
SmallVector<InductiveRangeCheck *, 16> RangeChecks;
- ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
- BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ BranchProbabilityInfo &BPI =
+ getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
for (auto BBI : L->getBlocks())
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
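The INITIALIZE_PASS_BEGIN/DEPENDENCY/END split above is the standard way to make a pass's required analyses visible to the pass registry. A hedged sketch of the pattern, with a hypothetical MyLoopPass assumed to be declared in the usual way (static char ID, etc.); the macros themselves are the real LLVM ones:

INITIALIZE_PASS_BEGIN(MyLoopPass, "my-loop-pass", "Example loop pass",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(MyLoopPass, "my-loop-pass", "Example loop pass",
                    false, false)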
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 1130d228acb8..087ce8ac50d4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,15 +18,22 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -36,6 +43,8 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <memory>
using namespace llvm;
#define DEBUG_TYPE "jump-threading"
@@ -49,6 +58,13 @@ BBDuplicateThreshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
+static cl::opt<unsigned>
+ImplicationSearchThreshold(
+ "jump-threading-implication-search-threshold",
+ cl::desc("The number of predecessors to search for a stronger "
+ "condition to use to thread over a weaker condition"),
+ cl::init(3), cl::Hidden);
+
namespace {
// These are at global scope so static functions can use them too.
typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
@@ -80,6 +96,9 @@ namespace {
class JumpThreading : public FunctionPass {
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ bool HasProfileData;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
@@ -114,9 +133,15 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
+ void releaseMemory() override {
+ BFI.reset();
+ BPI.reset();
+ }
+
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
@@ -134,9 +159,16 @@ namespace {
bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
+ bool ProcessImpliedCondition(BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+
+ private:
+ BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ const char *Suffix);
+ void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
+ BasicBlock *NewBB, BasicBlock *SuccBB);
};
}
@@ -160,11 +192,21 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
LVI = &getAnalysis<LazyValueInfo>();
+ BFI.reset();
+ BPI.reset();
+ // When profile data is available, we need to update edge weights after
+  // successful jump threading, which requires both BPI and BFI to be available.
+ HasProfileData = F.getEntryCount().hasValue();
+ if (HasProfileData) {
+ LoopInfo LI{DominatorTree(F)};
+ BPI.reset(new BranchProbabilityInfo(F, LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ }
 // Remove unreachable blocks from the function, as they may result in an
 // infinite loop. We do threading if we find something profitable. Jump threading a
// branch can create other opportunities. If these opportunities form a cycle
- // i.e. if any jump treading is undoing previous threading in the path, then
+ // i.e. if any jump threading is undoing previous threading in the path, then
// we will loop forever. We take care of this issue by not jump threading for
// back edges. This works for normal cases but not for unreachable blocks as
// they may have cycle with no back edge.
@@ -176,7 +218,7 @@ bool JumpThreading::runOnFunction(Function &F) {
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
- BasicBlock *BB = I;
+ BasicBlock *BB = &*I;
// Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
@@ -239,11 +281,26 @@ bool JumpThreading::runOnFunction(Function &F) {
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
unsigned Threshold) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
- BasicBlock::const_iterator I = BB->getFirstNonPHI();
+ BasicBlock::const_iterator I(BB->getFirstNonPHI());
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
+ unsigned Bonus = 0;
+ const TerminatorInst *BBTerm = BB->getTerminator();
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(BBTerm))
+ Bonus = 6;
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(BBTerm))
+ Bonus = 8;
+
+ // Bump the threshold up so the early exit from the loop doesn't skip the
+ // terminator-based Size adjustment at the end.
+ Threshold += Bonus;
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
@@ -260,6 +317,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
+    // Bail out if this instruction gives back a token type; it is not possible
+ // to duplicate it if it is used outside this BB.
+ if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
+ return ~0U;
+
// All other instructions count for at least one unit.
++Size;
@@ -268,7 +330,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->cannotDuplicate())
+ if (CI->cannotDuplicate() || CI->isConvergent())
// Blocks with NoDuplicate are modelled as having infinite cost, so they
// are never duplicated.
return ~0U;
@@ -279,16 +341,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
}
}
- // Threading through a switch statement is particularly profitable. If this
- // block ends in a switch, decrease its cost to make it more likely to happen.
- if (isa<SwitchInst>(I))
- Size = Size > 6 ? Size-6 : 0;
-
- // The same holds for indirect branches, but slightly more so.
- if (isa<IndirectBrInst>(I))
- Size = Size > 8 ? Size-8 : 0;
-
- return Size;
+ return Size > Bonus ? Size - Bonus : 0;
}
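Distilled logic of the refactoring above, as a standalone hypothetical (not the patch's code): the switch/indirectbr bonus must raise the threshold before the scan, or the early exit could reject a block that the end-of-function discount would have accepted:

static unsigned duplicationCost(unsigned NumInsts, unsigned Threshold,
                                unsigned Bonus) {
  Threshold += Bonus; // keep the early exit consistent with the discount
  unsigned Size = 0;
  for (unsigned N = 0; N != NumInsts; ++N) {
    if (Size > Threshold)
      return Size; // already too expensive to duplicate
    ++Size;
  }
  return Size > Bonus ? Size - Bonus : 0; // apply the terminator bonus
}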
/// FindLoopHeaders - We do not want jump threading to turn proper loop
@@ -669,7 +722,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
- if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+ const TerminatorInst *TI = SinglePred->getTerminator();
+ if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
@@ -761,7 +815,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// If we're branching on a conditional, LVI might be able to determine
 // its value at the branch instruction. We only handle comparisons
// against a constant at this time.
- // TODO: This should be extended to handle switches as well.
+ // TODO: This should be extended to handle switches as well.
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
if (CondBr && CondConst && CondBr->isConditional()) {
@@ -829,9 +883,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+ // Search for a stronger dominating condition that can be used to simplify a
+ // conditional branch leaving BB.
+ if (ProcessImpliedCondition(BB))
+ return true;
+
+ return false;
+}
+
+bool JumpThreading::ProcessImpliedCondition(BasicBlock *BB) {
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ BasicBlock *CurrentBB = BB;
+ BasicBlock *CurrentPred = BB->getSinglePredecessor();
+ unsigned Iter = 0;
+
+ auto &DL = BB->getModule()->getDataLayout();
+
+ while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
+ auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
+ if (!PBI || !PBI->isConditional() || PBI->getSuccessor(0) != CurrentBB)
+ return false;
- // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)", thread through this block.
+ if (isImpliedCondition(PBI->getCondition(), Cond, DL)) {
+ BI->getSuccessor(1)->removePredecessor(BB);
+ BranchInst::Create(BI->getSuccessor(0), BI);
+ BI->eraseFromParent();
+ return true;
+ }
+ CurrentBB = CurrentPred;
+ CurrentPred = CurrentBB->getSinglePredecessor();
+ }
return false;
}
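A sketch of the shape ProcessImpliedCondition looks for, with assumed value names: if the sole predecessor branches to BB on `%x > 10`, that condition implies BB's own `%x > 0`, so BB's conditional branch can be folded to an unconditional one. The underlying query:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

// C1 is the predecessor's condition (e.g. icmp sgt %x, 10) and C2 is BB's
// condition (e.g. icmp sgt %x, 0); DL is the module's data layout.
static bool predImpliesOurCondition(Value *C1, Value *C2,
                                    const DataLayout &DL) {
  return isImpliedCondition(C1, C2, DL);
}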
@@ -850,10 +935,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (LoadBB->getSinglePredecessor())
return false;
- // If the load is defined in a landing pad, it can't be partially redundant,
- // because the edges between the invoke and the landing pad cannot have other
+ // If the load is defined in an EH pad, it can't be partially redundant,
+ // because the edges between the invoke and the EH pad cannot have other
// instructions between them.
- if (LoadBB->isLandingPad())
+ if (LoadBB->isEHPad())
return false;
Value *LoadedPtr = LI->getOperand(0);
@@ -866,11 +951,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
- BasicBlock::iterator BBIt = LI;
+ BasicBlock::iterator BBIt(LI);
if (Value *AvailableVal =
- FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
- // If the value if the load is locally available within the block, just use
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, DefMaxInstsToScan)) {
+ // If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
@@ -914,7 +999,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
AAMDNodes ThisAATags;
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt,
+ DefMaxInstsToScan,
nullptr, &ThisAATags);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
@@ -968,8 +1054,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
}
// Split them out to their own block.
- UnavailablePred =
- SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split");
+ UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
}
// If the value isn't available in all predecessors, then there will be
@@ -995,7 +1080,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Create a PHI node at the start of the block for the PRE'd load value.
pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
- LoadBB->begin());
+ &LoadBB->front());
PN->takeName(LI);
PN->setDebugLoc(LI->getDebugLoc());
@@ -1262,7 +1347,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Into:
// BB':
// %Y = icmp ne i32 %A, %B
- // br i1 %Z, ...
+ // br i1 %Y, ...
PredValueInfoTy XorOpValues;
bool isLHS = true;
@@ -1387,14 +1472,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
return false;
}
- // And finally, do it! Start by factoring the predecessors is needed.
+ // And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
+ PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
}
// And finally, do it!
@@ -1415,6 +1500,13 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
BB->getParent(), BB);
NewBB->moveAfter(PredBB);
+ // Set the block frequency of NewBB.
+ if (HasProfileData) {
+ auto NewBBFreq =
+ BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
+ BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ }
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
@@ -1425,7 +1517,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
Instruction *New = BI->clone();
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
- ValueMapping[BI] = New;
+ ValueMapping[&*BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
@@ -1438,7 +1530,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
- BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
+ BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
@@ -1475,8 +1567,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+ SSAUpdate.AddAvailableValue(BB, &*I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1499,11 +1591,98 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// frequently happens because of phi translation.
SimplifyInstructionsInBlock(NewBB, TLI);
+ // Update the edge weight from BB to SuccBB, which should be less than before.
+ UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
+
// Threaded an edge!
++NumThreads;
return true;
}
+/// Create a new basic block that will be the predecessor of BB and successor of
+/// all blocks in Preds. When profile data is available, update the frequency of
+/// this new block.
+BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix) {
+ // Collect the frequencies of all predecessors of BB, which will be used to
+ // update the edge weight on BB->SuccBB.
+ BlockFrequency PredBBFreq(0);
+ if (HasProfileData)
+ for (auto Pred : Preds)
+ PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
+
+ BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix);
+
+ // Set the block frequency of the newly created PredBB, which is the sum of
+ // frequencies of Preds.
+ if (HasProfileData)
+ BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
+ return PredBB;
+}
+
+/// Update the block frequency of BB and the branch-weight metadata on the
+/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
+/// Freq(PredBB->BB) / Freq(BB->SuccBB).
+void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
+ BasicBlock *BB,
+ BasicBlock *NewBB,
+ BasicBlock *SuccBB) {
+ if (!HasProfileData)
+ return;
+
+ assert(BFI && BPI && "BFI & BPI should have been created here");
+
+ // As the edge from PredBB to BB is deleted, we have to update the block
+ // frequency of BB.
+ auto BBOrigFreq = BFI->getBlockFreq(BB);
+ auto NewBBFreq = BFI->getBlockFreq(NewBB);
+ auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
+ auto BBNewFreq = BBOrigFreq - NewBBFreq;
+ BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
+
+ // Collect updated outgoing edges' frequencies from BB and use them to update
+ // edge probabilities.
+ SmallVector<uint64_t, 4> BBSuccFreq;
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ auto SuccFreq = (*I == SuccBB)
+ ? BB2SuccBBFreq - NewBBFreq
+ : BBOrigFreq * BPI->getEdgeProbability(BB, *I);
+ BBSuccFreq.push_back(SuccFreq.getFrequency());
+ }
+
+ uint64_t MaxBBSuccFreq =
+ *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
+
+ SmallVector<BranchProbability, 4> BBSuccProbs;
+ if (MaxBBSuccFreq == 0)
+ BBSuccProbs.assign(BBSuccFreq.size(),
+ {1, static_cast<unsigned>(BBSuccFreq.size())});
+ else {
+ for (uint64_t Freq : BBSuccFreq)
+ BBSuccProbs.push_back(
+ BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
+ // Normalize edge probabilities so that they sum up to one.
+ BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
+ BBSuccProbs.end());
+ }
+
+ // Update edge probabilities in BPI.
+ for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
+ BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
+
+ if (BBSuccProbs.size() >= 2) {
+ SmallVector<uint32_t, 4> Weights;
+ for (auto Prob : BBSuccProbs)
+ Weights.push_back(Prob.getNumerator());
+
+ auto TI = BB->getTerminator();
+ TI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
+ }
+}
+
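A worked example, with made-up numbers, for the update above: suppose Freq(BB) = 100 with edge probabilities 0.8 to SuccBB and 0.2 to Other, and the threaded predecessor contributed Freq(NewBB) = 50, all of which now reaches SuccBB through NewBB. BB's frequency drops to 100 - 50 = 50, the BB->SuccBB frequency to 80 - 50 = 30, Other keeps 20, and normalizing against the max (30) yields probabilities 0.6 and 0.4:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/BranchProbability.h"
using namespace llvm;

static void normalizeSketch() {
  SmallVector<BranchProbability, 2> Probs;
  Probs.push_back(BranchProbability::getBranchProbability(30, 30));
  Probs.push_back(BranchProbability::getBranchProbability(20, 30));
  // Rescale so the probabilities sum to one: 0.6 and 0.4.
  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
}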
/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
@@ -1530,14 +1709,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
return false;
}
- // And finally, do it! Start by factoring the predecessors is needed.
+ // And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
+ PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
@@ -1581,12 +1760,12 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (Value *IV =
SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
delete New;
- ValueMapping[BI] = IV;
+ ValueMapping[&*BI] = IV;
} else {
// Otherwise, insert the new instruction into the block.
New->setName(BI->getName());
- PredBB->getInstList().insert(OldPredBranch, New);
- ValueMapping[BI] = New;
+ PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
+ ValueMapping[&*BI] = New;
}
}
@@ -1628,8 +1807,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+ SSAUpdate.AddAvailableValue(BB, &*I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
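The SimplifyPartiallyRedundantLoad hunks above replace the hard-coded scan depth of 6 with the shared DefMaxInstsToScan knob. A minimal sketch of the call, assuming the LLVM 3.8-era signature declared in llvm/Analysis/Loads.h:

#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static Value *findAvailableSketch(LoadInst *LI) {
  // Scan backwards from the load within its own block, up to
  // DefMaxInstsToScan instructions (a cl::opt shared across passes).
  BasicBlock::iterator ScanFrom(LI);
  return FindAvailableLoadedValue(LI->getPointerOperand(), LI->getParent(),
                                  ScanFrom, DefMaxInstsToScan);
}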
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 43fc50e588f8..6d70cdc3ade2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -34,10 +34,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -118,9 +121,12 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -164,9 +170,12 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -183,7 +192,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -264,9 +273,10 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// FIXME: This is really heavy handed. It would be a bit better to use an
// SSAUpdater strategy during promotion that was LCSSA aware and reformed
// it as it went.
- if (Changed)
- formLCSSARecursively(*L, *DT, LI,
- getAnalysisIfAvailable<ScalarEvolution>());
+ if (Changed) {
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ formLCSSARecursively(*L, *DT, LI, SEWP ? &SEWP->getSE() : nullptr);
+ }
}
 // Check that neither this loop nor its parent has had LCSSA broken. LICM is
@@ -402,7 +412,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
}
/// Computes loop safety information, checks loop body & header
-/// for the possiblity of may throw exception.
+/// for the possibility that an instruction may throw an exception.
///
void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
 assert(CurLoop != nullptr && "CurLoop can't be null");
@@ -410,7 +420,7 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
// Setting default safety values.
SafetyInfo->MayThrow = false;
SafetyInfo->HeaderMayThrow = false;
- // Iterate over header and compute dafety info.
+ // Iterate over header and compute safety info.
for (BasicBlock::iterator I = Header->begin(), E = Header->end();
(I != E) && !SafetyInfo->HeaderMayThrow; ++I)
SafetyInfo->HeaderMayThrow |= I->mayThrow();
@@ -445,7 +455,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
- Size = AA->getTypeStoreSize(LI->getType());
+ Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
@@ -457,10 +467,21 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
return false;
// Handle simple cases by querying alias analysis.
- AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
- if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
+ if (Behavior == FMRB_DoesNotAccessMemory)
return true;
if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ // A readonly argmemonly function only reads from memory pointed to by
+    // its arguments with arbitrary offsets. If we can prove there are no
+ // writes to this memory in the loop, we can hoist or sink.
+ if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
+ for (Value *Op : CI->arg_operands())
+ if (Op->getType()->isPointerTy() &&
+ pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize,
+ AAMDNodes(), CurAST))
+ return false;
+ return true;
+ }
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
@@ -566,7 +587,7 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I,
if (!OLoop->contains(&PN)) {
PHINode *OpPN =
PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
- OInst->getName() + ".lcssa", ExitBlock.begin());
+ OInst->getName() + ".lcssa", &ExitBlock.front());
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
*OI = OpPN;
@@ -651,6 +672,10 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) {
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
+ // Metadata can be dependent on the condition we are hoisting above.
+ // Conservatively strip all metadata on the instruction.
+ I.dropUnknownNonDebugMetadata();
+
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumHoisted;
@@ -730,9 +755,9 @@ namespace {
if (!L->contains(BB)) {
// We need to create an LCSSA PHI node for the incoming value and
// store that.
- PHINode *PN = PHINode::Create(
- I->getType(), PredCache.size(BB),
- I->getName() + ".lcssa", BB->begin());
+ PHINode *PN =
+ PHINode::Create(I->getType(), PredCache.size(BB),
+ I->getName() + ".lcssa", &BB->front());
for (BasicBlock *Pred : PredCache.get(BB))
PN->addIncoming(I, Pred);
return PN;
@@ -942,7 +967,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
CurLoop->getUniqueExitBlocks(ExitBlocks);
InsertPts.resize(ExitBlocks.size());
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
+ InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt();
}
// We use the SSAUpdater interface to insert phi nodes as required.
@@ -973,7 +998,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
return Changed;
}
-/// Simple Analysis hook. Clone alias set info.
+/// Simple analysis hook. Clone alias set info.
///
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
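Why the hoist hunk above strips metadata, as a sketch with assumed shapes: a load guarded by a null check inside the loop may carry !nonnull metadata that is only valid under that guard; once the instruction moves above the condition, the metadata must go:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void hoistSketch(Instruction &I, BasicBlock *Preheader) {
  I.moveBefore(Preheader->getTerminator());
  // Conservatively drop all non-debug metadata; facts such as !nonnull or
  // !range may have been justified only by the guard we hoisted past.
  I.dropUnknownNonDebugMetadata();
}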
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
index c19cd19059b2..1648878b0628 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -56,7 +57,7 @@ class LoadCombine : public BasicBlockPass {
public:
LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
- initializeSROAPass(*PassRegistry::getPassRegistry());
+ initializeLoadCombinePass(*PassRegistry::getPassRegistry());
}
using llvm::Pass::doInitialization;
@@ -223,7 +224,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
if (skipOptnoneFunction(BB))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
IRBuilder<true, TargetFolder> TheBuilder(
BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
@@ -262,8 +263,8 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
char LoadCombine::ID = 0;
@@ -274,7 +275,8 @@ BasicBlockPass *llvm::createLoadCombinePass() {
INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads",
false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads",
false, false)
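The one-line constructor fix above repairs a copy-paste bug: the pass was running SROA's initialization instead of its own. The canonical pairing, with a hypothetical pass name (the initializer function is generated by the INITIALIZE_PASS macros, assumed to appear elsewhere):

#include "llvm/Pass.h"
using namespace llvm;

// Assumes INITIALIZE_PASS(MyCombine, "my-combine", ..., false, false)
// elsewhere, which generates initializeMyCombinePass().
class MyCombine : public BasicBlockPass {
public:
  static char ID;
  MyCombine() : BasicBlockPass(ID) {
    // Must name *this* pass; naming another pass, as the old code did,
    // leaves this pass's own dependencies unregistered.
    initializeMyCombinePass(*PassRegistry::getPassRegistry());
  }
};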
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 98b068edf582..bc00ff3f3a42 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
@@ -35,18 +36,19 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
@@ -64,7 +66,7 @@ INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
@@ -130,7 +132,7 @@ bool LoopDeletion::isLoopDead(Loop *L,
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
@@ -169,7 +171,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
@@ -242,9 +244,8 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
for (BasicBlock *BB : blocks)
loopInfo.removeBlock(BB);
- // The last step is to inform the loop pass manager that we've
- // eliminated this loop.
- LPM.deleteLoopFromQueue(L);
+ // The last step is to update LoopInfo now that we've eliminated this loop.
+ loopInfo.updateUnloop(L);
Changed = true;
++NumDeleted;
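A minimal sketch, assuming a valid SE, of the trip-count guard the hunk above keeps in place: a loop whose maximum backedge-taken count cannot be computed might be infinite, and deleting it would change observable behavior:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

static bool provablyFinite(ScalarEvolution &SE, Loop *L) {
  const SCEV *MaxBTC = SE.getMaxBackedgeTakenCount(L);
  // SCEVCouldNotCompute means possibly infinite: not safe to delete.
  return !isa<SCEVCouldNotCompute>(MaxBTC);
}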
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 1b9859b57790..3d3cf3e2890b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <list>
@@ -54,6 +55,11 @@ static cl::opt<bool> DistributeNonIfConvertible(
"if-convertible by the loop vectorizer"),
cl::init(false));
+static cl::opt<unsigned> DistributeSCEVCheckThreshold(
+ "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed for Loop "
+ "Distribution"));
+
STATISTIC(NumLoopsDistributed, "Number of loops distributed");
namespace {
@@ -164,9 +170,7 @@ public:
 // Delete the instructions backwards, as this reduces the likelihood of
 // having to update as many def-use and use-def chains.
- for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) {
- auto *Inst = *I;
-
+ for (auto *Inst : make_range(Unused.rbegin(), Unused.rend())) {
if (!Inst->use_empty())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
Inst->eraseFromParent();
@@ -373,7 +377,7 @@ public:
/// \brief This performs the main chunk of the work of cloning the loops for
/// the partitions.
- void cloneLoops(Pass *P) {
+ void cloneLoops() {
BasicBlock *OrigPH = L->getLoopPreheader();
// At this point the predecessor of the preheader is either the memcheck
// block or the top part of the original preheader.
@@ -547,11 +551,11 @@ public:
MemoryInstructionDependences(
const SmallVectorImpl<Instruction *> &Instructions,
- const SmallVectorImpl<Dependence> &InterestingDependences) {
+ const SmallVectorImpl<Dependence> &Dependences) {
Accesses.append(Instructions.begin(), Instructions.end());
DEBUG(dbgs() << "Backward dependences:\n");
- for (auto &Dep : InterestingDependences)
+ for (auto &Dep : Dependences)
if (Dep.isPossiblyBackward()) {
// Note that the designations source and destination follow the program
// order, i.e. source is always first. (The direction is given by the
@@ -567,25 +571,6 @@ private:
AccessesType Accesses;
};
-/// \brief Returns the instructions that use values defined in the loop.
-static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) {
- SmallVector<Instruction *, 8> UsedOutside;
-
- for (auto *Block : L->getBlocks())
- // FIXME: I believe that this could use copy_if if the Inst reference could
- // be adapted into a pointer.
- for (auto &Inst : *Block) {
- auto Users = Inst.users();
- if (std::any_of(Users.begin(), Users.end(), [&](User *U) {
- auto *Use = cast<Instruction>(U);
- return !L->contains(Use->getParent());
- }))
- UsedOutside.push_back(&Inst);
- }
-
- return UsedOutside;
-}
-
/// \brief The pass class.
class LoopDistribute : public FunctionPass {
public:
@@ -597,6 +582,7 @@ public:
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LAA = &getAnalysis<LoopAccessAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Build up a worklist of inner-loops to vectorize. This is necessary as the
// act of distributing a loop creates new loops and can invalidate iterators
@@ -619,6 +605,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<LoopAccessAnalysis>();
@@ -629,6 +616,45 @@ public:
static char ID;
private:
+ /// \brief Filter out checks between pointers from the same partition.
+ ///
+ /// \p PtrToPartition contains the partition number for pointers. Partition
+ /// number -1 means that the pointer is used in multiple partitions. In this
+ /// case we can't safely omit the check.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4>
+ includeOnlyCrossPartitionChecks(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
+ const SmallVectorImpl<int> &PtrToPartition,
+ const RuntimePointerChecking *RtPtrChecking) {
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+ std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (unsigned PtrIdx1 : Check.first->Members)
+ for (unsigned PtrIdx2 : Check.second->Members)
+ // Only include this check if there is a pair of pointers
+ // that require checking and the pointers fall into
+ // separate partitions.
+ //
+ // (Note that we already know at this point that the two
+ // pointer groups need checking but it doesn't follow
+                   // that each pair of pointers within the two groups needs
+ // checking as well.
+ //
+ // In other words we don't want to include a check just
+ // because there is a pair of pointers between the two
+ // pointer groups that require checks and a different
+ // pair whose pointers fall into different partitions.)
+ if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
+ !RuntimePointerChecking::arePointersInSamePartition(
+ PtrToPartition, PtrIdx1, PtrIdx2))
+ return true;
+ return false;
+ });
+
+ return Checks;
+ }
+
/// \brief Try to distribute an inner-most loop.
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
@@ -655,9 +681,8 @@ private:
DEBUG(dbgs() << "Skipping; memory operations are safe for vectorization");
return false;
}
- auto *InterestingDependences =
- LAI.getDepChecker().getInterestingDependences();
- if (!InterestingDependences || InterestingDependences->empty()) {
+ auto *Dependences = LAI.getDepChecker().getDependences();
+ if (!Dependences || Dependences->empty()) {
DEBUG(dbgs() << "Skipping; No unsafe dependences to isolate");
return false;
}
@@ -685,7 +710,7 @@ private:
// NumUnsafeDependencesActive reaches 0.
const MemoryDepChecker &DepChecker = LAI.getDepChecker();
MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
- *InterestingDependences);
+ *Dependences);
int NumUnsafeDependencesActive = 0;
for (auto &InstDep : MID) {
@@ -735,6 +760,13 @@ private:
return false;
}
+ // Don't distribute the loop if we need too many SCEV run-time checks.
+ const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
+ if (Pred.getComplexity() > DistributeSCEVCheckThreshold) {
+ DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
+ return false;
+ }
+
DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
 // We're done forming the partitions; set up the reverse mapping from
// instructions to partitions.
@@ -746,20 +778,25 @@ private:
if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
SplitBlock(PH, PH->getTerminator(), DT, LI);
- // If we need run-time checks to disambiguate pointers are run-time, version
- // the loop now.
+ // If we need run-time checks, version the loop now.
auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
- LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition);
- if (LVer.needsRuntimeChecks()) {
+ const auto *RtPtrChecking = LAI.getRuntimePointerChecking();
+ const auto &AllChecks = RtPtrChecking->getChecks();
+ auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
+ RtPtrChecking);
+
+ if (!Pred.isAlwaysTrue() || !Checks.empty()) {
DEBUG(dbgs() << "\nPointers:\n");
- DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition));
- LVer.versionLoop(this);
- LVer.addPHINodes(DefsUsedOutside);
+ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+ LoopVersioning LVer(LAI, L, LI, DT, SE, false);
+ LVer.setAliasChecks(std::move(Checks));
+ LVer.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LVer.versionLoop(DefsUsedOutside);
}
// Create identical copies of the original loop for each partition and hook
// them up sequentially.
- Partitions.cloneLoops(this);
+ Partitions.cloneLoops();
 // Now, we remove the instructions from each loop that don't belong to that
// partition.
@@ -780,6 +817,7 @@ private:
LoopInfo *LI;
LoopAccessAnalysis *LAA;
DominatorTree *DT;
+ ScalarEvolution *SE;
};
} // anonymous namespace
@@ -790,6 +828,7 @@ INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
namespace llvm {
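A tiny illustration, with made-up data, of the per-partition filtering that includeOnlyCrossPartitionChecks performs above. Partition -1 marks a pointer used by several partitions, so its checks are never dropped:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
using namespace llvm;

static void partitionSketch() {
  SmallVector<int, 4> PtrToPartition = {0, 0, 1, -1};
  // Pointers 0 and 1 share partition 0: their mutual check can be dropped.
  bool Drop01 = RuntimePointerChecking::arePointersInSamePartition(
      PtrToPartition, 0, 1); // true
  // Pointer 2 is in partition 1: the 0 vs 2 check crosses partitions.
  bool Keep02 = !RuntimePointerChecking::arePointersInSamePartition(
      PtrToPartition, 0, 2); // true
  // Pointer 3 (partition -1) is shared: checks involving it are kept.
  (void)Drop01;
  (void)Keep02;
}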
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a21ca2417ca1..2d577de7c2b8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -31,11 +31,6 @@
// void foo(_Complex float *P)
// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
//
-// We should enhance this to handle negative strides through memory.
-// Alternatively (and perhaps better) we could rely on an earlier pass to force
-// forward iteration through memory, which is generally better for cache
-// behavior. Negative strides *do* happen for memset/memcpy loops.
-//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
@@ -44,7 +39,10 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -67,149 +65,85 @@ STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
namespace {
- class LoopIdiomRecognize;
+class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
+ const DataLayout *DL;
+
+public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
- /// This class defines some utility functions for loop idiom recognization.
- class LIRUtil {
- public:
- /// Return true iff the block contains nothing but an uncondition branch
- /// (aka goto instruction).
- static bool isAlmostEmpty(BasicBlock *);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
- static BranchInst *getBranch(BasicBlock *BB) {
- return dyn_cast<BranchInst>(BB->getTerminator());
- }
+private:
+ typedef SmallVector<StoreInst *, 8> StoreList;
+ StoreList StoreRefs;
- /// Derive the precondition block (i.e the block that guards the loop
- /// preheader) from the given preheader.
- static BasicBlock *getPrecondBb(BasicBlock *PreHead);
- };
-
- /// This class is to recoginize idioms of population-count conducted in
- /// a noncountable loop. Currently it only recognizes this pattern:
- /// \code
- /// while(x) {cnt++; ...; x &= x - 1; ...}
- /// \endcode
- class NclPopcountRecognize {
- LoopIdiomRecognize &LIR;
- Loop *CurLoop;
- BasicBlock *PreCondBB;
-
- typedef IRBuilder<> IRBuilderTy;
-
- public:
- explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
- bool recognize();
-
- private:
- /// Take a glimpse of the loop to see if we need to go ahead recoginizing
- /// the idiom.
- bool preliminaryScreen();
-
- /// Check if the given conditional branch is based on the comparison
- /// between a variable and zero, and if the variable is non-zero, the
- /// control yields to the loop entry. If the branch matches the behavior,
- /// the variable involved in the comparion is returned. This function will
- /// be called to see if the precondition and postcondition of the loop
- /// are in desirable form.
- Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const;
-
- /// Return true iff the idiom is detected in the loop. and 1) \p CntInst
- /// is set to the instruction counting the population bit. 2) \p CntPhi
- /// is set to the corresponding phi node. 3) \p Var is set to the value
- /// whose population bits are being counted.
- bool detectIdiom
- (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
-
- /// Insert ctpop intrinsic function and some obviously dead instructions.
- void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var);
-
- /// Create llvm.ctpop.* intrinsic function.
- CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
- };
-
- class LoopIdiomRecognize : public LoopPass {
- Loop *CurLoop;
- DominatorTree *DT;
- ScalarEvolution *SE;
- TargetLibraryInfo *TLI;
- const TargetTransformInfo *TTI;
- public:
- static char ID;
- explicit LoopIdiomRecognize() : LoopPass(ID) {
- initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DT = nullptr;
- SE = nullptr;
- TLI = nullptr;
- TTI = nullptr;
- }
+ /// \name Countable Loop Idiom Handling
+ /// @{
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
- SmallVectorImpl<BasicBlock*> &ExitBlocks);
-
- bool processLoopStore(StoreInst *SI, const SCEV *BECount);
- bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
-
- bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment,
- Value *SplatValue, Instruction *TheStore,
- const SCEVAddRecExpr *Ev,
- const SCEV *BECount);
- bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
- const SCEVAddRecExpr *StoreEv,
- const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG.
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
+ bool runOnCountableLoop();
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks);
- DominatorTree *getDominatorTree() {
- return DT ? DT
- : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree());
- }
+ void collectStores(BasicBlock *BB);
+ bool isLegalStore(StoreInst *SI);
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
- ScalarEvolution *getScalarEvolution() {
- return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
- }
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *SplatValue,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount, bool NegStride);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEV *BECount, bool NegStride);
- TargetLibraryInfo *getTargetLibraryInfo() {
- if (!TLI)
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ /// @}
+ /// \name Noncountable Loop Idiom Handling
+ /// @{
- return TLI;
- }
+ bool runOnNoncountableLoop();
- const TargetTransformInfo *getTargetTransformInfo() {
- return TTI ? TTI
- : (TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *CurLoop->getHeader()->getParent()));
- }
+ bool recognizePopcount();
+ void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
+ PHINode *CntPhi, Value *Var);
- Loop *getLoop() const { return CurLoop; }
+ /// @}
+};
- private:
- bool runOnNoncountableLoop();
- bool runOnCountableLoop();
- };
-}
+} // End anonymous namespace.
char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
@@ -218,9 +152,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
@@ -242,406 +179,64 @@ static void deleteDeadInstruction(Instruction *I,
//===----------------------------------------------------------------------===//
//
-// Implementation of LIRUtil
-//
-//===----------------------------------------------------------------------===//
-
-// This function will return true iff the given block contains nothing but goto.
-// A typical usage of this function is to check if the preheader function is
-// "almost" empty such that generated intrinsic functions can be moved across
-// the preheader and be placed at the end of the precondition block without
-// the concern of breaking data dependence.
-bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
- if (BranchInst *Br = getBranch(BB)) {
- return Br->isUnconditional() && Br == BB->begin();
- }
- return false;
-}
-
-BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
- if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
- BranchInst *Br = getBranch(BB);
- return Br && Br->isConditional() ? BB : nullptr;
- }
- return nullptr;
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Implementation of NclPopcountRecognize
+// Implementation of LoopIdiomRecognize
//
//===----------------------------------------------------------------------===//
-NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
- LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) {
-}
-
-bool NclPopcountRecognize::preliminaryScreen() {
- const TargetTransformInfo *TTI = LIR.getTargetTransformInfo();
- if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
- return false;
-
- // Counting population are usually conducted by few arithmetic instructions.
- // Such instructions can be easilly "absorbed" by vacant slots in a
- // non-compact loop. Therefore, recognizing popcount idiom only makes sense
- // in a compact loop.
-
- // Give up if the loop has multiple blocks or multiple backedges.
- if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
- return false;
-
- BasicBlock *LoopBody = *(CurLoop->block_begin());
- if (LoopBody->size() >= 20) {
- // The loop is too big, bail out.
- return false;
- }
-
- // It should have a preheader containing nothing but a goto instruction.
- BasicBlock *PreHead = CurLoop->getLoopPreheader();
- if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
return false;
- // It should have a precondition block where the generated popcount instrinsic
- // function will be inserted.
- PreCondBB = LIRUtil::getPrecondBb(PreHead);
- if (!PreCondBB)
+ CurLoop = L;
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it; just give up.
+ if (!L->getLoopPreheader())
return false;
- return true;
-}
-
-Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
- BasicBlock *LoopEntry) const {
- if (!Br || !Br->isConditional())
- return nullptr;
-
- ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
- if (!Cond)
- return nullptr;
-
- ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
- if (!CmpZero || !CmpZero->isZero())
- return nullptr;
-
- ICmpInst::Predicate Pred = Cond->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
- (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
- return Cond->getOperand(0);
-
- return nullptr;
-}
-
-bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
- PHINode *&CntPhi,
- Value *&Var) const {
- // Following code tries to detect this idiom:
- //
- // if (x0 != 0)
- // goto loop-exit // the precondition of the loop
- // cnt0 = init-val;
- // do {
- // x1 = phi (x0, x2);
- // cnt1 = phi(cnt0, cnt2);
- //
- // cnt2 = cnt1 + 1;
- // ...
- // x2 = x1 & (x1 - 1);
- // ...
- // } while(x != 0);
- //
- // loop-exit:
- //
-
- // step 1: Check to see if the look-back branch match this pattern:
- // "if (a!=0) goto loop-entry".
- BasicBlock *LoopEntry;
- Instruction *DefX2, *CountInst;
- Value *VarX1, *VarX0;
- PHINode *PhiX, *CountPhi;
-
- DefX2 = CountInst = nullptr;
- VarX1 = VarX0 = nullptr;
- PhiX = CountPhi = nullptr;
- LoopEntry = *(CurLoop->block_begin());
-
- // step 1: Check if the loop-back branch is in desirable form.
- {
- if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
- DefX2 = dyn_cast<Instruction>(T);
- else
- return false;
- }
-
- // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
- {
- if (!DefX2 || DefX2->getOpcode() != Instruction::And)
- return false;
-
- BinaryOperator *SubOneOp;
-
- if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
- VarX1 = DefX2->getOperand(1);
- else {
- VarX1 = DefX2->getOperand(0);
- SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
- }
- if (!SubOneOp)
- return false;
-
- Instruction *SubInst = cast<Instruction>(SubOneOp);
- ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
- if (!Dec ||
- !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
- (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
- return false;
- }
- }
-
- // step 3: Check the recurrence of variable X
- {
- PhiX = dyn_cast<PHINode>(VarX1);
- if (!PhiX ||
- (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
- return false;
- }
- }
-
- // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
- {
- CountInst = nullptr;
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
- IterE = LoopEntry->end(); Iter != IterE; Iter++) {
- Instruction *Inst = Iter;
- if (Inst->getOpcode() != Instruction::Add)
- continue;
-
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
- if (!Inc || !Inc->isOne())
- continue;
-
- PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
- if (!Phi || Phi->getParent() != LoopEntry)
- continue;
-
- // Check if the result of the instruction is live of the loop.
- bool LiveOutLoop = false;
- for (User *U : Inst->users()) {
- if ((cast<Instruction>(U))->getParent() != LoopEntry) {
- LiveOutLoop = true; break;
- }
- }
-
- if (LiveOutLoop) {
- CountInst = Inst;
- CountPhi = Phi;
- break;
- }
- }
-
- if (!CountInst)
- return false;
- }
-
- // step 5: check if the precondition is in this form:
- // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
- {
- BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
- Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
- if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
- return false;
-
- CntInst = CountInst;
- CntPhi = CountPhi;
- Var = T;
- }
-
- return true;
-}
-
-void NclPopcountRecognize::transform(Instruction *CntInst,
- PHINode *CntPhi, Value *Var) {
-
- ScalarEvolution *SE = LIR.getScalarEvolution();
- TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
- BasicBlock *PreHead = CurLoop->getLoopPreheader();
- BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
- const DebugLoc DL = CntInst->getDebugLoc();
-
- // Assuming before transformation, the loop is following:
- // if (x) // the precondition
- // do { cnt++; x &= x - 1; } while(x);
-
- // Step 1: Insert the ctpop instruction at the end of the precondition block
- IRBuilderTy Builder(PreCondBr);
- Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
- {
- PopCnt = createPopcntIntrinsic(Builder, Var, DL);
- NewCount = PopCntZext =
- Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
-
- if (NewCount != PopCnt)
- (cast<Instruction>(NewCount))->setDebugLoc(DL);
-
- // TripCnt is exactly the number of iterations the loop has
- TripCnt = NewCount;
-
- // If the population counter's initial value is not zero, insert Add Inst.
- Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
- ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
- if (!InitConst || !InitConst->isZero()) {
- NewCount = Builder.CreateAdd(NewCount, CntInitVal);
- (cast<Instruction>(NewCount))->setDebugLoc(DL);
- }
- }
-
- // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to
- // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic
- // function would be partial dead code, and downstream passes will drag
- // it back from the precondition block to the preheader.
- {
- ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
-
- Value *Opnd0 = PopCntZext;
- Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
- if (PreCond->getOperand(0) != Var)
- std::swap(Opnd0, Opnd1);
-
- ICmpInst *NewPreCond =
- cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
- PreCondBr->setCondition(NewPreCond);
-
- RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
- }
-
- // Step 3: Note that the population count is exactly the trip count of the
- // loop in question, which enble us to to convert the loop from noncountable
- // loop into a countable one. The benefit is twofold:
- //
- // - If the loop only counts population, the entire loop become dead after
- // the transformation. It is lots easier to prove a countable loop dead
- // than to prove a noncountable one. (In some C dialects, a infite loop
- // isn't dead even if it computes nothing useful. In general, DCE needs
- // to prove a noncountable loop finite before safely delete it.)
- //
- // - If the loop also performs something else, it remains alive.
- // Since it is transformed to countable form, it can be aggressively
- // optimized by some optimizations which are in general not applicable
- // to a noncountable loop.
- //
- // After this step, this loop (conceptually) would look like following:
- // newcnt = __builtin_ctpop(x);
- // t = newcnt;
- // if (x)
- // do { cnt++; x &= x-1; t--) } while (t > 0);
- BasicBlock *Body = *(CurLoop->block_begin());
- {
- BranchInst *LbBr = LIRUtil::getBranch(Body);
- ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
- Type *Ty = TripCnt->getType();
-
- PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
-
- Builder.SetInsertPoint(LbCond);
- Value *Opnd1 = cast<Value>(TcPhi);
- Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
- Instruction *TcDec =
- cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
-
- TcPhi->addIncoming(TripCnt, PreHead);
- TcPhi->addIncoming(TcDec, Body);
-
- CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
- CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
- LbCond->setPredicate(Pred);
- LbCond->setOperand(0, TcDec);
- LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
- }
-
- // Step 4: All the references to the original population counter outside
- // the loop are replaced with the NewCount -- the value returned from
- // __builtin_ctpop().
- CntInst->replaceUsesOutsideBlock(NewCount, Body);
-
- // step 5: Forget the "non-computable" trip-count SCEV associated with the
- // loop. The loop would otherwise not be deleted even if it becomes empty.
- SE->forgetLoop(CurLoop);
-}
-
-CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
- Value *Val, DebugLoc DL) {
- Value *Ops[] = { Val };
- Type *Tys[] = { Val->getType() };
-
- Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
- CallInst *CI = IRBuilder.CreateCall(Func, Ops);
- CI->setDebugLoc(DL);
-
- return CI;
-}
-
-/// recognize - detect population count idiom in a non-countable loop. If
-/// detected, transform the relevant code to popcount intrinsic function
-/// call, and return true; otherwise, return false.
-bool NclPopcountRecognize::recognize() {
-
- if (!LIR.getTargetTransformInfo())
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
return false;
- LIR.getScalarEvolution();
-
- if (!preliminaryScreen())
- return false;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *CurLoop->getHeader()->getParent());
+ DL = &CurLoop->getHeader()->getModule()->getDataLayout();
- Instruction *CntInst;
- PHINode *CntPhi;
- Value *Val;
- if (!detectIdiom(CntInst, CntPhi, Val))
- return false;
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
- transform(CntInst, CntPhi, Val);
- return true;
+ return runOnNoncountableLoop();
}
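
For orientation, a minimal sketch (illustrative only, not part of this patch) of the two loop shapes the dispatch above distinguishes; SCEV can compute a backedge-taken count of n - 1 for the first, so it takes the countable path, while the second's trip count depends on the bits of x:

    void zeroArray(int *a, unsigned n) {
      for (unsigned i = 0; i != n; ++i) // countable: runOnCountableLoop()
        a[i] = 0;
    }
    unsigned countBits(unsigned x) {
      unsigned cnt = 0;
      while (x) {                       // noncountable: runOnNoncountableLoop()
        ++cnt;
        x &= x - 1;
      }
      return cnt;
    }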
-//===----------------------------------------------------------------------===//
-//
-// Implementation of LoopIdiomRecognize
-//
-//===----------------------------------------------------------------------===//
-
bool LoopIdiomRecognize::runOnCountableLoop() {
const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
assert(!isa<SCEVCouldNotCompute>(BECount) &&
- "runOnCountableLoop() called on a loop without a predictable"
- "backedge-taken count");
+ "runOnCountableLoop() called on a loop without a predictable"
+ "backedge-taken count");
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- if (BECst->getValue()->getValue() == 0)
+ if (BECst->getAPInt() == 0)
return false;
- // set DT
- (void)getDominatorTree();
-
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
- // set TLI
- (void)getTargetLibraryInfo();
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
+ SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
DEBUG(dbgs() << "loop-idiom Scanning: F["
- << CurLoop->getHeader()->getParent()->getName()
- << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+ << CurLoop->getHeader()->getParent()->getName() << "] Loop %"
+ << CurLoop->getHeader()->getName() << "\n");
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
- if (LI.getLoopFor(BB) != CurLoop)
+ if (LI->getLoopFor(BB) != CurLoop)
continue;
MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
@@ -649,41 +244,109 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
return MadeChange;
}
-bool LoopIdiomRecognize::runOnNoncountableLoop() {
- NclPopcountRecognize Popcount(*this);
- if (Popcount.recognize())
- return true;
+static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) {
+ uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
+ assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
+ "Don't overflow unsigned.");
+ return (unsigned)SizeInBits >> 3;
+}
- return false;
+static unsigned getStoreStride(const SCEVAddRecExpr *StoreEv) {
+ const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
+ return ConstStride->getAPInt().getZExtValue();
}
-bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return nullptr;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = DL->getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
+ return nullptr;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (DL->isBigEndian())
+ return nullptr;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16)
+ return nullptr;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16)
+ return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16 / Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
+}
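
To make the replication rule concrete, a sketch (names hypothetical; little-endian assumed, as the code requires): a strided store of the 4-byte constant 0x01020304 yields a four-element array, i.e. the 16-byte pattern that Darwin's memset_pattern16 consumes:

    void fill(int *p, unsigned n) {
      for (unsigned i = 0; i != n; ++i)
        p[i] = 0x01020304;
    }
    // getMemSetPatternValue replicates the constant to 16 bytes
    // ({04,03,02,01} repeated four times on a little-endian target), and
    // the loop is conceptually rewritten to:
    //   memset_pattern16(p, pattern, (size_t)n * 4);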
+
+bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+ // Don't touch volatile stores.
+ if (!SI->isSimple())
return false;
- CurLoop = L;
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
- // If the loop could not be converted to canonical form, it must have an
- // indirectbr in it, just give up.
- if (!L->getLoopPreheader())
+ // Reject stores that are so large that they overflow an unsigned.
+ uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
- // Disable loop idiom recognition if the function's name is a common idiom.
- StringRef Name = L->getHeader()->getParent()->getName();
- if (Name == "memset" || Name == "memcpy")
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
- SE = &getAnalysis<ScalarEvolution>();
- if (SE->hasLoopInvariantBackedgeTakenCount(L))
- return runOnCountableLoop();
- return runOnNoncountableLoop();
+ // Check to see if we have a constant stride.
+ if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
+ return false;
+
+ return true;
+}
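
As a sketch of what the AddRec screen above accepts (variable names hypothetical):

    void clearLoop(int *a, unsigned n) {
      for (unsigned i = 0; i != n; ++i)
        a[i] = 0; // SE->getSCEV(&a[i]) is {a,+,4}<%loop>: an affine AddRec
    }             // with constant stride 4 == sizeof(int), so it is legal.

A pointer advanced by a data-dependent amount would not form an AddRec on the loop and is rejected as a "random store".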
+
+void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
+ StoreRefs.clear();
+ for (Instruction &I : *BB) {
+ StoreInst *SI = dyn_cast<StoreInst>(&I);
+ if (!SI)
+ continue;
+
+ // Make sure this is a strided store with a constant stride.
+ if (!isLegalStore(SI))
+ continue;
+
+ // Save the store locations.
+ StoreRefs.push_back(SI);
+ }
}
/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count. This block is known to be in the current
/// loop and not in any subloops.
-bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
- SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+bool LoopIdiomRecognize::runOnLoopBlock(
+ BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks) {
// We can only promote stores in this block if they are unconditionally
// executed in the loop. For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop. Verify this now.
@@ -692,25 +355,18 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
return false;
bool MadeChange = false;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = I++;
- // Look for store instructions, which may be optimized to memset/memcpy.
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- WeakVH InstPtr(I);
- if (!processLoopStore(SI, BECount)) continue;
- MadeChange = true;
-
- // If processing the store invalidated our iterator, start over from the
- // top of the block.
- if (!InstPtr)
- I = BB->begin();
- continue;
- }
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ collectStores(BB);
+ for (auto &SI : StoreRefs)
+ MadeChange |= processLoopStore(SI, BECount);
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *Inst = &*I++;
// Look for memset instructions, which may be optimized to a larger memset.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
- WeakVH InstPtr(I);
- if (!processLoopMemSet(MSI, BECount)) continue;
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(&*I);
+ if (!processLoopMemSet(MSI, BECount))
+ continue;
MadeChange = true;
// If processing the memset invalidated our iterator, start over from the
@@ -724,71 +380,38 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
return MadeChange;
}
-
/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
- if (!SI->isSimple()) return false;
+ assert(SI->isSimple() && "Expected only non-volatile stores.");
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
- // Reject stores that are so large that they overflow an unsigned.
- auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
- uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType());
- if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
- return false;
-
- // See if the pointer expression is an AddRec like {base,+,1} on the current
- // loop, which indicates a strided store. If we have something else, it's a
- // random store we can't handle.
- const SCEVAddRecExpr *StoreEv =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
- return false;
-
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
- unsigned StoreSize = (unsigned)SizeInBits >> 3;
- const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
-
- if (!Stride || StoreSize != Stride->getValue()->getValue()) {
- // TODO: Could also handle negative stride here someday, that will require
- // the validity check in mayLoopAccessLocation to be updated though.
- // Enable this to print exact negative strides.
- if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
- dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
- dbgs() << "BB: " << *SI->getParent();
- }
-
+ const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getStoreStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ if (StoreSize != Stride && StoreSize != -Stride)
return false;
- }
+
+ bool NegStride = StoreSize == -Stride;
// See if we can optimize just this store in isolation.
if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
- StoredVal, SI, StoreEv, BECount))
+ StoredVal, SI, StoreEv, BECount, NegStride))
return true;
- // If the stored value is a strided load in the same loop with the same stride
- // this this may be transformable into a memcpy. This kicks in for stuff like
- // for (i) A[i] = B[i];
- if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
- const SCEVAddRecExpr *LoadEv =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
- if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
- StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
- if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
- return true;
- }
- //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
-
- return false;
+ // Optimize the store into a memcpy if its stored value is a similarly
+ // strided load.
+ return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride);
}
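
The stride test above admits both directions; a sketch with 4-byte ints (names hypothetical):

    void forwardClear(int *a, unsigned n) {
      for (unsigned i = 0; i != n; ++i)
        a[i] = 0;    // Stride == +4 == StoreSize, NegStride == false
    }
    void backwardClear(int *a, unsigned n) {
      for (unsigned i = n; i-- != 0;)
        a[i] = 0;    // Stride == -4 == -StoreSize, NegStride == true
    }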
/// processLoopMemSet - See if this memset can be promoted to a large memset.
-bool LoopIdiomRecognize::
-processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
+ const SCEV *BECount) {
// We can only handle non-volatile memsets with a constant size.
- if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+ return false;
// If we're not allowed to hack on memset, we fail.
if (!TLI->has(LibFunc::memset))
@@ -818,17 +441,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
return false;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), MSI->getValue(),
- MSI, Ev, BECount);
+ MSI->getAlignment(), MSI->getValue(), MSI, Ev,
+ BECount, /*NegStride=*/false);
}
-
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
-static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
- Loop *L, const SCEV *BECount,
- unsigned StoreSize, AliasAnalysis &AA,
+static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+ const SCEV *BECount, unsigned StoreSize,
+ AliasAnalysis &AA,
Instruction *IgnoredStore) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
@@ -838,7 +460,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+ AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -849,59 +471,31 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
- if (&*I != IgnoredStore &&
- (AA.getModRefInfo(I, StoreLoc) & Access))
+ if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
return true;
return false;
}
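
A quick worked instance of the refinement above: if BECount is the constant 99 and StoreSize is 4, the loop touches exactly (99 + 1) * 4 = 400 bytes, so the alias query covers the precise range [Ptr, Ptr + 400) rather than a location of unknown size.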
-/// getMemSetPatternValue - If a strided store of the specified value is safe to
-/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
-/// be passed in. Otherwise, return null.
-///
-/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
-/// just replicate their input array and then pass on to memset_pattern16.
-static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
- // If the value isn't a constant, we can't promote it to being in a constant
- // array. We could theoretically do a store to an alloca or something, but
- // that doesn't seem worthwhile.
- Constant *C = dyn_cast<Constant>(V);
- if (!C) return nullptr;
-
- // Only handle simple values that are a power of two bytes in size.
- uint64_t Size = DL.getTypeSizeInBits(V->getType());
- if (Size == 0 || (Size & 7) || (Size & (Size-1)))
- return nullptr;
-
- // Don't care enough about darwin/ppc to implement this.
- if (DL.isBigEndian())
- return nullptr;
-
- // Convert to size in bytes.
- Size /= 8;
-
- // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
- // if the top and bottom are the same (e.g. for vectors and large integers).
- if (Size > 16) return nullptr;
-
- // If the constant is exactly 16 bytes, just use it.
- if (Size == 16) return C;
-
- // Otherwise, we'll use an array of the constants.
- unsigned ArraySize = 16/Size;
- ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
- return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+// If we have a negative stride, Start refers to the end of the memory location
+// we're trying to memset. Therefore, we need to recompute the base pointer,
+// which is just Start - BECount*Size.
+static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
+ Type *IntPtr, unsigned StoreSize,
+ ScalarEvolution *SE) {
+ const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+ if (StoreSize != 1)
+ Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+ return SE->getMinusSCEV(Start, Index);
}
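
A worked example of this rebasing (names hypothetical, 4-byte elements): for a backward loop `for (i = n - 1; i >= 0; --i) a[i] = 0;`, the AddRec starts at &a[n-1] and BECount is n - 1, so:

    // Base = Start - BECount * StoreSize
    //      = &a[n-1] - (n - 1) * 4
    //      = &a[0]            // exactly where the memset must begin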
-
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
-bool LoopIdiomRecognize::
-processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment, Value *StoredVal,
- Instruction *TheStore, const SCEVAddRecExpr *Ev,
- const SCEV *BECount) {
+bool LoopIdiomRecognize::processLoopStridedStore(
+ Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
+ Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount, bool NegStride) {
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
@@ -909,7 +503,6 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
- auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
// If we're allowed to form a memset, and the stored value would be acceptable
@@ -936,9 +529,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, DL, "loop-idiom");
+ SCEVExpander Expander(*SE, *DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+ Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
+
+ const SCEV *Start = Ev->getStart();
+ // Handle negative strided loops.
+ if (NegStride)
+ Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
@@ -946,12 +545,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
Value *BasePtr =
- Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
- Preheader->getTerminator());
-
- if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
- CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
+ Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
+ if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
+ *AA, TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@@ -962,36 +558,30 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
- SCEV::FlagNUW);
+ const SCEV *NumBytesS =
+ SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW);
if (StoreSize != 1) {
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
}
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
CallInst *NewCall;
if (SplatValue) {
- NewCall = Builder.CreateMemSet(BasePtr,
- SplatValue,
- NumBytes,
- StoreAlignment);
+ NewCall =
+ Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
- Module *M = TheStore->getParent()->getParent()->getParent();
- Value *MSP = M->getOrInsertFunction("memset_pattern16",
- Builder.getVoidTy(),
- Int8PtrTy,
- Int8PtrTy,
- IntPtr,
- (void*)nullptr);
+ Module *M = TheStore->getModule();
+ Value *MSP =
+ M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
+ Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// a constant array of 16 bytes. Plop the value into a mergeable global.
@@ -1015,26 +605,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
return true;
}
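
Putting the byte-count math together, a conceptual sketch of the splat case (names hypothetical):

    // for (unsigned i = 0; i != n; ++i) p[i] = 0;   // BECount == n - 1
    // NumBytes = (BECount + 1) * StoreSize = n * 4, so the preheader gains
    //   memset(p, 0, (size_t)n * 4);
    // and the original store is then deleted.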
-/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
-/// same-strided load.
-bool LoopIdiomRecognize::
-processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
- const SCEVAddRecExpr *StoreEv,
- const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount) {
+/// If the stored value is a strided load in the same loop with the same
+/// stride, this may be transformable into a memcpy. This kicks in for stuff
+/// like
+/// for (i) A[i] = B[i];
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
+ StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
+ const SCEV *BECount, bool NegStride) {
// If we're not allowed to form memcpy, we fail.
if (!TLI->has(LibFunc::memcpy))
return false;
- LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+ // The stored value must come from a simple (non-volatile) load.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ if (!LI || !LI->isSimple())
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+
+ // The store and load must share the same stride.
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
+ return false;
// The trip count of the loop and the base pointer of the addrec SCEV are
// guaranteed to be loop invariant, which means they should dominate the
// header. This allows us to insert code for them in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- const DataLayout &DL = Preheader->getModule()->getDataLayout();
- SCEVExpander Expander(*SE, DL, "loop-idiom");
+ SCEVExpander Expander(*SE, *DL, "loop-idiom");
+
+ const SCEV *StrStart = StoreEv->getStart();
+ unsigned StrAS = SI->getPointerAddressSpace();
+ Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
+
+ // Handle negative strided loops.
+ if (NegStride)
+ StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1042,29 +653,31 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// or write the memory region we're storing to. This includes the load that
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
- Value *StoreBasePtr =
- Expander.expandCodeFor(StoreEv->getStart(),
- Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
- Preheader->getTerminator());
-
- if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
- CurLoop, BECount, StoreSize,
- getAnalysis<AliasAnalysis>(), SI)) {
+ Value *StoreBasePtr = Expander.expandCodeFor(
+ StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
+ StoreSize, *AA, SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
return false;
}
+ const SCEV *LdStart = LoadEv->getStart();
+ unsigned LdAS = LI->getPointerAddressSpace();
+
+ // Handle negative strided loops.
+ if (NegStride)
+ LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
+
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
- Value *LoadBasePtr =
- Expander.expandCodeFor(LoadEv->getStart(),
- Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
- Preheader->getTerminator());
+ Value *LoadBasePtr = Expander.expandCodeFor(
+ LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
- if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+ if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
+ *AA, SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
@@ -1074,34 +687,368 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, everything is safe, we can transform this!
-
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
- SCEV::FlagNUW);
+ const SCEV *NumBytesS =
+ SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
if (StoreSize != 1)
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
SCEV::FlagNUW);
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
CallInst *NewCall =
- Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
- std::min(SI->getAlignment(), LI->getAlignment()));
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
NewCall->setDebugLoc(SI->getDebugLoc());
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
-
- // Okay, the memset has been formed. Zap the original store and anything that
+ // Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
deleteDeadInstruction(SI, TLI);
++NumMemCpy;
return true;
}
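
End to end, a sketch of what this function emits (4-byte elements assumed):

    // Before:
    //   for (unsigned i = 0; i != n; ++i) A[i] = B[i];
    // After, in the preheader (alignment = min of the two accesses):
    //   memcpy(A, B, (size_t)n * 4);
    // Deleting the now-dead store also lets the load die.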
+
+bool LoopIdiomRecognize::runOnNoncountableLoop() {
+ return recognizePopcount();
+}
+
+/// Check if the given conditional branch is based on a comparison between a
+/// variable and zero, and if the variable is non-zero, control branches to
+/// the loop entry. If the branch matches this behavior, the variable involved
+/// in the comparison is returned. This function is called to check whether
+/// the precondition and postcondition of the loop are in a desirable form.
+static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
+ if (!BI || !BI->isConditional())
+ return nullptr;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return nullptr;
+
+ ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ if (!CmpZero || !CmpZero->isZero())
+ return nullptr;
+
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) ||
+ (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry))
+ return Cond->getOperand(0);
+
+ return nullptr;
+}
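
A sketch of the IR shape this helper accepts; for the branch below it returns %x, and it returns nullptr for any deviation (non-zero comparand, wrong predicate/successor pairing, or a non-icmp condition):

    %cond = icmp ne i32 %x, 0
    br i1 %cond, label %loop.entry, label %loop.exit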
+
+/// Return true iff the idiom is detected in the loop.
+///
+/// Additionally:
+/// 1) \p CntInst is set to the instruction counting the population bit.
+/// 2) \p CntPhi is set to the corresponding phi node.
+/// 3) \p Var is set to the value whose population bits are being counted.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// if (x0 != 0)
+/// goto loop-exit // the precondition of the loop
+/// cnt0 = init-val;
+/// do {
+/// x1 = phi (x0, x2);
+/// cnt1 = phi(cnt0, cnt2);
+///
+/// cnt2 = cnt1 + 1;
+/// ...
+/// x2 = x1 & (x1 - 1);
+/// ...
+/// } while(x != 0);
+///
+/// loop-exit:
+/// \endcode
+static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
+ Instruction *&CntInst, PHINode *&CntPhi,
+ Value *&Var) {
+ // step 1: Check to see if the loop-back branch matches this pattern:
+ // "if (a!=0) goto loop-entry".
+ BasicBlock *LoopEntry;
+ Instruction *DefX2, *CountInst;
+ Value *VarX1, *VarX0;
+ PHINode *PhiX, *CountPhi;
+
+ DefX2 = CountInst = nullptr;
+ VarX1 = VarX0 = nullptr;
+ PhiX = CountPhi = nullptr;
+ LoopEntry = *(CurLoop->block_begin());
+
+ // step 1: Check if the loop-back branch is in desirable form.
+ {
+ if (Value *T = matchCondition(
+ dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
+ DefX2 = dyn_cast<Instruction>(T);
+ else
+ return false;
+ }
+
+ // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
+ {
+ if (!DefX2 || DefX2->getOpcode() != Instruction::And)
+ return false;
+
+ BinaryOperator *SubOneOp;
+
+ if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
+ VarX1 = DefX2->getOperand(1);
+ else {
+ VarX1 = DefX2->getOperand(0);
+ SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
+ }
+ if (!SubOneOp)
+ return false;
+
+ Instruction *SubInst = cast<Instruction>(SubOneOp);
+ ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
+ if (!Dec ||
+ !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
+ (SubInst->getOpcode() == Instruction::Add &&
+ Dec->isAllOnesValue()))) {
+ return false;
+ }
+ }
+
+ // step 3: Check the recurrence of variable X
+ {
+ PhiX = dyn_cast<PHINode>(VarX1);
+ if (!PhiX ||
+ (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
+ return false;
+ }
+ }
+
+ // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
+ {
+ CountInst = nullptr;
+ for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
+ IterE = LoopEntry->end();
+ Iter != IterE; Iter++) {
+ Instruction *Inst = &*Iter;
+ if (Inst->getOpcode() != Instruction::Add)
+ continue;
+
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ if (!Inc || !Inc->isOne())
+ continue;
+
+ PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
+ if (!Phi || Phi->getParent() != LoopEntry)
+ continue;
+
+ // Check if the result of the instruction is live outside the loop.
+ bool LiveOutLoop = false;
+ for (User *U : Inst->users()) {
+ if ((cast<Instruction>(U))->getParent() != LoopEntry) {
+ LiveOutLoop = true;
+ break;
+ }
+ }
+
+ if (LiveOutLoop) {
+ CountInst = Inst;
+ CountPhi = Phi;
+ break;
+ }
+ }
+
+ if (!CountInst)
+ return false;
+ }
+
+ // step 5: check if the precondition is in this form:
+ // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
+ {
+ auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
+ if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
+ return false;
+
+ CntInst = CountInst;
+ CntPhi = CountPhi;
+ Var = T;
+ }
+
+ return true;
+}
+
+/// Recognizes a population count idiom in a non-countable loop.
+///
+/// If detected, transforms the relevant code to issue the popcount intrinsic
+/// function call, and returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizePopcount() {
+ if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
+ return false;
+
+ // Population counting is usually done with a few arithmetic instructions.
+ // Such instructions can be easily "absorbed" by vacant slots in a
+ // non-compact loop. Therefore, recognizing the popcount idiom only makes
+ // sense in a compact loop.
+
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
+
+ BasicBlock *LoopBody = *(CurLoop->block_begin());
+ if (LoopBody->size() >= 20) {
+ // The loop is too big, bail out.
+ return false;
+ }
+
+ // It should have a preheader containing nothing but an unconditional branch.
+ BasicBlock *PH = CurLoop->getLoopPreheader();
+ if (!PH)
+ return false;
+ if (&PH->front() != PH->getTerminator())
+ return false;
+ auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
+ if (!EntryBI || EntryBI->isConditional())
+ return false;
+
+ // It should have a precondition block where the generated popcount intrinsic
+ // function can be inserted.
+ auto *PreCondBB = PH->getSinglePredecessor();
+ if (!PreCondBB)
+ return false;
+ auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ if (!PreCondBI || PreCondBI->isUnconditional())
+ return false;
+
+ Instruction *CntInst;
+ PHINode *CntPhi;
+ Value *Val;
+ if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
+ return false;
+
+ transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
+ return true;
+}
+
+static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+ DebugLoc DL) {
+ Value *Ops[] = {Val};
+ Type *Tys[] = {Val->getType()};
+
+ Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
+ Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+ CallInst *CI = IRBuilder.CreateCall(Func, Ops);
+ CI->setDebugLoc(DL);
+
+ return CI;
+}
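
For an i32 operand, the call created above looks like the following IR; @llvm.ctpop is overloaded on its operand type, which is why Tys carries Val's type:

    %popcnt = call i32 @llvm.ctpop.i32(i32 %x)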
+
+void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
+ Instruction *CntInst,
+ PHINode *CntPhi, Value *Var) {
+ BasicBlock *PreHead = CurLoop->getLoopPreheader();
+ auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ const DebugLoc DL = CntInst->getDebugLoc();
+
+ // Assume that before the transformation, the loop looks as follows:
+ // if (x) // the precondition
+ // do { cnt++; x &= x - 1; } while(x);
+
+ // Step 1: Insert the ctpop instruction at the end of the precondition block
+ IRBuilder<> Builder(PreCondBr);
+ Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
+ {
+ PopCnt = createPopcntIntrinsic(Builder, Var, DL);
+ NewCount = PopCntZext =
+ Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
+
+ if (NewCount != PopCnt)
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+
+ // TripCnt is exactly the number of iterations the loop has
+ TripCnt = NewCount;
+
+ // If the population counter's initial value is not zero, insert Add Inst.
+ Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero()) {
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+ }
+ }
+
+ // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
+ // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
+ // function would be partial dead code, and downstream passes will drag
+ // it back from the precondition block to the preheader.
+ {
+ ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
+
+ Value *Opnd0 = PopCntZext;
+ Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
+ if (PreCond->getOperand(0) != Var)
+ std::swap(Opnd0, Opnd1);
+
+ ICmpInst *NewPreCond = cast<ICmpInst>(
+ Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
+ PreCondBr->setCondition(NewPreCond);
+
+ RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
+ }
+
+ // Step 3: Note that the population count is exactly the trip count of the
+ // loop in question, which enables us to convert the loop from a noncountable
+ // loop into a countable one. The benefit is twofold:
+ //
+ // - If the loop only counts population, the entire loop becomes dead after
+ // the transformation. It is a lot easier to prove a countable loop dead
+ // than to prove a noncountable one. (In some C dialects, an infinite loop
+ // isn't dead even if it computes nothing useful. In general, DCE needs
+ // to prove a noncountable loop finite before safely deleting it.)
+ //
+ // - If the loop also performs something else, it remains alive.
+ // Since it is transformed to countable form, it can be aggressively
+ // optimized by some optimizations which are in general not applicable
+ // to a noncountable loop.
+ //
+ // After this step, this loop (conceptually) would look like the following:
+ // newcnt = __builtin_ctpop(x);
+ // t = newcnt;
+ // if (x)
+ // do { cnt++; x &= x-1; t--; } while (t > 0);
+ BasicBlock *Body = *(CurLoop->block_begin());
+ {
+ auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator());
+ ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
+ Type *Ty = TripCnt->getType();
+
+ PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
+
+ Builder.SetInsertPoint(LbCond);
+ Instruction *TcDec = cast<Instruction>(
+ Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
+ "tcdec", false, true));
+
+ TcPhi->addIncoming(TripCnt, PreHead);
+ TcPhi->addIncoming(TcDec, Body);
+
+ CmpInst::Predicate Pred =
+ (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
+ LbCond->setPredicate(Pred);
+ LbCond->setOperand(0, TcDec);
+ LbCond->setOperand(1, ConstantInt::get(Ty, 0));
+ }
+
+ // Step 4: All the references to the original population counter outside
+ // the loop are replaced with the NewCount -- the value returned from
+ // __builtin_ctpop().
+ CntInst->replaceUsesOutsideBlock(NewCount, Body);
+
+ // step 5: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
+ SE->forgetLoop(CurLoop);
+}
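
Taken together, steps 1-5 mean that a pure counting loop collapses entirely once later passes clean up (a sketch, assuming DCE runs afterwards):

    // Before:
    //   unsigned cnt = 0;
    //   while (x) { ++cnt; x &= x - 1; }
    //   use(cnt);
    // After loop-idiom plus DCE of the now-countable, now-dead loop:
    //   use(__builtin_popcount(x));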
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index e12502654751..b4102fe9ba34 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -48,7 +48,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -112,7 +112,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify instructions in the current basic block.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
// The first time through the loop ToSimplify is empty and we try to
// simplify all instructions. On later iterations ToSimplify is not
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 9d7e57ffebac..4295235a3f36 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -99,7 +99,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
return false;
if (St && !St->isSimple())
return false;
- MemInstr.push_back(I);
+ MemInstr.push_back(&*I);
}
}
@@ -176,7 +176,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
}
}
- // We don't have a DepMatrix to check legality return false
+ // We don't have a DepMatrix to check legality; return false.
if (DepMatrix.size() == 0)
return false;
return true;
@@ -331,9 +331,9 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
class LoopInterchangeLegality {
public:
LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
- LoopInterchange *Pass)
- : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass),
- InnerLoopHasReduction(false) {}
+ LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
+ PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {}
/// Check if the loops can be interchanged.
bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -357,9 +357,10 @@ private:
Loop *OuterLoop;
Loop *InnerLoop;
- /// Scev analysis.
ScalarEvolution *SE;
- LoopInterchange *CurrentPass;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ bool PreserveLCSSA;
bool InnerLoopHasReduction;
};
@@ -371,7 +372,7 @@ public:
LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
- /// Check if the loop interchange is profitable
+ /// Check if the loop interchange is profitable.
bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
CharMatrix &DepMatrix);
@@ -385,12 +386,12 @@ private:
ScalarEvolution *SE;
};
-/// LoopInterchangeTransform interchanges the loop
+/// LoopInterchangeTransform interchanges the loop.
class LoopInterchangeTransform {
public:
LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
- LoopInterchange *Pass, BasicBlock *LoopNestExit,
+ BasicBlock *LoopNestExit,
bool InnerLoopContainsReductions)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
LoopExit(LoopNestExit),
@@ -424,21 +425,22 @@ private:
bool InnerLoopHasReduction;
};
-// Main LoopInterchange Pass
+// Main LoopInterchange Pass.
struct LoopInterchange : public FunctionPass {
static char ID;
ScalarEvolution *SE;
LoopInfo *LI;
DependenceAnalysis *DA;
DominatorTree *DT;
+ bool PreserveLCSSA;
LoopInterchange()
: FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DependenceAnalysis>();
@@ -447,11 +449,13 @@ struct LoopInterchange : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DA = &getAnalysis<DependenceAnalysis>();
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
// Build up a worklist of loop pairs to analyze.
SmallVector<LoopVector, 8> Worklist;
@@ -489,7 +493,7 @@ struct LoopInterchange : public FunctionPass {
unsigned selectLoopForInterchange(LoopVector LoopList) {
// TODO: Add a better heuristic to select the loop to be interchanged based
- // on the dependece matrix. Currently we select the innermost loop.
+ // on the dependence matrix. Currently we select the innermost loop.
return LoopList.size() - 1;
}
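
For reference, the transformation these candidates feed into (illustrative, row-major arrays): interchanging makes the innermost accesses unit-stride, which is the cache benefit the pass is after:

    // Before (inner loop strides by a whole row):
    //   for (j = 0; j < N; ++j)
    //     for (i = 0; i < M; ++i)
    //       A[i][j] += 1;
    // After interchange (inner loop walks contiguous memory):
    //   for (i = 0; i < M; ++i)
    //     for (j = 0; j < N; ++j)
    //       A[i][j] += 1;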
@@ -544,7 +548,7 @@ struct LoopInterchange : public FunctionPass {
}
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
- // Move the selected loop outwards to the best posible position.
+ // Move the selected loop outwards to the best possible position.
for (unsigned i = SelecLoopId; i > 0; i--) {
bool Interchanged =
processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
@@ -574,7 +578,8 @@ struct LoopInterchange : public FunctionPass {
Loop *InnerLoop = LoopList[InnerLoopId];
Loop *OuterLoop = LoopList[OuterLoopId];
- LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this);
+ LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
+ PreserveLCSSA);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
return false;
@@ -586,7 +591,7 @@ struct LoopInterchange : public FunctionPass {
return false;
}
- LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this,
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
LoopNestExit, LIL.hasInnerLoopReduction());
LIT.transform();
DEBUG(dbgs() << "Loops interchanged\n");
@@ -655,7 +660,7 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n");
// We do not have any basic block in between now make sure the outer header
- // and outer loop latch doesnt contain any unsafe instructions.
+ // and outer loop latch doesn't contain any unsafe instructions.
if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
containsUnsafeInstructionsInLatch(OuterLoopLatch))
return false;
@@ -698,9 +703,9 @@ bool LoopInterchangeLegality::findInductionAndReductions(
return false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
RecurrenceDescriptor RD;
+ InductionDescriptor ID;
PHINode *PHI = cast<PHINode>(I);
- ConstantInt *StepValue = nullptr;
- if (isInductionPHI(PHI, SE, StepValue))
+ if (InductionDescriptor::isInductionPHI(PHI, SE, ID))
Inductions.push_back(PHI);
else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
Reductions.push_back(PHI);
@@ -836,7 +841,7 @@ bool LoopInterchangeLegality::currentLimitations() {
else
FoundInduction = true;
}
- // The loop latch ended and we didnt find the induction variable return as
+ // The loop latch ended and we didn't find the induction variable return as
// current limitation.
if (!FoundInduction)
return true;
@@ -867,12 +872,14 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
isa<PHINode>(OuterLoopPreHeader->begin()) ||
!OuterLoopPreHeader->getUniquePredecessor()) {
- OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass);
+ OuterLoopPreHeader =
+ InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA);
}
if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
InnerLoopPreHeader == OuterLoop->getHeader()) {
- InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass);
+ InnerLoopPreHeader =
+ InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA);
}
// TODO: The loops could not be interchanged due to current limitations in the
@@ -966,7 +973,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
unsigned OuterLoopId,
CharMatrix &DepMatrix) {
- // TODO: Add Better Profitibility checks.
+ // TODO: Add better profitability checks.
// e.g
// 1) Construct dependency matrix and move the one with no loop carried dep
// inside to enable vectorization.
@@ -980,7 +987,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
if (Cost < 0)
return true;
- // It is not profitable as per current cache profitibility model. But check if
+ // It is not profitable as per current cache profitability model. But check if
// we can move this loop outside to improve parallelism.
bool ImprovesPar =
isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
@@ -996,7 +1003,7 @@ void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
return;
}
}
- assert(false && "Couldn't find loop");
+ llvm_unreachable("Couldn't find loop");
}
void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
@@ -1045,7 +1052,7 @@ bool LoopInterchangeTransform::transform() {
splitInnerLoopLatch(InnerIndexVar);
DEBUG(dbgs() << "splitInnerLoopLatch Done\n");
- // Splits the inner loops phi nodes out into a seperate basic block.
+ // Splits the inner loops phi nodes out into a separate basic block.
splitInnerLoopHeader();
DEBUG(dbgs() << "splitInnerLoopHeader Done\n");
}
@@ -1113,8 +1120,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
auto &ToList = InsertBefore->getParent()->getInstList();
auto &FromList = FromBB->getInstList();
- ToList.splice(InsertBefore, FromList, FromList.begin(),
- FromBB->getTerminator());
+ ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
+ FromBB->getTerminator()->getIterator());
}
void LoopInterchangeTransform::adjustOuterLoopPreheader() {
@@ -1181,8 +1188,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
return false;
- BasicBlock *InnerLoopHeaderSucessor = InnerLoopHeader->getUniqueSuccessor();
- if (!InnerLoopHeaderSucessor)
+ BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
+ if (!InnerLoopHeaderSuccessor)
return false;
// Adjust Loop Preheader and headers
@@ -1198,11 +1205,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
OuterLoopHeaderBI->setSuccessor(i, LoopExit);
else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
- OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSucessor);
+ OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
}
// Adjust reduction PHI's now that the incoming block has changed.
- updateIncomingBlock(InnerLoopHeaderSucessor, InnerLoopHeader,
+ updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
OuterLoopHeader);
BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
@@ -1286,10 +1293,10 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
char LoopInterchange::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
new file mode 100644
index 000000000000..1064d088514d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -0,0 +1,566 @@
+//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a loop-aware load elimination pass.
+//
+// It uses LoopAccessAnalysis to identify loop-carried dependences with a
+// distance of one between stores and loads. These form the candidates for the
+// transformation. The source value of each store is then propagated to the
+// users of the corresponding load. This makes the load dead.
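+//
+// As a source-level sketch (illustrative, not code from this file), in
+//
+//   for (unsigned I = 0; I < N; ++I) {
+//     X = A[I];     // reads the value stored on the previous iteration
+//     A[I + 1] = Y; // store at a dependence distance of one from the load
+//   }
+//
+// the value Y can be carried in a register across the backedge, so the load
+// of A[I] is no longer needed.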
+//
+// The pass can also version the loop and add memchecks in order to prove that
+// may-aliasing stores can't change the value in memory before it's read by the
+// load.
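+//
+// Roughly, assuming a single runtime check is needed, the versioned form is:
+//
+//   if (the store and load ranges are disjoint)  // emitted memcheck
+//     <loop with the load eliminated>
+//   else
+//     <original, unmodified loop>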
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include <forward_list>
+
+#define LLE_OPTION "loop-load-elim"
+#define DEBUG_TYPE LLE_OPTION
+
+using namespace llvm;
+
+static cl::opt<unsigned> CheckPerElim(
+ "runtime-check-per-loop-load-elim", cl::Hidden,
+ cl::desc("Max number of memchecks allowed per eliminated load on average"),
+ cl::init(1));
+
+static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
+ "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed for Loop "
+ "Load Elimination"));
+
+STATISTIC(NumLoopLoadEliminated, "Number of loads eliminated by LLE");
+
+namespace {
+
+/// \brief Represent a store-to-forwarding candidate.
+struct StoreToLoadForwardingCandidate {
+ LoadInst *Load;
+ StoreInst *Store;
+
+ StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
+ : Load(Load), Store(Store) {}
+
+ /// \brief Return true if the dependence from the store to the load has a
+ /// distance of one. E.g. A[i+1] = A[i]
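+ /// Concretely (for illustration), with 4-byte elements this means the
+ /// constant SCEV distance between the store and load pointers is +4 or -4,
+ /// i.e. exactly one element in either direction.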
+ bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const {
+ Value *LoadPtr = Load->getPointerOperand();
+ Value *StorePtr = Store->getPointerOperand();
+ Type *LoadPtrType = LoadPtr->getType();
+ Type *LoadType = LoadPtrType->getPointerElementType();
+
+ assert(LoadPtrType->getPointerAddressSpace() ==
+ StorePtr->getType()->getPointerAddressSpace() &&
+ LoadType == StorePtr->getType()->getPointerElementType() &&
+ "Should be a known dependence");
+
+ auto &DL = Load->getParent()->getModule()->getDataLayout();
+ unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
+
+ auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
+ auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
+
+ // We don't need to check non-wrapping here because forward/backward
+ // dependence wouldn't be valid if these weren't monotonic accesses.
+ auto *Dist = cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
+ const APInt &Val = Dist->getAPInt();
+ return Val.abs() == TypeByteSize;
+ }
+
+ Value *getLoadPtr() const { return Load->getPointerOperand(); }
+
+#ifndef NDEBUG
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const StoreToLoadForwardingCandidate &Cand) {
+ OS << *Cand.Store << " -->\n";
+ OS.indent(2) << *Cand.Load << "\n";
+ return OS;
+ }
+#endif
+};
+
+/// \brief Check if the store dominates all latches, so that, as long as there
+/// is no intervening store, the stored value will be loaded on the next
+/// iteration.
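+/// (For example, a store guarded by a conditional inside the loop body does
+/// not dominate the latch, so it cannot be assumed to execute on every
+/// iteration.)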
+bool doesStoreDominateAllLatches(BasicBlock *StoreBlock, Loop *L,
+ DominatorTree *DT) {
+ SmallVector<BasicBlock *, 8> Latches;
+ L->getLoopLatches(Latches);
+ return std::all_of(Latches.begin(), Latches.end(),
+ [&](const BasicBlock *Latch) {
+ return DT->dominates(StoreBlock, Latch);
+ });
+}
+
+/// \brief The per-loop class that does most of the work.
+class LoadEliminationForLoop {
+public:
+ LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
+ DominatorTree *DT)
+ : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {}
+
+ /// \brief Look through the loop-carried and loop-independent dependences in
+ /// this loop and find store->load dependences.
+ ///
+ /// Note that no candidate is returned if LAA has failed to analyze the loop
+ /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
+ std::forward_list<StoreToLoadForwardingCandidate>
+ findStoreToLoadDependences(const LoopAccessInfo &LAI) {
+ std::forward_list<StoreToLoadForwardingCandidate> Candidates;
+
+ const auto *Deps = LAI.getDepChecker().getDependences();
+ if (!Deps)
+ return Candidates;
+
+ // Find store->load dependences (i.e. true dependences). Both lexically
+ // forward and backward dependences qualify. Disqualify loads that have
+ // other unknown dependences.
+
+ SmallSet<Instruction *, 4> LoadsWithUnknownDependence;
+
+ for (const auto &Dep : *Deps) {
+ Instruction *Source = Dep.getSource(LAI);
+ Instruction *Destination = Dep.getDestination(LAI);
+
+ if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
+ if (isa<LoadInst>(Source))
+ LoadsWithUnknownDependence.insert(Source);
+ if (isa<LoadInst>(Destination))
+ LoadsWithUnknownDependence.insert(Destination);
+ continue;
+ }
+
+ if (Dep.isBackward())
+ // Note that the designations source and destination follow the program
+ // order, i.e. source is always first. (The direction is given by the
+ // DepType.)
+ std::swap(Source, Destination);
+ else
+ assert(Dep.isForward() && "Needs to be a forward dependence");
+
+ auto *Store = dyn_cast<StoreInst>(Source);
+ if (!Store)
+ continue;
+ auto *Load = dyn_cast<LoadInst>(Destination);
+ if (!Load)
+ continue;
+ Candidates.emplace_front(Load, Store);
+ }
+
+ if (!LoadsWithUnknownDependence.empty())
+ Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
+ return LoadsWithUnknownDependence.count(C.Load);
+ });
+
+ return Candidates;
+ }
+
+ /// \brief Return the index of the instruction according to program order.
+ unsigned getInstrIndex(Instruction *Inst) {
+ auto I = InstOrder.find(Inst);
+ assert(I != InstOrder.end() && "No index for instruction");
+ return I->second;
+ }
+
+ /// \brief If a load has multiple candidates associated with it (i.e.
+ /// different stores), it could be forwarding from multiple stores depending
+ /// on control flow. Remove these candidates.
+ ///
+ /// Here, we rely on LAA to include the relevant loop-independent dependences.
+ /// LAA is known to omit these in the very simple case when the read and the
+ /// write within an alias set always take place using the *same* pointer.
+ ///
+ /// However, we know that this is not the case here, i.e. we can rely on LAA
+ /// to provide us with loop-independent dependences for the cases we're
+ /// interested in. Consider, for example, the case where a loop-independent
+ /// dependence S1->S2 invalidates the forwarding S3->S2.
+ ///
+ /// A[i] = ... (S1)
+ /// ... = A[i] (S2)
+ /// A[i+1] = ... (S3)
+ ///
+ /// LAA will perform dependence analysis here because there are two
+ /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
+ void removeDependencesFromMultipleStores(
+ std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
+ // If Store is nullptr it means that we have multiple stores forwarding to
+ // this load.
+ typedef DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>
+ LoadToSingleCandT;
+ LoadToSingleCandT LoadToSingleCand;
+
+ for (const auto &Cand : Candidates) {
+ bool NewElt;
+ LoadToSingleCandT::iterator Iter;
+
+ std::tie(Iter, NewElt) =
+ LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
+ if (!NewElt) {
+ const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
+ // Already multiple stores forward to this load.
+ if (OtherCand == nullptr)
+ continue;
+
+ // Handle the very basic case where the two stores are in the same
+ // block so deciding which one forwards is easy. The later one forwards
+ // as long as they both have a dependence distance of one to the load.
+ if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
+ Cand.isDependenceDistanceOfOne(PSE) &&
+ OtherCand->isDependenceDistanceOfOne(PSE)) {
+ // They are in the same block; the later one will forward to the load.
+ if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
+ OtherCand = &Cand;
+ } else
+ OtherCand = nullptr;
+ }
+ }
+
+ Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
+ if (LoadToSingleCand[Cand.Load] != &Cand) {
+ DEBUG(dbgs() << "Removing from candidates: \n" << Cand
+ << " The load may have multiple stores forwarding to "
+ << "it\n");
+ return true;
+ }
+ return false;
+ });
+ }
+
+ /// \brief Given two pointer operations by their RuntimePointerChecking
+ /// indices, return true if they require an alias check.
+ ///
+ /// We need a check if one is a pointer for a candidate load and the other is
+ /// a pointer for a possibly intervening store.
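+ /// (No check is needed between two candidate-load pointers or between two
+ /// intervening-store pointers; only the mixed load/store pairs matter.)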
+ bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
+ const SmallSet<Value *, 4> &PtrsWrittenOnFwdingPath,
+ const std::set<Value *> &CandLoadPtrs) {
+ Value *Ptr1 =
+ LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
+ Value *Ptr2 =
+ LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue;
+ return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
+ (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
+ }
+
+ /// \brief Return pointers that are possibly written to on the path from a
+ /// forwarding store to a load.
+ ///
+ /// These pointers need to be alias-checked against the forwarding candidates.
+ SmallSet<Value *, 4> findPointersWrittenOnForwardingPath(
+ const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+ // Between FirstStore and LastLoad, none of the locations read by the
+ // elimination candidate loads may be overwritten by any of the stores.
+ //
+ // E.g.:
+ //
+ // st1 C[i]
+ // ld1 B[i] <-------,
+ // ld0 A[i] <----, | * LastLoad
+ // ... | |
+ // st2 E[i] | |
+ // st3 B[i+1] -- | -' * FirstStore
+ // st0 A[i+1] ---'
+ // st4 D[i]
+ //
+ // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
+ // ld0.
+
+ LoadInst *LastLoad =
+ std::max_element(Candidates.begin(), Candidates.end(),
+ [&](const StoreToLoadForwardingCandidate &A,
+ const StoreToLoadForwardingCandidate &B) {
+ return getInstrIndex(A.Load) < getInstrIndex(B.Load);
+ })
+ ->Load;
+ StoreInst *FirstStore =
+ std::min_element(Candidates.begin(), Candidates.end(),
+ [&](const StoreToLoadForwardingCandidate &A,
+ const StoreToLoadForwardingCandidate &B) {
+ return getInstrIndex(A.Store) <
+ getInstrIndex(B.Store);
+ })
+ ->Store;
+
+ // We're looking for stores after the first forwarding store until the end
+ // of the loop, then from the beginning of the loop until the last
+ // forwarded-to load. Collect the pointers of these stores.
+ SmallSet<Value *, 4> PtrsWrittenOnFwdingPath;
+
+ auto InsertStorePtr = [&](Instruction *I) {
+ if (auto *S = dyn_cast<StoreInst>(I))
+ PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
+ };
+ const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
+ std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
+ MemInstrs.end(), InsertStorePtr);
+ std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
+ InsertStorePtr);
+
+ return PtrsWrittenOnFwdingPath;
+ }
+
+ /// \brief Determine the pointer alias checks to prove that there are no
+ /// intervening stores.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> collectMemchecks(
+ const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+
+ SmallSet<Value *, 4> PtrsWrittenOnFwdingPath =
+ findPointersWrittenOnForwardingPath(Candidates);
+
+ // Collect the pointers of the candidate loads.
+ // FIXME: SmallSet does not work with std::inserter.
+ std::set<Value *> CandLoadPtrs;
+ std::transform(Candidates.begin(), Candidates.end(),
+ std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
+ std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
+
+ const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+ std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (auto PtrIdx1 : Check.first->Members)
+ for (auto PtrIdx2 : Check.second->Members)
+ if (needsChecking(PtrIdx1, PtrIdx2,
+ PtrsWrittenOnFwdingPath, CandLoadPtrs))
+ return true;
+ return false;
+ });
+
+ DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
+ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+
+ return Checks;
+ }
+
+ /// \brief Perform the transformation for a candidate.
+ void
+ propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
+ SCEVExpander &SEE) {
+ //
+ // loop:
+ // %x = load %gep_i
+ // = ... %x
+ // store %y, %gep_i_plus_1
+ //
+ // =>
+ //
+ // ph:
+ // %x.initial = load %gep_0
+ // loop:
+ // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+ // %x = load %gep_i <---- now dead
+ // = ... %x.storeforward
+ // store %y, %gep_i_plus_1
+
+ Value *Ptr = Cand.Load->getPointerOperand();
+ auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
+ auto *PH = L->getLoopPreheader();
+ Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
+ PH->getTerminator());
+ Value *Initial =
+ new LoadInst(InitialPtr, "load_initial", PH->getTerminator());
+ PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
+ &L->getHeader()->front());
+ PHI->addIncoming(Initial, PH);
+ PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
+
+ Cand.Load->replaceAllUsesWith(PHI);
+ }
+
+ /// \brief Top-level driver for each loop: find store->load forwarding
+ /// candidates, add run-time checks and perform transformation.
+ bool processLoop() {
+ DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
+ << "\" checking " << *L << "\n");
+ // Look for store-to-load forwarding cases across the
+ // backedge. E.g.:
+ //
+ // loop:
+ // %x = load %gep_i
+ // = ... %x
+ // store %y, %gep_i_plus_1
+ //
+ // =>
+ //
+ // ph:
+ // %x.initial = load %gep_0
+ // loop:
+ // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+ // %x = load %gep_i <---- now dead
+ // = ... %x.storeforward
+ // store %y, %gep_i_plus_1
+
+ // First start with store->load dependences.
+ auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
+ if (StoreToLoadDependences.empty())
+ return false;
+
+ // Generate an index for each load and store according to the original
+ // program order. This will be used later.
+ InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
+
+ // To keep things simple for now, remove those where the load is potentially
+ // fed by multiple stores.
+ removeDependencesFromMultipleStores(StoreToLoadDependences);
+ if (StoreToLoadDependences.empty())
+ return false;
+
+ // Filter the candidates further.
+ SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
+ unsigned NumForwarding = 0;
+ for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
+ DEBUG(dbgs() << "Candidate " << Cand);
+ // Make sure that the stored value is available everywhere in the loop in
+ // the next iteration.
+ if (!doesStoreDominateAllLatches(Cand.Store->getParent(), L, DT))
+ continue;
+
+ // Check whether the SCEV difference is the same as the induction step, so
+ // that the value stored on one iteration is the value loaded on the next.
+ if (!Cand.isDependenceDistanceOfOne(PSE))
+ continue;
+
+ ++NumForwarding;
+ DEBUG(dbgs()
+ << NumForwarding
+ << ". Valid store-to-load forwarding across the loop backedge\n");
+ Candidates.push_back(Cand);
+ }
+ if (Candidates.empty())
+ return false;
+
+ // Check intervening may-alias stores. These need runtime checks for alias
+ // disambiguation.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks =
+ collectMemchecks(Candidates);
+
+ // Too many checks are likely to outweigh the benefits of forwarding.
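+ // (For instance, with the default CheckPerElim of 1, forwarding three
+ // loads tolerates at most three runtime checks.)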
+ if (Checks.size() > Candidates.size() * CheckPerElim) {
+ DEBUG(dbgs() << "Too many run-time checks needed.\n");
+ return false;
+ }
+
+ if (LAI.PSE.getUnionPredicate().getComplexity() >
+ LoadElimSCEVCheckThreshold) {
+ DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
+ return false;
+ }
+
+ // Point of no return: start the transformation. First, version the loop if
+ // necessary.
+ if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
+ LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
+ LV.setAliasChecks(std::move(Checks));
+ LV.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LV.versionLoop();
+ }
+
+ // Next, propagate the value stored by the store to the users of the load.
+ // Also for the first iteration, generate the initial value of the load.
+ SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
+ "storeforward");
+ for (const auto &Cand : Candidates)
+ propagateStoredValueToLoadUsers(Cand, SEE);
+ NumLoopLoadEliminated += NumForwarding;
+
+ return true;
+ }
+
+private:
+ Loop *L;
+
+ /// \brief Maps the load/store instructions to their index according to
+ /// program order.
+ DenseMap<Instruction *, unsigned> InstOrder;
+
+ // Analyses used.
+ LoopInfo *LI;
+ const LoopAccessInfo &LAI;
+ DominatorTree *DT;
+ PredicatedScalarEvolution PSE;
+};
+
+/// \brief The pass. Most of the work is delegated to the per-loop
+/// LoadEliminationForLoop class.
+class LoopLoadElimination : public FunctionPass {
+public:
+ LoopLoadElimination() : FunctionPass(ID) {
+ initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *LAA = &getAnalysis<LoopAccessAnalysis>();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Build up a worklist of inner loops to process. This is necessary as
+ // versioning a loop creates new loops and can invalidate iterators across
+ // the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+ // The actual work is performed by LoadEliminationForLoop.
+ LoadEliminationForLoop LEL(L, LI, LAI, DT);
+ Changed |= LEL.processLoop();
+ }
+
+ // Report whether any loop was transformed.
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ static char ID;
+};
+}
+
+char LoopLoadElimination::ID;
+static const char LLE_name[] = "Loop Load Elimination";
+
+INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopLoadEliminationPass() {
+ return new LoopLoadElimination();
+}
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index ed103e6b8ed6..27c2d8824df0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -147,12 +147,12 @@ namespace {
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -162,11 +162,15 @@ namespace {
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
DominatorTree *DT;
+ bool PreserveLCSSA;
typedef SmallVector<Instruction *, 16> SmallInstructionVector;
typedef SmallSet<Instruction *, 16> SmallInstructionSet;
- // A chain of isomorphic instructions, indentified by a single-use PHI,
+ // Map between induction variable and its increment
+ DenseMap<Instruction *, int64_t> IVToIncMap;
+
+ // A chain of isomorphic instructions, identified by a single-use PHI
// representing a reduction. Only the last value may be used outside the
// loop.
struct SimpleLoopReduction {
@@ -300,22 +304,6 @@ namespace {
// The functions below can be called after we've finished processing all
// instructions in the loop, and we know which reductions were selected.
- // Is the provided instruction the PHI of a reduction selected for
- // rerolling?
- bool isSelectedPHI(Instruction *J) {
- if (!isa<PHINode>(J))
- return false;
-
- for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
- RI != RIE; ++RI) {
- int i = *RI;
- if (cast<Instruction>(J) == PossibleReds[i].getPHI())
- return true;
- }
-
- return false;
- }
-
bool validateSelected();
void replaceSelected();
@@ -335,7 +323,7 @@ namespace {
// x[i*3+1] = y2
// x[i*3+2] = y3
//
- // Base instruction -> i*3
+ // Base instruction -> i*3
// +---+----+
// / | \
// ST[y1] +1 +2 <-- Roots
@@ -366,8 +354,11 @@ namespace {
struct DAGRootTracker {
DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
ScalarEvolution *SE, AliasAnalysis *AA,
- TargetLibraryInfo *TLI)
- : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {}
+ TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA,
+ DenseMap<Instruction *, int64_t> &IncrMap)
+ : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
+ PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
/// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots();
@@ -413,11 +404,14 @@ namespace {
ScalarEvolution *SE;
AliasAnalysis *AA;
TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ bool PreserveLCSSA;
// The loop induction variable.
Instruction *IV;
// Loop step amount.
- uint64_t Inc;
+ int64_t Inc;
// Loop reroll count; if Inc == 1, this records the scaling applied
// to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
// If Inc is not 1, Scale = Inc.
@@ -430,6 +424,8 @@ namespace {
// they are used in (or specially, IL_All for instructions
// used in the loop increment mechanism).
UsesTy Uses;
+ // Map between induction variable and its increment
+ DenseMap<Instruction *, int64_t> &IVToIncMap;
};
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
@@ -442,10 +438,10 @@ namespace {
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
@@ -477,21 +473,20 @@ void LoopReroll::collectPossibleIVs(Loop *L,
continue;
if (const SCEVAddRecExpr *PHISCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
if (PHISCEV->getLoop() != L)
continue;
if (!PHISCEV->isAffine())
continue;
if (const SCEVConstant *IncSCEV =
dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
- if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
+ const APInt &AInt = IncSCEV->getAPInt().abs();
+ if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
continue;
- if (IncSCEV->getValue()->uge(MaxInc))
- continue;
-
- DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
- *PHISCEV << "\n");
- PossibleIVs.push_back(I);
+ IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
+ DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
+ << "\n");
+ PossibleIVs.push_back(&*I);
}
}
}
@@ -552,7 +547,7 @@ void LoopReroll::collectPossibleReductions(Loop *L,
if (!I->getType()->isSingleValueType())
continue;
- SimpleLoopReduction SLR(I, L);
+ SimpleLoopReduction SLR(&*I, L);
if (!SLR.valid())
continue;
@@ -699,17 +694,11 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
}
}
- int64_t V = CI->getValue().getSExtValue();
+ int64_t V = std::abs(CI->getValue().getSExtValue());
if (Roots.find(V) != Roots.end())
// No duplicates, please.
return false;
- // FIXME: Add support for negative values.
- if (V < 0) {
- DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
- return false;
- }
-
Roots[V] = cast<Instruction>(I);
}
@@ -731,7 +720,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
unsigned NumBaseUses = BaseUsers.size();
if (NumBaseUses == 0)
NumBaseUses = Roots.begin()->second->getNumUses();
-
+
// Check that every node has the same number of users.
for (auto &KV : Roots) {
if (KV.first == 0)
@@ -744,7 +733,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
}
}
- return true;
+ return true;
}
bool LoopReroll::DAGRootTracker::
@@ -787,7 +776,7 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
if (!collectPossibleRoots(IVU, V))
return false;
- // If we didn't get a root for index zero, then IVU must be
+ // If we didn't get a root for index zero, then IVU must be
// subsumed.
if (V.find(0) == V.end())
SubsumedInsts.insert(IVU);
@@ -818,13 +807,10 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
}
bool LoopReroll::DAGRootTracker::findRoots() {
-
- const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
- Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
- getValue()->getZExtValue();
+ Inc = IVToIncMap[IV];
assert(RootSets.empty() && "Unclean state!");
- if (Inc == 1) {
+ if (std::abs(Inc) == 1) {
for (auto *IVU : IV->users()) {
if (isLoopIncrement(IVU, IV))
LoopIncs.push_back(cast<Instruction>(IVU));
@@ -996,6 +982,25 @@ bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
return false;
}
+static bool isIgnorableInst(const Instruction *I) {
+ if (isa<DbgInfoIntrinsic>(I))
+ return true;
+ const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // TODO: the following intrinsics may also be whitelisted:
+ // lifetime_start, lifetime_end, invariant_start, invariant_end
+ return true;
+ }
+ return false;
+}
+
bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// We now need to check for equivalence of the use graph of each root with
// that of the primary induction variable (excluding the roots). Our goal
@@ -1029,7 +1034,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// Make sure all instructions in the loop are in one and only one
// set.
for (auto &KV : Uses) {
- if (KV.second.count() != 1) {
+ if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
<< *KV.first << " (#uses=" << KV.second.count() << ")\n");
return false;
@@ -1103,15 +1108,15 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
" vs. " << *RootInst << "\n");
return false;
}
-
+
RootIt = TryIt;
RootInst = TryIt->first;
}
// All instructions between the last root and this root
- // may belong to some other iteration. If they belong to a
+ // may belong to some other iteration. If they belong to a
// future iteration, then they're dangerous to alias with.
- //
+ //
// Note that because we allow a limited amount of flexibility in the order
// that we visit nodes, LastRootIt might be *before* RootIt, in which
// case we've already checked this set of instructions so we shouldn't
@@ -1267,6 +1272,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
++J;
}
+ bool Negative = IVToIncMap[IV] < 0;
const DataLayout &DL = Header->getModule()->getDataLayout();
// We need to create a new induction variable for each different BaseInst.
@@ -1275,13 +1281,12 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
const SCEVAddRecExpr *RealIVSCEV =
cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
const SCEV *Start = RealIVSCEV->getStart();
- const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>
- (SE->getAddRecExpr(Start,
- SE->getConstant(RealIVSCEV->getType(), 1),
- L, SCEV::FlagAnyWrap));
+ const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>(SE->getAddRecExpr(
+ Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L,
+ SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander.
SCEVExpander Expander(*SE, DL, "reroll");
- Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), &Header->front());
for (auto &KV : Uses) {
if (KV.second.find_first() == 0)
@@ -1294,8 +1299,8 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
// Iteration count SCEV minus 1
- const SCEV *ICMinus1SCEV =
- SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+ const SCEV *ICMinus1SCEV = SE->getMinusSCEV(
+ ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1));
Value *ICMinus1; // Iteration count minus 1
if (isa<SCEVConstant>(ICMinus1SCEV)) {
@@ -1303,7 +1308,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
} else {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, Parent);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
Preheader->getTerminator());
@@ -1444,13 +1449,14 @@ void LoopReroll::ReductionTracker::replaceSelected() {
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
- DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
+ DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
+ IVToIncMap);
if (!DAGRoots.findRoots())
return false;
DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
*IV << "\n");
-
+
if (!DAGRoots.validate(Reductions))
return false;
if (!Reductions.validateSelected())
@@ -1469,11 +1475,12 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
@@ -1490,13 +1497,13 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
- const SCEV *IterCount =
- SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
+ const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
// First, we need to find the induction variable with respect to which we can
// reroll (there may be several possible options).
SmallInstructionVector PossibleIVs;
+ IVToIncMap.clear();
collectPossibleIVs(L, PossibleIVs);
if (PossibleIVs.empty()) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index a675e1289baf..5e6c2da08cc3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -13,11 +13,15 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -41,95 +45,6 @@ DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
cl::desc("The default maximum header size for automatic loop rotation"));
STATISTIC(NumRotated, "Number of loops rotated");
-namespace {
-
- class LoopRotate : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
- initializeLoopRotatePass(*PassRegistry::getPassRegistry());
- if (SpecifiedMaxHeaderSize == -1)
- MaxHeaderSize = DefaultRotationThreshold;
- else
- MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
- }
-
- // LCSSA form makes instruction renaming easier.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L, bool SimplifiedLatch);
-
- private:
- unsigned MaxHeaderSize;
- LoopInfo *LI;
- const TargetTransformInfo *TTI;
- AssumptionCache *AC;
- DominatorTree *DT;
- };
-}
-
-char LoopRotate::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
- return new LoopRotate(MaxHeaderSize);
-}
-
-/// Rotate Loop L as many times as possible. Return true if
-/// the loop is rotated at least once.
-bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
- return false;
-
- // Save the loop metadata.
- MDNode *LoopMD = L->getLoopID();
-
- Function &F = *L->getHeader()->getParent();
-
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
- // Simplify the loop latch before attempting to rotate the header
- // upward. Rotation may not be needed if the loop tail can be folded into the
- // loop exit.
- bool SimplifiedLatch = simplifyLoopLatch(L);
-
- // One loop can be rotated multiple times.
- bool MadeChange = false;
- while (rotateLoop(L, SimplifiedLatch)) {
- MadeChange = true;
- SimplifiedLatch = false;
- }
-
- // Restore the loop metadata.
- // NB! We presume LoopRotation DOESN'T ADD its own metadata.
- if ((MadeChange || SimplifiedLatch) && LoopMD)
- L->setLoopID(LoopMD);
-
- return MadeChange;
-}
/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
/// old header into the preheader. If there were uses of the values produced by
@@ -147,7 +62,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// as necessary.
SSAUpdater SSA;
for (I = OrigHeader->begin(); I != E; ++I) {
- Value *OrigHeaderVal = I;
+ Value *OrigHeaderVal = &*I;
// If there are no uses of the value (e.g. because it returns void), there
// is nothing to rewrite.
@@ -196,127 +111,6 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
}
}
-/// Determine whether the instructions in this range may be safely and cheaply
-/// speculated. This is not an important enough situation to develop complex
-/// heuristics. We handle a single arithmetic instruction along with any type
-/// conversions.
-static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
- BasicBlock::iterator End, Loop *L) {
- bool seenIncrement = false;
- bool MultiExitLoop = false;
-
- if (!L->getExitingBlock())
- MultiExitLoop = true;
-
- for (BasicBlock::iterator I = Begin; I != End; ++I) {
-
- if (!isSafeToSpeculativelyExecute(I))
- return false;
-
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- switch (I->getOpcode()) {
- default:
- return false;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GEPOperator>(I)->hasAllConstantIndices())
- return false;
- // fall-thru to increment case
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr: {
- Value *IVOpnd = !isa<Constant>(I->getOperand(0))
- ? I->getOperand(0)
- : !isa<Constant>(I->getOperand(1))
- ? I->getOperand(1)
- : nullptr;
- if (!IVOpnd)
- return false;
-
- // If increment operand is used outside of the loop, this speculation
- // could cause extra live range interference.
- if (MultiExitLoop) {
- for (User *UseI : IVOpnd->users()) {
- auto *UserInst = cast<Instruction>(UseI);
- if (!L->contains(UserInst))
- return false;
- }
- }
-
- if (seenIncrement)
- return false;
- seenIncrement = true;
- break;
- }
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- // ignore type conversions
- break;
- }
- }
- return true;
-}
-
-/// Fold the loop tail into the loop exit by speculating the loop tail
-/// instructions. Typically, this is a single post-increment. In the case of a
-/// simple 2-block loop, hoisting the increment can be much better than
-/// duplicating the entire loop header. In the case of loops with early exits,
-/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
-/// canonical form so downstream passes can handle it.
-///
-/// I don't believe this invalidates SCEV.
-bool LoopRotate::simplifyLoopLatch(Loop *L) {
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch || Latch->hasAddressTaken())
- return false;
-
- BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!Jmp || !Jmp->isUnconditional())
- return false;
-
- BasicBlock *LastExit = Latch->getSinglePredecessor();
- if (!LastExit || !L->isLoopExiting(LastExit))
- return false;
-
- BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
- if (!BI)
- return false;
-
- if (!shouldSpeculateInstrs(Latch->begin(), Jmp, L))
- return false;
-
- DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
- << LastExit->getName() << "\n");
-
- // Hoist the instructions from Latch into LastExit.
- LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
-
- unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
- BasicBlock *Header = Jmp->getSuccessor(0);
- assert(Header == L->getHeader() && "expected a backward branch");
-
- // Remove Latch from the CFG so that LastExit becomes the new Latch.
- BI->setSuccessor(FallThruPath, Header);
- Latch->replaceSuccessorsPhiUsesWith(LastExit);
- Jmp->eraseFromParent();
-
- // Nuke the Latch block.
- assert(Latch->empty() && "unable to evacuate Latch");
- LI->removeBlock(Latch);
- if (DT)
- DT->eraseNode(Latch);
- Latch->eraseFromParent();
- return true;
-}
-
/// Rotate loop LP. Return true if the loop is rotated.
///
/// \param SimplifiedLatch is true if the latch was just folded into the final
@@ -327,7 +121,10 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
/// rotation. LoopRotate should be repeatable and converge to a canonical
/// form. This property is satisfied because simplifying the loop latch can only
/// happen once across multiple invocations of the LoopRotate pass.
-bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
+static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE,
+ bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -382,7 +179,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
- if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ if (SE)
SE->forgetLoop(L);
DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
@@ -420,7 +217,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
while (I != E) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// If the instruction's operands are invariant and it doesn't read or write
// memory, then it is safe to hoist. Doing this doesn't change the order of
@@ -465,8 +262,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
TerminatorInst *TI = OrigHeader->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+ for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock::iterator BI = SuccBB->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
@@ -607,3 +404,221 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
++NumRotated;
return true;
}
+
+/// Determine whether the instructions in this range may be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End, Loop *L) {
+ bool seenIncrement = false;
+ bool MultiExitLoop = false;
+
+ if (!L->getExitingBlock())
+ MultiExitLoop = true;
+
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(&*I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr: {
+ Value *IVOpnd = !isa<Constant>(I->getOperand(0))
+ ? I->getOperand(0)
+ : !isa<Constant>(I->getOperand(1))
+ ? I->getOperand(1)
+ : nullptr;
+ if (!IVOpnd)
+ return false;
+
+ // If increment operand is used outside of the loop, this speculation
+ // could cause extra live range interference.
+ if (MultiExitLoop) {
+ for (User *UseI : IVOpnd->users()) {
+ auto *UserInst = cast<Instruction>(UseI);
+ if (!L->contains(UserInst))
+ return false;
+ }
+ }
+
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the case of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
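+///
+/// A sketch of the shape this handles (illustrative, simplified IR):
+///
+///   exiting:                          exiting:
+///     ...                               ...
+///     br i1 %c, %exit, %latch    =>     %i.next = add %i, 1
+///   latch:                              br i1 %c, %exit, %header
+///     %i.next = add %i, 1
+///     br label %header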
+static bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return false;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return false;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return false;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
+ return false;
+
+ DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ // Hoist the instructions from Latch into LastExit.
+ LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
+ Latch->begin(), Jmp->getIterator());
+
+ unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
+ BasicBlock *Header = Jmp->getSuccessor(0);
+ assert(Header == L->getHeader() && "expected a backward branch");
+
+ // Remove Latch from the CFG so that LastExit becomes the new Latch.
+ BI->setSuccessor(FallThruPath, Header);
+ Latch->replaceSuccessorsPhiUsesWith(LastExit);
+ Jmp->eraseFromParent();
+
+ // Nuke the Latch block.
+ assert(Latch->empty() && "unable to evacuate Latch");
+ LI->removeBlock(Latch);
+ if (DT)
+ DT->eraseNode(Latch);
+ Latch->eraseFromParent();
+ return true;
+}
+
+/// Rotate \c L as many times as possible. Return true if the loop is rotated
+/// at least once.
+static bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ AssumptionCache *AC, DominatorTree *DT,
+ ScalarEvolution *SE) {
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ bool SimplifiedLatch = simplifyLoopLatch(L, LI, DT);
+
+ // One loop can be rotated multiple times.
+ bool MadeChange = false;
+ while (rotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE, SimplifiedLatch)) {
+ MadeChange = true;
+ SimplifiedLatch = false;
+ }
+
+ // Restore the loop metadata.
+ // NB! We presume LoopRotation DOESN'T ADD its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
+ return MadeChange;
+}
+
+namespace {
+
+class LoopRotate : public LoopPass {
+ unsigned MaxHeaderSize;
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ if (SpecifiedMaxHeaderSize == -1)
+ MaxHeaderSize = DefaultRotationThreshold;
+ else
+ MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
+ }
+
+ // LCSSA form makes instruction renaming easier.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipOptnoneFunction(L))
+ return false;
+ Function &F = *L->getHeader()->getParent();
+
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ auto *SE = SEWP ? &SEWP->getSE() : nullptr;
+
+ return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE);
+ }
+};
+}
+
+char LoopRotate::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+ return new LoopRotate(MaxHeaderSize);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4b59f3d2f6cc..2101225ed9f7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -105,10 +105,33 @@ static bool StressIVChain = false;
namespace {
-/// RegSortData - This class holds data which is used to order reuse candidates.
+struct MemAccessTy {
+ /// Used in situations where the accessed memory type is unknown.
+ static const unsigned UnknownAddressSpace = ~0u;
+
+ Type *MemTy;
+ unsigned AddrSpace;
+
+ MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
+
+ MemAccessTy(Type *Ty, unsigned AS) :
+ MemTy(Ty), AddrSpace(AS) {}
+
+ bool operator==(MemAccessTy Other) const {
+ return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
+ }
+
+ bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
+
+ static MemAccessTy getUnknown(LLVMContext &Ctx) {
+ return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
+ }
+};
+
+/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
- /// UsedByIndices - This represents the set of LSRUse indices which reference
+ /// This represents the set of LSRUse indices which reference
/// a particular register.
SmallBitVector UsedByIndices;
@@ -122,16 +145,14 @@ void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
-/// RegUseTracker - Map register candidates to information about how they are
-/// used.
+/// Map register candidates to information about how they are used.
class RegUseTracker {
typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
@@ -139,9 +160,9 @@ class RegUseTracker {
SmallVector<const SCEV *, 16> RegSequence;
public:
- void CountRegister(const SCEV *Reg, size_t LUIdx);
- void DropRegister(const SCEV *Reg, size_t LUIdx);
- void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
+ void countRegister(const SCEV *Reg, size_t LUIdx);
+ void dropRegister(const SCEV *Reg, size_t LUIdx);
+ void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
@@ -160,7 +181,7 @@ public:
}
void
-RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
+RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
std::pair<RegUsesTy::iterator, bool> Pair =
RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
RegSortData &RSD = Pair.first->second;
@@ -171,7 +192,7 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
}
void
-RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
RegUsesTy::iterator It = RegUsesMap.find(Reg);
assert(It != RegUsesMap.end());
RegSortData &RSD = It->second;
@@ -180,7 +201,7 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
}
void
-RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
assert(LUIdx <= LastLUIdx);
// Update RegUses. The data structure is not optimized for this purpose;
@@ -219,9 +240,8 @@ void RegUseTracker::clear() {
namespace {
-/// Formula - This class holds information that describes a formula for
-/// computing satisfying a use. It may include broken-out immediates and scaled
-/// registers.
+/// This class holds information that describes a formula for computing a
+/// value satisfying a use. It may include broken-out immediates and scaled
+/// registers.
struct Formula {
/// Global base address used for complex addressing.
GlobalValue *BaseGV;
@@ -235,8 +255,8 @@ struct Formula {
/// The scale of any complex addressing.
int64_t Scale;
- /// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty. The canonical representation of a formula is
+ /// The list of "base" registers for this use. When this is non-empty. The
+ /// canonical representation of a formula is
/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
/// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
/// #1 enforces that the scaled register is always used when at least two
@@ -247,31 +267,31 @@ struct Formula {
/// form.
SmallVector<const SCEV *, 4> BaseRegs;
- /// ScaledReg - The 'scaled' register for this use. This should be non-null
- /// when Scale is not zero.
+ /// The 'scaled' register for this use. This should be non-null when Scale is
+ /// not zero.
const SCEV *ScaledReg;
- /// UnfoldedOffset - An additional constant offset which added near the
- /// use. This requires a temporary register, but the offset itself can
- /// live in an add immediate field rather than a register.
+ /// An additional constant offset which is added near the use. This requires
+ /// a temporary register, but the offset itself can live in an add immediate
+ /// field rather than a register.
int64_t UnfoldedOffset;
Formula()
: BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
ScaledReg(nullptr), UnfoldedOffset(0) {}
- void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
+ void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
bool isCanonical() const;
- void Canonicalize();
+ void canonicalize();
- bool Unscale();
+ bool unscale();
size_t getNumRegs() const;
Type *getType() const;
- void DeleteBaseReg(const SCEV *&S);
+ void deleteBaseReg(const SCEV *&S);
bool referencesReg(const SCEV *S) const;
bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
@@ -283,7 +303,7 @@ struct Formula {
}
-/// DoInitialMatch - Recursion helper for InitialMatch.
+/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
@@ -336,10 +356,9 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
Bad.push_back(S);
}
-/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
-/// attempting to keep all loop-invariant and loop-computable values in a
-/// single base register.
-void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
+/// Incorporate loop-variant parts of S into this Formula, attempting to keep
+/// all loop-invariant and loop-computable values in a single base register.
+void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
DoInitialMatch(S, L, Good, Bad, SE);
@@ -355,7 +374,7 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
- Canonicalize();
+ canonicalize();
}
/// \brief Check whether or not this formula satisfies the canonical
@@ -373,7 +392,7 @@ bool Formula::isCanonical() const {
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
-void Formula::Canonicalize() {
+void Formula::canonicalize() {
if (isCanonical())
return;
// So far we did not need this case. This is easy to implement but it is
@@ -394,7 +413,7 @@ void Formula::Canonicalize() {
/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
-bool Formula::Unscale() {
+bool Formula::unscale() {
if (Scale != 1)
return false;
Scale = 0;
@@ -403,15 +422,14 @@ bool Formula::Unscale() {
return true;
}
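The renamed canonicalize()/unscale() pair maintains the invariant documented above: once there is more than one base register, one of them must occupy the scaled slot, and a scale of 1 can be folded back into the base list. A self-contained sketch of just that bookkeeping (ints stand in for SCEV pointers; an analogue, not the patch's Formula):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct MiniFormula {
      std::vector<int> BaseRegs;   // registers stand in for SCEV*s
      int ScaledReg = 0;           // 0 means "no scaled register"
      int64_t Scale = 0;

      bool isCanonical() const {
        if (ScaledReg) return Scale != 1 || !BaseRegs.empty();
        return BaseRegs.size() <= 1;
      }
      void canonicalize() {
        if (isCanonical()) return;
        ScaledReg = BaseRegs.back();  // move one base reg into the scaled slot
        BaseRegs.pop_back();
        Scale = 1;
      }
      bool unscale() {                // reg1 + 1*reg2  ->  reg1 + reg2
        if (Scale != 1) return false;
        Scale = 0;
        BaseRegs.push_back(ScaledReg);
        ScaledReg = 0;
        return true;                  // note: result may be non-canonical
      }
    };

    int main() {
      MiniFormula F;
      F.BaseRegs = {1, 2};            // two base regs: not canonical yet
      assert(!F.isCanonical());
      F.canonicalize();
      assert(F.isCanonical() && F.ScaledReg == 2 && F.Scale == 1);
      assert(F.unscale() && F.Scale == 0 && F.BaseRegs.size() == 2);
    }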
-/// getNumRegs - Return the total number of register operands used by this
-/// formula. This does not include register uses implied by non-constant
-/// addrec strides.
+/// Return the total number of register operands used by this formula. This does
+/// not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
-/// getType - Return the type of this formula, if it has one, or null
-/// otherwise. This type is meaningless except for the bit size.
+/// Return the type of this formula, if it has one, or null otherwise. This type
+/// is meaningless except for the bit size.
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
@@ -419,21 +437,21 @@ Type *Formula::getType() const {
nullptr;
}
-/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
-void Formula::DeleteBaseReg(const SCEV *&S) {
+/// Delete the given base reg from the BaseRegs list.
+void Formula::deleteBaseReg(const SCEV *&S) {
if (&S != &BaseRegs.back())
std::swap(S, BaseRegs.back());
BaseRegs.pop_back();
}
-/// referencesReg - Test if this formula references the given register.
+/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
return S == ScaledReg ||
std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
}
-/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
-/// which are used by uses other than the use with the given index.
+/// Test whether this formula uses registers which are used by uses other than
+/// the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const {
if (ScaledReg)
@@ -481,30 +499,29 @@ void Formula::print(raw_ostream &OS) const {
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void Formula::dump() const {
print(errs()); errs() << '\n';
}
-#endif
-/// isAddRecSExtable - Return true if the given addrec can be sign-extended
-/// without changing its value.
+/// Return true if the given addrec can be sign-extended without changing its
+/// value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
-/// isAddSExtable - Return true if the given add can be sign-extended
-/// without changing its value.
+/// Return true if the given add can be sign-extended without changing its
+/// value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
-/// isMulSExtable - Return true if the given mul can be sign-extended
-/// without changing its value.
+/// Return true if the given mul can be sign-extended without changing its
+/// value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(),
@@ -512,12 +529,11 @@ static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
-/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
-/// and if the remainder is known to be zero, or null otherwise. If
-/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
-/// to Y, ignoring that the multiplication may overflow, which is useful when
-/// the result will be used in a context where the most significant bits are
-/// ignored.
+/// Return an expression for LHS /s RHS, if it can be determined and if the
+/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
+/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that
+/// the multiplication may overflow, which is useful when the result will be
+/// used in a context where the most significant bits are ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
ScalarEvolution &SE,
bool IgnoreSignificantBits = false) {
@@ -528,7 +544,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Handle a few RHS special cases.
const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (RC) {
- const APInt &RA = RC->getValue()->getValue();
+ const APInt &RA = RC->getAPInt();
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
// some folding.
if (RA.isAllOnesValue())
@@ -542,8 +558,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
return nullptr;
- const APInt &LA = C->getValue()->getValue();
- const APInt &RA = RC->getValue()->getValue();
+ const APInt &LA = C->getAPInt();
+ const APInt &RA = RC->getAPInt();
if (LA.srem(RA) != 0)
return nullptr;
return SE.getConstant(LA.sdiv(RA));
@@ -603,12 +619,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
return nullptr;
}
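At the SCEV level the contract mirrors exact integer division: produce a quotient only when the remainder is provably zero, otherwise report failure (nullptr in the patch, an empty optional below). A scalar analogue, assuming C++17:

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // Scalar analogue of the exact-division contract (illustrative only).
    std::optional<int64_t> getExactSDivScalar(int64_t LHS, int64_t RHS) {
      if (RHS == 0) return std::nullopt;        // no meaningful quotient
      if (LHS % RHS != 0) return std::nullopt;  // remainder not zero: give up
      return LHS / RHS;
    }

    int main() {
      assert(getExactSDivScalar(12, 4) == 3);
      assert(!getExactSDivScalar(13, 4));       // inexact: rejected, not rounded
      assert(getExactSDivScalar(-8, -1) == 8);  // x /s -1 behaves like x * -1
    }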
-/// ExtractImmediate - If S involves the addition of a constant integer value,
-/// return that integer value, and mutate S to point to a new SCEV with that
-/// value excluded.
+/// If S involves the addition of a constant integer value, return that integer
+/// value, and mutate S to point to a new SCEV with that value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- if (C->getValue()->getValue().getMinSignedBits() <= 64) {
+ if (C->getAPInt().getMinSignedBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
@@ -630,9 +645,8 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
return 0;
}
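ExtractImmediate's contract, returning the constant addend while mutating S to the residue, can be illustrated on a toy sum-of-terms expression (a hypothetical ToyExpr, not SCEV):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Toy "sum of terms" expression: constants are peeled off and the
    // expression is mutated in place, mirroring ExtractImmediate's contract.
    struct ToyExpr {
      std::vector<int64_t> Constants;  // constant addends
      std::vector<int> Symbols;        // opaque non-constant terms
    };

    static int64_t extractImmediate(ToyExpr &S) {
      int64_t Imm = 0;
      for (int64_t C : S.Constants) Imm += C;  // gather every constant addend
      S.Constants.clear();                     // ...and exclude them from S
      return Imm;
    }

    int main() {
      ToyExpr E{{4, 6}, {/*sym*/ 1}};
      assert(extractImmediate(E) == 10);  // immediate returned to the caller
      assert(E.Constants.empty());        // S now holds only the residue
      assert(extractImmediate(E) == 0);   // nothing left to extract
    }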
-/// ExtractSymbol - If S involves the addition of a GlobalValue address,
-/// return that symbol, and mutate S to point to a new SCEV with that
-/// value excluded.
+/// If S involves the addition of a GlobalValue address, return that symbol, and
+/// mutate S to point to a new SCEV with that value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
@@ -657,8 +671,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
return nullptr;
}
-/// isAddressUse - Returns true if the specified instruction is using the
-/// specified value as an address.
+/// Returns true if the specified instruction is using the specified value as an
+/// address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
@@ -682,12 +696,15 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
return isAddress;
}
-/// getAccessType - Return the type of the memory being accessed.
-static Type *getAccessType(const Instruction *Inst) {
- Type *AccessTy = Inst->getType();
- if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
- AccessTy = SI->getOperand(0)->getType();
- else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+/// Return the type of the memory being accessed.
+static MemAccessTy getAccessType(const Instruction *Inst) {
+ MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ AccessTy.MemTy = SI->getOperand(0)->getType();
+ AccessTy.AddrSpace = SI->getPointerAddressSpace();
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ AccessTy.AddrSpace = LI->getPointerAddressSpace();
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
@@ -696,21 +713,21 @@ static Type *getAccessType(const Instruction *Inst) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- AccessTy = II->getArgOperand(0)->getType();
+ AccessTy.MemTy = II->getArgOperand(0)->getType();
break;
}
}
// All pointers have the same requirements, so canonicalize them to an
// arbitrary pointer type to minimize variation.
- if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
- AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
- PTy->getAddressSpace());
+ if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
+ AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace());
return AccessTy;
}
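The reworked getAccessType now records an address space for loads as well as stores, leaving everything else at the unknown sentinel. A mock of that dispatch (an illustrative enum in place of llvm::Instruction):

    #include <cassert>

    enum class Kind { Load, Store, Other };

    struct MockInst {
      Kind K;
      unsigned PtrAddrSpace;  // address space of the memory operand
    };

    struct Access {
      static const unsigned Unknown = ~0u;
      unsigned AddrSpace = Unknown;
    };

    // Mock of the new dispatch: loads and stores both report their
    // pointer's address space; other instructions stay Unknown.
    Access classify(const MockInst &I) {
      Access A;
      if (I.K == Kind::Store || I.K == Kind::Load)
        A.AddrSpace = I.PtrAddrSpace;
      return A;
    }

    int main() {
      assert(classify({Kind::Load, 3}).AddrSpace == 3);
      assert(classify({Kind::Store, 1}).AddrSpace == 1);
      assert(classify({Kind::Other, 0}).AddrSpace == Access::Unknown);
    }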
-/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+/// Return true if this AddRec is already a phi in its loop.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
@@ -793,9 +810,8 @@ static bool isHighCostExpansion(const SCEV *S,
return true;
}
-/// DeleteTriviallyDeadInstructions - If any of the instructions is the
-/// specified set are trivially dead, delete them and see if this makes any of
-/// their operands subsequently dead.
+/// If any of the instructions in the specified set are trivially dead, delete
+/// them and see if this makes any of their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
@@ -842,7 +858,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
namespace {
-/// Cost - This class is used to measure and compare candidate formulae.
+/// This class is used to measure and compare candidate formulae.
class Cost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
@@ -905,7 +921,7 @@ private:
}
-/// RateRegister - Tally up interesting quantities from the given register.
+/// Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
@@ -951,9 +967,9 @@ void Cost::RateRegister(const SCEV *Reg,
SE.hasComputableLoopEvolution(Reg, L);
}
-/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it. Optional LoserRegs provides a way to declare any formula
-/// that refers to one of those regs an instant loser.
+/// Record this register in the set. If we haven't seen it before, rate
+/// it. Optional LoserRegs provides a way to declare any formula that refers to
+/// one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
@@ -1024,7 +1040,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
assert(isValid() && "invalid cost");
}
-/// Lose - Set this cost to a losing value.
+/// Set this cost to a losing value.
void Cost::Lose() {
NumRegs = ~0u;
AddRecCost = ~0u;
@@ -1035,7 +1051,7 @@ void Cost::Lose() {
ScaleCost = ~0u;
}
-/// operator< - Choose the lower cost.
+/// Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
ImmCost, SetupCost) <
@@ -1061,37 +1077,35 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << SetupCost << " setup cost";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void Cost::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
-/// LSRFixup - An operand value in an instruction which is to be replaced
-/// with some equivalent, possibly strength-reduced, replacement.
+/// An operand value in an instruction which is to be replaced with some
+/// equivalent, possibly strength-reduced, replacement.
struct LSRFixup {
- /// UserInst - The instruction which will be updated.
+ /// The instruction which will be updated.
Instruction *UserInst;
- /// OperandValToReplace - The operand of the instruction which will
- /// be replaced. The operand may be used more than once; every instance
- /// will be replaced.
+ /// The operand of the instruction which will be replaced. The operand may be
+ /// used more than once; every instance will be replaced.
Value *OperandValToReplace;
- /// PostIncLoops - If this user is to use the post-incremented value of an
- /// induction variable, this variable is non-null and holds the loop
- /// associated with the induction variable.
+ /// If this user is to use the post-incremented value of an induction
+ /// variable, this variable is non-null and holds the loop associated with the
+ /// induction variable.
PostIncLoopSet PostIncLoops;
- /// LUIdx - The index of the LSRUse describing the expression which
- /// this fixup needs, minus an offset (below).
+ /// The index of the LSRUse describing the expression which this fixup needs,
+ /// minus an offset (below).
size_t LUIdx;
- /// Offset - A constant offset to be added to the LSRUse expression.
- /// This allows multiple fixups to share the same LSRUse with different
- /// offsets, for example in an unrolled loop.
+ /// A constant offset to be added to the LSRUse expression. This allows
+ /// multiple fixups to share the same LSRUse with different offsets, for
+ /// example in an unrolled loop.
int64_t Offset;
bool isUseFullyOutsideLoop(const Loop *L) const;
@@ -1108,8 +1122,7 @@ LSRFixup::LSRFixup()
: UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
Offset(0) {}
-/// isUseFullyOutsideLoop - Test whether this fixup always uses its
-/// value outside of the given loop.
+/// Test whether this fixup always uses its value outside of the given loop.
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
// PHI nodes use their value in their incoming blocks.
if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
@@ -1149,16 +1162,15 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
-/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
-/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
+/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
+/// SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
static SmallVector<const SCEV *, 4> getEmptyKey() {
SmallVector<const SCEV *, 4> V;
@@ -1182,17 +1194,17 @@ struct UniquifierDenseMapInfo {
}
};
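UniquifierDenseMapInfo follows the usual DenseMapInfo convention: two reserved key values mark empty and deleted slots in-band, so the table needs no side metadata. A stripped-down probing set over int keys showing that convention (a sketch, not LLVM's DenseMap):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct IntMapInfo {
      static int getEmptyKey() { return INT32_MAX; }        // never a real key
      static int getTombstoneKey() { return INT32_MAX - 1; }
      static unsigned getHashValue(int K) { return unsigned(K) * 37u; }
    };

    struct TinySet {
      std::vector<int> Slots = std::vector<int>(8, IntMapInfo::getEmptyKey());
      bool insert(int K) {
        unsigned I = IntMapInfo::getHashValue(K) % Slots.size();
        while (Slots[I] != IntMapInfo::getEmptyKey() &&
               Slots[I] != IntMapInfo::getTombstoneKey()) {
          if (Slots[I] == K) return false;  // already present
          I = (I + 1) % Slots.size();       // linear probing
        }
        Slots[I] = K;
        return true;
      }
    };

    int main() {
      TinySet S;
      assert(S.insert(5) && !S.insert(5));  // second insert is a no-op
    }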
-/// LSRUse - This class holds the state that LSR keeps for each use in
-/// IVUsers, as well as uses invented by LSR itself. It includes information
-/// about what kinds of things can be folded into the user, information about
-/// the user itself, and information about how the use may be satisfied.
-/// TODO: Represent multiple users of the same expression in common?
+/// This class holds the state that LSR keeps for each use in IVUsers, as well
+/// as uses invented by LSR itself. It includes information about what kinds of
+/// things can be folded into the user, information about the user itself, and
+/// information about how the use may be satisfied. TODO: Represent multiple
+/// users of the same expression in common?
class LSRUse {
DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
- /// KindType - An enum for a kind of use, indicating what types of
- /// scaled and immediate operands it might support.
+ /// An enum for a kind of use, indicating what types of scaled and immediate
+ /// operands it might support.
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
@@ -1204,15 +1216,14 @@ public:
typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
KindType Kind;
- Type *AccessTy;
+ MemAccessTy AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
int64_t MaxOffset;
- /// AllFixupsOutsideLoop - This records whether all of the fixups using this
- /// LSRUse are outside of the loop, in which case some special-case heuristics
- /// may be used.
+ /// This records whether all of the fixups using this LSRUse are outside of
+ /// the loop, in which case some special-case heuristics may be used.
bool AllFixupsOutsideLoop;
/// RigidFormula is set to true to guarantee that this use will be associated
@@ -1222,26 +1233,24 @@ public:
/// changing the formula.
bool RigidFormula;
- /// WidestFixupType - This records the widest use type for any fixup using
- /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
- /// max fixup widths to be equivalent, because the narrower one may be relying
- /// on the implicit truncation to truncate away bogus bits.
+ /// This records the widest use type for any fixup using this
+ /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
+ /// fixup widths to be equivalent, because the narrower one may be relying on
+ /// the implicit truncation to truncate away bogus bits.
Type *WidestFixupType;
- /// Formulae - A list of ways to build a value that can satisfy this user.
- /// After the list is populated, one of these is selected heuristically and
- /// used to formulate a replacement for OperandValToReplace in UserInst.
+ /// A list of ways to build a value that can satisfy this user. After the
+ /// list is populated, one of these is selected heuristically and used to
+ /// formulate a replacement for OperandValToReplace in UserInst.
SmallVector<Formula, 12> Formulae;
- /// Regs - The set of register candidates used by all formulae in this LSRUse.
+ /// The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
- LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
- MinOffset(INT64_MAX),
- MaxOffset(INT64_MIN),
- AllFixupsOutsideLoop(true),
- RigidFormula(false),
- WidestFixupType(nullptr) {}
+ LSRUse(KindType K, MemAccessTy AT)
+ : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true), RigidFormula(false),
+ WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
@@ -1254,8 +1263,8 @@ public:
}
-/// HasFormula - Test whether this use as a formula which has the same
-/// registers as the given formula.
+/// Test whether this use has a formula with the same registers as the given
+/// formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
@@ -1264,9 +1273,8 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
return Uniquifier.count(Key);
}
-/// InsertFormula - If the given formula has not yet been inserted, add it to
-/// the list, and return true. Return false otherwise.
-/// The formula must be in canonical form.
+/// If the given formula has not yet been inserted, add it to the list, and
+/// return true. Return false otherwise. The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
assert(F.isCanonical() && "Invalid canonical representation");
@@ -1300,14 +1308,14 @@ bool LSRUse::InsertFormula(const Formula &F) {
return true;
}
-/// DeleteFormula - Remove the given formula from this use's list.
+/// Remove the given formula from this use's list.
void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
}
-/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+/// Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Now that we've filtered out some formulae, recompute the Regs set.
SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
@@ -1320,7 +1328,7 @@ void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Update the RegTracker.
for (const SCEV *S : OldRegs)
if (!Regs.count(S))
- RegUses.DropRegister(S, LUIdx);
+ RegUses.dropRegister(S, LUIdx);
}
void LSRUse::print(raw_ostream &OS) const {
@@ -1331,10 +1339,13 @@ void LSRUse::print(raw_ostream &OS) const {
case ICmpZero: OS << "ICmpZero"; break;
case Address:
OS << "Address of ";
- if (AccessTy->isPointerTy())
+ if (AccessTy.MemTy->isPointerTy())
OS << "pointer"; // the full pointer type could be really verbose
- else
- OS << *AccessTy;
+ else
+ OS << *AccessTy.MemTy;
+
+ OS << " in addrspace(" << AccessTy.AddrSpace << ')';
}
OS << ", Offsets={";
@@ -1353,19 +1364,19 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
-#endif
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
- return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+ return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
+ HasBaseReg, Scale, AccessTy.AddrSpace);
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
@@ -1412,7 +1423,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
// Check for overflow.
@@ -1433,7 +1444,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
const Formula &F) {
// For the purpose of isAMCompletelyFolded either having a canonical formula
// or a scale not equal to zero is correct.
@@ -1447,11 +1458,11 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}
-/// isLegalUse - Test whether we know how to expand the current formula.
+/// Test whether we know how to expand the current formula.
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) {
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ MemAccessTy AccessTy, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
// We know how to expand completely foldable formulae.
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale) ||
@@ -1463,8 +1474,8 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- const Formula &F) {
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ MemAccessTy AccessTy, const Formula &F) {
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
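These overloads reduce an offset range to its two extremes: if both BaseOffset + MinOffset and BaseOffset + MaxOffset are legal addressing-mode immediates, everything in between is accepted. A scalar sketch with a made-up 12-bit immediate rule standing in for the TTI hook, and a simplified overflow guard:

    #include <cassert>
    #include <cstdint>

    // Made-up stand-in for the target hook: immediates must fit in 12 bits,
    // a common addressing-mode constraint (purely illustrative).
    static bool isLegalImm(int64_t Off) { return Off >= -2048 && Off <= 2047; }

    // Mirror of the range check: legal only if both extremes fold. The
    // overflow guard is simplified relative to the patch's code.
    static bool rangeFolds(int64_t Base, int64_t MinOff, int64_t MaxOff) {
      if (MinOff > 0 && Base > INT64_MAX - MinOff) return false;  // overflow
      if (MaxOff > 0 && Base > INT64_MAX - MaxOff) return false;
      return isLegalImm(Base + MinOff) && isLegalImm(Base + MaxOff);
    }

    int main() {
      assert(rangeFolds(0, -16, 16));    // whole range within 12 bits
      assert(!rangeFolds(2040, 0, 32));  // max end falls out of range
    }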
@@ -1490,14 +1501,12 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
switch (LU.Kind) {
case LSRUse::Address: {
// Check the scaling factor cost with both the min and max offsets.
- int ScaleCostMinOffset =
- TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
- F.BaseOffset + LU.MinOffset,
- F.HasBaseReg, F.Scale);
- int ScaleCostMaxOffset =
- TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
- F.BaseOffset + LU.MaxOffset,
- F.HasBaseReg, F.Scale);
+ int ScaleCostMinOffset = TTI.getScalingFactorCost(
+ LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
+ F.Scale, LU.AccessTy.AddrSpace);
+ int ScaleCostMaxOffset = TTI.getScalingFactorCost(
+ LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
+ F.Scale, LU.AccessTy.AddrSpace);
assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
"Legal addressing mode has an illegal cost!");
@@ -1515,7 +1524,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg) {
// Fast-path: zero is always foldable.
@@ -1539,7 +1548,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
ScalarEvolution &SE, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind,
- Type *AccessTy, const SCEV *S, bool HasBaseReg) {
+ MemAccessTy AccessTy, const SCEV *S,
+ bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
@@ -1564,9 +1574,9 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
namespace {
-/// IVInc - An individual increment in a Chain of IV increments.
-/// Relate an IV user to an expression that computes the IV it uses from the IV
-/// used by the previous link in the Chain.
+/// An individual increment in a Chain of IV increments. Relate an IV user to
+/// an expression that computes the IV it uses from the IV used by the previous
+/// link in the Chain.
///
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
/// original IVOperand. The head of the chain's IVOperand is only valid during
@@ -1582,8 +1592,8 @@ struct IVInc {
UserInst(U), IVOperand(O), IncExpr(E) {}
};
-// IVChain - The list of IV increments in program order.
-// We typically add the head of a chain without finding subsequent links.
+// The list of IV increments in program order. We typically add the head of a
+// chain without finding subsequent links.
struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
@@ -1595,7 +1605,7 @@ struct IVChain {
typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
- // begin - return the first increment in the chain.
+ // Return the first increment in the chain.
const_iterator begin() const {
assert(!Incs.empty());
return std::next(Incs.begin());
@@ -1604,32 +1614,30 @@ struct IVChain {
return Incs.end();
}
- // hasIncs - Returns true if this chain contains any increments.
+ // Returns true if this chain contains any increments.
bool hasIncs() const { return Incs.size() >= 2; }
- // add - Add an IVInc to the end of this chain.
+ // Add an IVInc to the end of this chain.
void add(const IVInc &X) { Incs.push_back(X); }
- // tailUserInst - Returns the last UserInst in the chain.
+ // Returns the last UserInst in the chain.
Instruction *tailUserInst() const { return Incs.back().UserInst; }
- // isProfitableIncrement - Returns true if IncExpr can be profitably added to
- // this chain.
+ // Returns true if IncExpr can be profitably added to this chain.
bool isProfitableIncrement(const SCEV *OperExpr,
const SCEV *IncExpr,
ScalarEvolution&);
};
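The iteration convention matters here: Incs[0] is the chain head rather than an increment, so begin() skips it and hasIncs() requires at least two entries. A minimal mock of that convention:

    #include <cassert>
    #include <iterator>
    #include <vector>

    // Mock of the IVChain indexing convention (illustrative): entry 0 is
    // the head of the chain, so "increments" start at the second entry.
    struct MiniChain {
      std::vector<int> Incs;  // ints stand in for IVInc records

      bool hasIncs() const { return Incs.size() >= 2; }
      std::vector<int>::const_iterator begin() const {
        assert(!Incs.empty());
        return std::next(Incs.begin());  // skip the head entry
      }
      std::vector<int>::const_iterator end() const { return Incs.end(); }
    };

    int main() {
      MiniChain C{{10, 11, 12}};     // head + two increments
      assert(C.hasIncs());
      assert(*C.begin() == 11);      // iteration starts after the head
      assert(C.end() - C.begin() == 2);
    }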
-/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
-/// Distinguish between FarUsers that definitely cross IV increments and
-/// NearUsers that may be used between IV increments.
+/// Helper for CollectChains to track multiple IV increment uses. Distinguish
+/// between FarUsers that definitely cross IV increments and NearUsers that may
+/// be used between IV increments.
struct ChainUsers {
SmallPtrSet<Instruction*, 4> FarUsers;
SmallPtrSet<Instruction*, 4> NearUsers;
};
-/// LSRInstance - This class holds state for the main loop strength reduction
-/// logic.
+/// This class holds state for the main loop strength reduction logic.
class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
@@ -1639,25 +1647,25 @@ class LSRInstance {
Loop *const L;
bool Changed;
- /// IVIncInsertPos - This is the insert position that the current loop's
- /// induction variable increment should be placed. In simple loops, this is
- /// the latch block's terminator. But in more complicated cases, this is a
- /// position which will dominate all the in-loop post-increment users.
+ /// This is the insert position that the current loop's induction variable
+ /// increment should be placed. In simple loops, this is the latch block's
+ /// terminator. But in more complicated cases, this is a position which will
+ /// dominate all the in-loop post-increment users.
Instruction *IVIncInsertPos;
- /// Factors - Interesting factors between use strides.
+ /// Interesting factors between use strides.
SmallSetVector<int64_t, 8> Factors;
- /// Types - Interesting use types, to facilitate truncation reuse.
+ /// Interesting use types, to facilitate truncation reuse.
SmallSetVector<Type *, 4> Types;
- /// Fixups - The list of operands which are to be replaced.
+ /// The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
- /// Uses - The list of interesting uses.
+ /// The list of interesting uses.
SmallVector<LSRUse, 16> Uses;
- /// RegUses - Track which uses use which register candidates.
+ /// Track which uses use which register candidates.
RegUseTracker RegUses;
// Limit the number of chains to avoid quadratic behavior. We don't expect to
@@ -1665,10 +1673,10 @@ class LSRInstance {
// back to normal LSR behavior for those uses.
static const unsigned MaxChains = 8;
- /// IVChainVec - IV users can form a chain of IV increments.
+ /// IV users can form a chain of IV increments.
SmallVector<IVChain, MaxChains> IVChainVec;
- /// IVIncSet - IV users that belong to profitable IVChains.
+ /// IV users that belong to profitable IVChains.
SmallPtrSet<Use*, MaxChains> IVIncSet;
void OptimizeShadowIV();
@@ -1696,11 +1704,10 @@ class LSRInstance {
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy);
+ LSRUse::KindType Kind, MemAccessTy AccessTy);
- std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
- LSRUse::KindType Kind,
- Type *AccessTy);
+ std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
+ MemAccessTy AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
@@ -1769,18 +1776,16 @@ class LSRInstance {
void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const;
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const;
- void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
- Pass *P);
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
public:
- LSRInstance(Loop *L, Pass *P);
+ LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
+ LoopInfo &LI, const TargetTransformInfo &TTI);
bool getChanged() const { return Changed; }
@@ -1793,8 +1798,8 @@ public:
}
-/// OptimizeShadowIV - If IV is used in a int-to-float cast
-/// inside the loop then try to eliminate the cast operation.
+/// If IV is used in an int-to-float cast inside the loop then try to eliminate
+/// the cast operation.
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
@@ -1902,9 +1907,8 @@ void LSRInstance::OptimizeShadowIV() {
}
}
-/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
-/// set the IV user and stride information and return true, otherwise return
-/// false.
+/// If Cond has an operand that is an expression of an IV, set the IV user and
+/// stride information and return true, otherwise return false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
for (IVStrideUse &U : IU)
if (U.getUser() == Cond) {
@@ -1917,8 +1921,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
return false;
}
-/// OptimizeMax - Rewrite the loop's terminating condition if it uses
-/// a max computation.
+/// Rewrite the loop's terminating condition if it uses a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
@@ -2076,8 +2079,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
return NewCond;
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
-/// postinc iv when possible.
+/// Change loop terminating condition to use the postinc iv when possible.
void
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
@@ -2152,16 +2154,18 @@ LSRInstance::OptimizeLoopTermCond() {
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Check for possible scaled-address reuse.
- Type *AccessTy = getAccessType(UI->getUser());
+ MemAccessTy AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
- /*BaseOffset=*/ 0,
- /*HasBaseReg=*/ false, Scale))
+ if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/0,
+ /*HasBaseReg=*/false, Scale,
+ AccessTy.AddrSpace))
goto decline_post_inc;
Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
- /*BaseOffset=*/ 0,
- /*HasBaseReg=*/ false, Scale))
+ if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/0,
+ /*HasBaseReg=*/false, Scale,
+ AccessTy.AddrSpace))
goto decline_post_inc;
}
}
@@ -2180,7 +2184,7 @@ LSRInstance::OptimizeLoopTermCond() {
ICmpInst *OldCond = Cond;
Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
- ExitingBlock->getInstList().insert(TermBr, Cond);
+ ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);
// Clone the IVUse, as the old use still exists!
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
@@ -2213,15 +2217,14 @@ LSRInstance::OptimizeLoopTermCond() {
}
}
-/// reconcileNewOffset - Determine if the given use can accommodate a fixup
-/// at the given offset and other details. If so, update the use and
-/// return true.
-bool
-LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy) {
+/// Determine if the given use can accommodate a fixup at the given offset and
+/// other details. If so, update the use and return true.
+bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+ bool HasBaseReg, LSRUse::KindType Kind,
+ MemAccessTy AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
- Type *NewAccessTy = AccessTy;
+ MemAccessTy NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
@@ -2232,8 +2235,10 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// Check for a mismatched access type, and fall back conservatively as needed.
// TODO: Be less conservative when the type is similar and can use the same
// addressing modes.
- if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
- NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+ if (Kind == LSRUse::Address) {
+ if (AccessTy != LU.AccessTy)
+ NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
+ }
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
@@ -2257,12 +2262,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
return true;
}
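The offset side of reconcileNewOffset just widens the use's [MinOffset, MaxOffset] window to cover the new fixup, committing the widened bounds only if the legality check (elided below) passes. A simplified model of that bookkeeping:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Simplified model of the offset-window reconciliation: no legality
    // checks and no access types, just the window arithmetic.
    struct UseWindow {
      int64_t MinOffset = INT64_MAX;
      int64_t MaxOffset = INT64_MIN;

      bool reconcile(int64_t NewOffset) {
        int64_t NewMin = std::min(MinOffset, NewOffset);
        int64_t NewMax = std::max(MaxOffset, NewOffset);
        // The real code rejects here if the widened window is not legal
        // for the target; this sketch always accepts.
        MinOffset = NewMin;
        MaxOffset = NewMax;
        return true;
      }
    };

    int main() {
      UseWindow W;
      W.reconcile(8);
      W.reconcile(-4);
      assert(W.MinOffset == -4 && W.MaxOffset == 8);  // covers both fixups
    }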
-/// getUse - Return an LSRUse index and an offset value for a fixup which
-/// needs the given expression, with the given kind and optional access type.
-/// Either reuse an existing use or create a new one, as needed.
-std::pair<size_t, int64_t>
-LSRInstance::getUse(const SCEV *&Expr,
- LSRUse::KindType Kind, Type *AccessTy) {
+/// Return an LSRUse index and an offset value for a fixup which needs the given
+/// expression, with the given kind and optional access type. Either reuse an
+/// existing use or create a new one, as needed.
+std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ MemAccessTy AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
@@ -2300,18 +2305,18 @@ LSRInstance::getUse(const SCEV *&Expr,
return std::make_pair(LUIdx, Offset);
}
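getUse peels the immediate off the expression and keys the lookup on what remains, so fixups that differ only by a constant share one LSRUse and report their delta as the returned offset. A map-based sketch with strings standing in for SCEV expressions (names are illustrative):

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Sketch of use deduplication: look up the residual expression and
    // either reuse the existing use index or mint a new one.
    struct UseTable {
      std::map<std::string, size_t> Index;  // residual expr -> use index
      std::vector<std::string> Uses;

      std::pair<size_t, int64_t> getUse(std::string Expr, int64_t Imm) {
        auto It = Index.find(Expr);
        if (It != Index.end())
          return {It->second, Imm};       // reuse, report fixup offset
        size_t LUIdx = Uses.size();       // otherwise create a new use
        Uses.push_back(Expr);
        Index.emplace(std::move(Expr), LUIdx);
        return {LUIdx, Imm};
      }
    };

    int main() {
      UseTable T;
      auto A = T.getUse("{%iv,+,4}", 8);   // new use, offset 8
      auto B = T.getUse("{%iv,+,4}", 12);  // same residual expr: reused
      assert(A.first == B.first && B.second == 12);
    }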
-/// DeleteUse - Delete the given use from the Uses list.
+/// Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
// Update RegUses.
- RegUses.SwapAndDropUse(LUIdx, Uses.size());
+ RegUses.swapAndDropUse(LUIdx, Uses.size());
}
-/// FindUseWithFormula - Look for a use distinct from OrigLU which is has
-/// a formula that has the same registers as the given formula.
+/// Look for a use distinct from OrigLU which has a formula with the same
+/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
@@ -2396,14 +2401,14 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
SE, true))) {
- if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
- Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ if (Factor->getAPInt().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getAPInt().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
NewStride,
SE, true))) {
- if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
- Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ if (Factor->getAPInt().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getAPInt().getSExtValue());
}
}
@@ -2415,9 +2420,9 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
DEBUG(print_factors_and_types(dbgs()));
}
-/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
-/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
-/// Instructions to IVStrideUses, we could partially skip this.
+/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
+/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
+/// IVStrideUses, we could partially skip this.
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
Loop *L, ScalarEvolution &SE) {
@@ -2436,29 +2441,28 @@ findIVOperand(User::op_iterator OI, User::op_iterator OE,
return OI;
}
-/// getWideOperand - IVChain logic must consistenctly peek base TruncInst
-/// operands, so wrap it in a convenient helper.
+/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
+/// a convenient helper.
static Value *getWideOperand(Value *Oper) {
if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
return Trunc->getOperand(0);
return Oper;
}
-/// isCompatibleIVType - Return true if we allow an IV chain to include both
-/// types.
+/// Return true if we allow an IV chain to include both types.
static bool isCompatibleIVType(Value *LVal, Value *RVal) {
Type *LType = LVal->getType();
Type *RType = RVal->getType();
return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
}
-/// getExprBase - Return an approximation of this SCEV expression's "base", or
-/// NULL for any constant. Returning the expression itself is
-/// conservative. Returning a deeper subexpression is more precise and valid as
-/// long as it isn't less complex than another subexpression. For expressions
-/// involving multiple unscaled values, we need to return the pointer-type
-/// SCEVUnknown. This avoids forming chains across objects, such as:
-/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
+/// Return an approximation of this SCEV expression's "base", or NULL for any
+/// constant. Returning the expression itself is conservative. Returning a
+/// deeper subexpression is more precise and valid as long as it isn't less
+/// complex than another subexpression. For expressions involving multiple
+/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
+/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
+/// IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
@@ -2601,8 +2605,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
return cost < 0;
}
-/// ChainInstruction - Add this IV user to an existing chain or make it the head
-/// of a new chain.
+/// Add this IV user to an existing chain or make it the head of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec) {
// When IVs are used as types of varying widths, they are generally converted
@@ -2714,7 +2717,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
-/// CollectChains - Populate the vector of Chains.
+/// Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
@@ -2755,19 +2758,19 @@ void LSRInstance::CollectChains() {
for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
I != E; ++I) {
// Skip instructions that weren't seen by IVUsers analysis.
- if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&*I))
continue;
// Ignore users that are part of a SCEV expression. This way we only
// consider leaf IV Users. This effectively rediscovers a portion of
// IVUsers analysis but in program order this time.
- if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
+ if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(&*I)))
continue;
// Remove this instruction from any NearUsers set it may be in.
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
ChainIdx < NChains; ++ChainIdx) {
- ChainUsersVec[ChainIdx].NearUsers.erase(I);
+ ChainUsersVec[ChainIdx].NearUsers.erase(&*I);
}
// Search for operands that can be chained.
SmallPtrSet<Instruction*, 4> UniqueOperands;
@@ -2776,7 +2779,7 @@ void LSRInstance::CollectChains() {
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst).second)
- ChainInstruction(I, IVOpInst, ChainUsersVec);
+ ChainInstruction(&*I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} // Continue walking down the instructions.
@@ -2828,20 +2831,20 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
- if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
+ if (IncConst->getAPInt().getMinSignedBits() > 64)
return false;
+ MemAccessTy AccessTy = getAccessType(UserInst);
int64_t IncOffset = IncConst->getValue()->getSExtValue();
- if (!isAlwaysFoldable(TTI, LSRUse::Address,
- getAccessType(UserInst), /*BaseGV=*/ nullptr,
- IncOffset, /*HaseBaseReg=*/ false))
+ if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
+ IncOffset, /*HasBaseReg=*/false))
return false;
return true;
}
-/// GenerateIVChains - Generate an add or subtract for each IVInc in a chain to
-/// materialize the IV user's operand from the previous IV user's operand.
+/// Generate an add or subtract for each IVInc in a chain to materialize the IV
+/// user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
@@ -2961,7 +2964,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = U.getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- Type *AccessTy = nullptr;
+ MemAccessTy AccessTy;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -3027,9 +3030,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
DEBUG(print_fixups(dbgs()));
}
-/// InsertInitialFormula - Insert a formula for the given expression into
-/// the given use, separating out loop-variant portions from loop-invariant
-/// and loop-computable portions.
+/// Insert a formula for the given expression into the given use, separating out
+/// loop-variant portions from loop-invariant and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
// Mark uses whose expressions cannot be expanded.
@@ -3037,13 +3039,13 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
LU.RigidFormula = true;
Formula F;
- F.InitialMatch(S, L, SE);
+ F.initialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
-/// InsertSupplementalFormula - Insert a simple single-register formula for
-/// the given expression into the given use.
+/// Insert a simple single-register formula for the given expression into the
+/// given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
@@ -3054,17 +3056,16 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
-/// CountRegisters - Note which registers are used by the given formula,
-/// updating RegUses.
+/// Note which registers are used by the given formula, updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
- RegUses.CountRegister(F.ScaledReg, LUIdx);
+ RegUses.countRegister(F.ScaledReg, LUIdx);
for (const SCEV *BaseReg : F.BaseRegs)
- RegUses.CountRegister(BaseReg, LUIdx);
+ RegUses.countRegister(BaseReg, LUIdx);
}
-/// InsertFormula - If the given formula has not yet been inserted, add it to
-/// the list, and return true. Return false otherwise.
+/// If the given formula has not yet been inserted, add it to the list, and
+/// return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
// Do not insert formula that we will not be able to expand.
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
@@ -3076,9 +3077,9 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
return true;
}
-/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
-/// loop-invariant values which we're tracking. These other uses will pin these
-/// values in registers, making them less profitable for elimination.
+/// Check for other uses of loop-invariant values which we're tracking. These
+/// other uses will pin these values in registers, making them less profitable
+/// for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
@@ -3124,6 +3125,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
+ // Don't bother if the instruction is in a BB which ends in an EHPad.
+ if (UseBB->getTerminator()->isEHPad())
+ continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType())) {
@@ -3148,7 +3152,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
- std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, MemAccessTy());
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
@@ -3165,8 +3170,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
}
}
-/// CollectSubexprs - Split S into subexpressions which can be pulled out into
-/// separate registers. If C is non-null, multiply each subexpression by C.
+/// Split S into subexpressions which can be pulled out into separate
+/// registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
@@ -3300,7 +3305,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
F.BaseRegs.push_back(*J);
// We may have changed the number of register in base regs, adjust the
// formula accordingly.
- F.Canonicalize();
+ F.canonicalize();
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
@@ -3309,8 +3314,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
}
}
-/// GenerateReassociations - Split out subexpressions from adds and the bases of
-/// addrecs.
+/// Split out subexpressions from adds and the bases of addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base, unsigned Depth) {
assert(Base.isCanonical() && "Input must be in the canonical form");
@@ -3326,8 +3330,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
/* Idx */ -1, /* IsScaledReg */ true);
}
-/// GenerateCombinations - Generate a formula consisting of all of the
-/// loop-dominating registers added into a single register.
+/// Generate a formula consisting of all of the loop-dominating registers added
+/// into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
@@ -3336,7 +3340,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
// processing the formula.
- Base.Unscale();
+ Base.unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
@@ -3354,7 +3358,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
- F.Canonicalize();
+ F.canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3379,7 +3383,7 @@ void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
(void)InsertFormula(LU, LUIdx, F);
}
-/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+/// Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
@@ -3410,8 +3414,8 @@ void LSRInstance::GenerateConstantOffsetsImpl(
F.Scale = 0;
F.ScaledReg = nullptr;
} else
- F.DeleteBaseReg(F.BaseRegs[Idx]);
- F.Canonicalize();
+ F.deleteBaseReg(F.BaseRegs[Idx]);
+ F.canonicalize();
} else if (IsScaledReg)
F.ScaledReg = NewG;
else
@@ -3452,8 +3456,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
/* IsScaledReg */ true);
}
-/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
-/// the comparison. For example, x == y -> x*c == y*c.
+/// For ICmpZero, check to see if we can scale up the comparison. For example,
+/// x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (LU.Kind != LSRUse::ICmpZero) return;
@@ -3538,8 +3542,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
}
}
-/// GenerateScales - Generate stride factor reuse formulae by making use of
-/// scaled-offset address modes, for example.
+/// Generate stride factor reuse formulae by making use of scaled-offset address
+/// modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Determine the integer type for the base formula.
Type *IntTy = Base.getType();
@@ -3547,10 +3551,10 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// If this Formula already has a scaled register, we can't add another one.
// Try to unscale the formula to generate a better scale.
- if (Base.Scale != 0 && !Base.Unscale())
+ if (Base.Scale != 0 && !Base.unscale())
return;
- assert(Base.Scale == 0 && "Unscale did not did its job!");
+ assert(Base.Scale == 0 && "unscale did not do its job!");
// Check each interesting stride.
for (int64_t Factor : Factors) {
@@ -3587,7 +3591,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
- F.DeleteBaseReg(F.BaseRegs[i]);
+ F.deleteBaseReg(F.BaseRegs[i]);
// The canonical representation of 1*reg is reg, which is already in
// Base. In that case, do not try to insert the formula, it will be
// rejected anyway.
@@ -3599,7 +3603,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
}
}
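
At its core, GenerateScales hinges on an exact division: a register can move into the scaled-register slot only if the candidate factor divides it evenly. A hedged sketch with integers in place of SCEV division (factorOut is an illustrative helper, not LSR's getExactSDiv):

    #include <optional>

    // Returns the quotient if Factor exactly divides Step, i.e. the value
    // that would become the new scaled register; nullopt means "no formula".
    std::optional<long> factorOut(long Step, long Factor) {
      if (Factor == 0 || Step % Factor != 0)
        return std::nullopt;
      return Step / Factor; // e.g. step 8 with factor 8 -> scaled step 1
    }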
-/// GenerateTruncates - Generate reuse formulae from different IV types.
+/// Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Don't bother truncating symbolic values.
if (Base.BaseGV) return;
@@ -3629,9 +3633,9 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
namespace {
-/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
-/// defer modifications so that the search phase doesn't have to worry about
-/// the data structures moving underneath it.
+/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
+/// modifications so that the search phase doesn't have to worry about the data
+/// structures moving underneath it.
struct WorkItem {
size_t LUIdx;
int64_t Imm;
@@ -3651,14 +3655,13 @@ void WorkItem::print(raw_ostream &OS) const {
<< " , add offset " << Imm;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
-#endif
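
The search/apply split that WorkItem exists for is a general pattern: record intended edits while iterating, then mutate afterwards so the containers being searched stay stable. A small illustrative sketch (the evenness test and the offset are arbitrary):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct PendingEdit { size_t UseIdx; int64_t Imm; };

    void searchThenApply(std::vector<int64_t> &Uses) {
      std::vector<PendingEdit> Work;
      for (size_t I = 0; I != Uses.size(); ++I) // search phase: read-only
        if (Uses[I] % 2 == 0)
          Work.push_back({I, 4});
      for (const PendingEdit &E : Work)         // apply phase: safe to mutate
        Uses[E.UseIdx] += E.Imm;
    }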
-/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
-/// distance apart and try to form reuse opportunities between them.
+/// Look for registers which are a constant distance apart and try to form reuse
+/// opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
@@ -3751,7 +3754,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// very similar but slightly different. Investigate if they
// could be merged. That way, we would not have to unscale the
// Formula.
- F.Unscale();
+ F.unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3770,14 +3773,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
- if (C->getValue()->isNegative() !=
- (NewF.BaseOffset < 0) &&
- (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
- .ule(std::abs(NewF.BaseOffset)))
+ if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
+ (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
+ .ule(std::abs(NewF.BaseOffset)))
continue;
// OK, looks good.
- NewF.Canonicalize();
+ NewF.canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3801,15 +3803,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// zero than the immediate itself, then the formula isn't worthwhile.
for (const SCEV *NewReg : NewF.BaseRegs)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
- if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
- std::abs(NewF.BaseOffset)) &&
- (C->getValue()->getValue() +
- NewF.BaseOffset).countTrailingZeros() >=
- countTrailingZeros<uint64_t>(NewF.BaseOffset))
+ if ((C->getAPInt() + NewF.BaseOffset)
+ .abs()
+ .slt(std::abs(NewF.BaseOffset)) &&
+ (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
+ countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
- NewF.Canonicalize();
+ NewF.canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -3819,7 +3821,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
}
}
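
The payoff being chased here, in miniature: two uses a known constant apart can share a single register, with the difference absorbed as an addressing-mode immediate. The values below are made up:

    #include <cassert>

    int main() {
      long RegA = 1000;           // register serving the first use
      long UseB = RegA + 4;       // second use, a constant distance away
      long Imm = UseB - RegA;     // what a WorkItem records: (use, offset 4)
      assert(RegA + Imm == UseB); // one shared register + per-use immediate
      return 0;
    }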
-/// GenerateAllReuseFormulae - Generate formulae for each use.
+/// Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
@@ -3959,10 +3961,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// This is a rough guess that seems to work fairly well.
static const size_t ComplexityLimit = UINT16_MAX;
-/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
-/// solutions the solver might have to consider. It almost never considers
-/// this many solutions because it prune the search space, but the pruning
-/// isn't always sufficient.
+/// Estimate the worst-case number of solutions the solver might have to
+/// consider. It almost never considers this many solutions because it prunes
+/// the search space, but the pruning isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
size_t Power = 1;
for (const LSRUse &LU : Uses) {
@@ -3978,10 +3979,9 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
return Power;
}
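
The estimate is essentially a saturating product of the per-use formula counts. A sketch under that reading, with invented counts and an overflow-safe saturation check:

    #include <cstddef>
    #include <vector>

    size_t estimateComplexity(const std::vector<size_t> &FormulaeCounts,
                              size_t Limit) {
      size_t Power = 1;
      for (size_t N : FormulaeCounts) {
        if (N != 0 && Power >= Limit / N)
          return Limit; // saturate before multiplying to avoid overflow
        Power *= N;
      }
      return Power; // e.g. {3, 4, 5} -> at most 60 candidate solutions
    }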
-/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
-/// of the registers of another formula, it won't help reduce register
-/// pressure (though it may not necessarily hurt register pressure); remove
-/// it to simplify the system.
+/// When one formula uses a superset of the registers of another formula, it
+/// won't help reduce register pressure (though it may not necessarily hurt
+/// register pressure); remove it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -4042,9 +4042,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
}
}
-/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
-/// for expressions like A, A+1, A+2, etc., allocate a single register for
-/// them.
+/// When there are many registers for expressions like A, A+1, A+2, etc.,
+/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
@@ -4121,8 +4120,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
-/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
-/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
@@ -4139,9 +4137,9 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
}
}
-/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
-/// to be profitable, and then in any use which has any reference to that
-/// register, delete all formulae which do not reference that register.
+/// Pick a register which seems likely to be profitable, and then in any use
+/// which has any reference to that register, delete all formulae which do not
+/// reference that register.
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
@@ -4202,10 +4200,10 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
}
}
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
+/// If there are an extraordinary number of formulae to choose from, use some
+/// rough heuristics to prune down the number of formulae. This keeps the main
+/// solver from taking an extraordinary amount of time in some worst-case
+/// scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
@@ -4213,7 +4211,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByPickingWinnerRegs();
}
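
The solver that follows is branch-and-bound over one formula choice per use: extend a partial assignment, and prune as soon as its running cost can no longer beat the best complete solution found so far. A stripped-down sketch with scalar costs (the real Cost comparison is multi-dimensional):

    #include <cstddef>
    #include <vector>

    // Picks must be presized to CostPerUse.size() by the caller.
    void solveRecurse(const std::vector<std::vector<int>> &CostPerUse,
                      size_t Idx, int Running, std::vector<size_t> &Picks,
                      std::vector<size_t> &Best, int &BestCost) {
      if (Running >= BestCost)
        return; // prune: this partial solution already loses
      if (Idx == CostPerUse.size()) {
        Best = Picks;
        BestCost = Running;
        return;
      }
      for (size_t F = 0; F != CostPerUse[Idx].size(); ++F) {
        Picks[Idx] = F;
        solveRecurse(CostPerUse, Idx + 1, Running + CostPerUse[Idx][F],
                     Picks, Best, BestCost);
      }
    }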
-/// SolveRecurse - This is the recursive solver.
+/// This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
@@ -4291,8 +4289,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
}
}
-/// Solve - Choose one formula from each use. Return the results in the given
-/// Solution vector.
+/// Choose one formula from each use. Return the results in the given Solution
+/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
@@ -4326,10 +4324,9 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
assert(Solution.size() == Uses.size() && "Malformed solution!");
}
-/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
-/// the dominator tree far as we can go while still being dominated by the
-/// input positions. This helps canonicalize the insert position, which
-/// encourages sharing.
+/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far
+/// as we can go while still being dominated by the input positions. This helps
+/// canonicalize the insert position, which encourages sharing.
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
@@ -4365,21 +4362,21 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
- BetterPos = std::next(BasicBlock::iterator(Inst));
+ BetterPos = &*std::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
if (BetterPos)
- IP = BetterPos;
+ IP = BetterPos->getIterator();
else
- IP = Tentative;
+ IP = Tentative->getIterator();
}
return IP;
}
-/// AdjustInsertPositionForExpand - Determine an input position which will be
-/// dominated by the operands and which will dominate the result.
+/// Determine an input position which will be dominated by the operands and
+/// which will dominate the result.
BasicBlock::iterator
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
@@ -4417,7 +4414,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
}
}
- assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+ assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
&& !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction");
@@ -4429,7 +4426,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
while (isa<PHINode>(IP)) ++IP;
// Ignore landingpad instructions.
- while (isa<LandingPadInst>(IP)) ++IP;
+ while (!isa<TerminatorInst>(IP) && IP->isEHPad()) ++IP;
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
@@ -4437,13 +4434,14 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
// Set IP below instructions recently inserted by SCEVExpander. This keeps the
// IP consistent across expansions and allows the previously inserted
// instructions to be reused by subsequent expansion.
- while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+ while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
+ ++IP;
return IP;
}
-/// Expand - Emit instructions for the leading candidate expression for this
-/// LSRUse (this is called "expanding").
+/// Emit instructions for the leading candidate expression for this LSRUse (this
+/// is called "expanding").
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
@@ -4487,7 +4485,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
}
// Expand the ScaledReg portion.
@@ -4505,14 +4503,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand ScaleReg as if it was part of the base regs.
if (F.Scale == 1)
Ops.push_back(
- SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
else {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
}
} else {
// Otherwise just expand the scaled register and an explicit scale,
@@ -4522,11 +4520,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
if (F.Scale != 1)
ScaledS =
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
@@ -4538,7 +4536,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4548,7 +4546,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
// unfolded offsets. LSR assumes they both live next to their uses.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4584,7 +4582,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
- Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();
@@ -4626,15 +4624,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
return FullV;
}
-/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
-/// of their operands effectively happens in their predecessor blocks, so the
-/// expression may need to be expanded in multiple places.
+/// Helper for Rewrite. PHI nodes are special because the use of their operands
+/// effectively happens in their predecessor blocks, so the expression may need
+/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const {
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
@@ -4658,8 +4655,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
.setDontDeleteUselessPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs,
- /*AliasAnalysis*/ nullptr, &DT, &LI);
+ SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
NewBB = NewBBs[0];
}
// If NewBB==NULL, then SplitCriticalEdge refused to split because all
@@ -4685,7 +4681,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
- Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+ Value *FullV = Expand(LF, F, BB->getTerminator()->getIterator(),
+ Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -4702,20 +4699,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
}
}
-/// Rewrite - Emit instructions for the leading candidate expression for this
-/// LSRUse (this is called "expanding"), and update the UserInst to reference
-/// the newly expanded value.
+/// Emit instructions for the leading candidate expression for this LSRUse (this
+/// is called "expanding"), and update the UserInst to reference the newly
+/// expanded value.
void LSRInstance::Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const {
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
- RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts);
} else {
- Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
+ Value *FullV =
+ Expand(LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -4740,11 +4737,10 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
DeadInsts.emplace_back(LF.OperandValToReplace);
}
-/// ImplementSolution - Rewrite all the fixup locations with new values,
-/// following the chosen solution.
-void
-LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
- Pass *P) {
+/// Rewrite all the fixup locations with new values, following the chosen
+/// solution.
+void LSRInstance::ImplementSolution(
+ const SmallVectorImpl<const Formula *> &Solution) {
// Keep track of instructions we may have made dead, so that
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
@@ -4766,7 +4762,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// Expand the new value definitions and update the users.
for (const LSRFixup &Fixup : Fixups) {
- Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
+ Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts);
Changed = true;
}
@@ -4782,13 +4778,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(Loop *L, Pass *P)
- : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
- LI(P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo()),
- TTI(P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent())),
- L(L), Changed(false), IVIncInsertPos(nullptr) {
+LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
+ DominatorTree &DT, LoopInfo &LI,
+ const TargetTransformInfo &TTI)
+ : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
+ IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -4879,7 +4873,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
#endif
// Now that we've decided what we want, make it so.
- ImplementSolution(Solution, P);
+ ImplementSolution(Solution);
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
@@ -4931,11 +4925,10 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
@@ -4956,7 +4949,7 @@ INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
@@ -4982,8 +4975,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
@@ -4996,17 +4989,24 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
if (skipOptnoneFunction(L))
return false;
+ auto &IU = getAnalysis<IVUsers>();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent());
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(L, this).getChanged();
+ Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolutionWrapperPass>().getSE(), DL,
+ "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d78db6c369b3..56ae5c010411 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -130,27 +131,29 @@ namespace {
bool UserAllowPartial;
bool UserRuntime;
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
// Fill in the UnrollingPreferences parameter with values from the
@@ -186,7 +189,7 @@ namespace {
// total unrolled size. Parameters Threshold and PartialThreshold
// are set to the maximum unrolled size for fully and partially
// unrolled loops respectively.
- void selectThresholds(const Loop *L, bool HasPragma,
+ void selectThresholds(const Loop *L, bool UsePragmaThreshold,
const TargetTransformInfo::UnrollingPreferences &UP,
unsigned &Threshold, unsigned &PartialThreshold,
unsigned &PercentDynamicCostSavedThreshold,
@@ -207,12 +210,13 @@ namespace {
: UP.DynamicCostSavingsDiscount;
if (!UserThreshold &&
+ // FIXME: Use Function::optForSize().
L->getHeader()->getParent()->hasFnAttribute(
Attribute::OptimizeForSize)) {
Threshold = UP.OptSizeThreshold;
PartialThreshold = UP.PartialOptSizeThreshold;
}
- if (HasPragma) {
+ if (UsePragmaThreshold) {
// If the loop has an unrolling pragma, we want to be more
// aggressive with unrolling limits. Set thresholds to at
// least the PragmaTheshold value which is larger than the
@@ -235,10 +239,11 @@ char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
@@ -278,8 +283,8 @@ class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
public:
UnrolledInstAnalyzer(unsigned Iteration,
DenseMap<Value *, Constant *> &SimplifiedValues,
- const Loop *L, ScalarEvolution &SE)
- : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) {
+ ScalarEvolution &SE)
+ : SimplifiedValues(SimplifiedValues), SE(SE) {
IterationNumber = SE.getConstant(APInt(64, Iteration));
}
@@ -295,13 +300,6 @@ private:
/// results saved.
DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses;
- /// \brief Number of currently simulated iteration.
- ///
- /// If an expression is ConstAddress+Constant, then the Constant is
- /// Start + Iteration*Step, where Start and Step could be obtained from
- /// SCEVGEPCache.
- unsigned Iteration;
-
/// \brief SCEV expression corresponding to number of currently simulated
/// iteration.
const SCEV *IterationNumber;
@@ -316,7 +314,6 @@ private:
/// post-unrolling.
DenseMap<Value *, Constant *> &SimplifiedValues;
- const Loop *L;
ScalarEvolution &SE;
/// \brief Try to simplify instruction \param I using its SCEV expression.
@@ -368,11 +365,9 @@ private:
return simplifyInstWithSCEV(&I);
}
- /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt.
-
/// Try to simplify binary operator I.
///
- /// TODO: Probaly it's worth to hoist the code for estimating the
+ /// TODO: Probably it's worth hoisting the code for estimating the
/// simplifications effects to a separate class, since we have a very similar
/// code in InlineCost already.
bool visitBinaryOperator(BinaryOperator &I) {
@@ -412,7 +407,7 @@ private:
auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
// We're only interested in loads that can be completely folded to a
// constant.
- if (!GV || !GV->hasInitializer())
+ if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant())
return false;
ConstantDataSequential *CDS =
@@ -420,6 +415,12 @@ private:
if (!CDS)
return false;
+ // We might have a vector load from an array. FIXME: for now we just bail
+ // out in this case, but we should be able to resolve and simplify such
+ // loads.
+ if (!CDS->isElementTypeCompatible(I.getType()))
+ return false;
+
int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 &&
"Unexpectedly large index value.");
@@ -436,6 +437,59 @@ private:
return true;
}
+
+ bool visitCastInst(CastInst &I) {
+ // Propagate constants through casts.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C =
+ ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ return Base::visitCastInst(I);
+ }
+
+ bool visitCmpInst(CmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+
+ if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) {
+ auto SimplifiedLHS = SimplifiedAddresses.find(LHS);
+ if (SimplifiedLHS != SimplifiedAddresses.end()) {
+ auto SimplifiedRHS = SimplifiedAddresses.find(RHS);
+ if (SimplifiedRHS != SimplifiedAddresses.end()) {
+ SimplifiedAddress &LHSAddr = SimplifiedLHS->second;
+ SimplifiedAddress &RHSAddr = SimplifiedRHS->second;
+ if (LHSAddr.Base == RHSAddr.Base) {
+ LHS = LHSAddr.Offset;
+ RHS = RHSAddr.Offset;
+ }
+ }
+ }
+ }
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+ }
+ }
+
+ return Base::visitCmpInst(I);
+ }
};
} // namespace
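
The load case this analyzer folds is the classic constant-table pattern: with a known trip count, each simulated iteration resolves the load to a literal from the initializer. An illustrative source-level shape (the table is hypothetical):

    static const int Table[4] = {0, 10, 20, 30};

    int sumTable() {
      int S = 0;
      for (int I = 0; I < 4; ++I) // trip count 4, fully simulated
        S += Table[I];            // iteration 3 folds the load to 30, etc.
      return S;
    }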
@@ -443,11 +497,11 @@ private:
namespace {
struct EstimatedUnrollCost {
/// \brief The estimated cost after unrolling.
- unsigned UnrolledCost;
+ int UnrolledCost;
/// \brief The estimated dynamic cost of executing the instructions in the
/// rolled form.
- unsigned RolledDynamicCost;
+ int RolledDynamicCost;
};
}
@@ -464,10 +518,10 @@ struct EstimatedUnrollCost {
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
-Optional<EstimatedUnrollCost>
-analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
- const TargetTransformInfo &TTI,
- unsigned MaxUnrolledLoopSize) {
+static Optional<EstimatedUnrollCost>
+analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ int MaxUnrolledLoopSize) {
// We want to be able to scale offsets by the trip count and add more offsets
// to them without checking for overflows, and we already don't want to
// analyze *massive* trip counts, so we force the max to be reasonably small.
@@ -481,24 +535,61 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
SmallSetVector<BasicBlock *, 16> BBWorklist;
DenseMap<Value *, Constant *> SimplifiedValues;
+ SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
// The estimated cost of the unrolled form of the loop. We try to estimate
// this by simplifying as much as we can while computing the estimate.
- unsigned UnrolledCost = 0;
+ int UnrolledCost = 0;
// We also track the estimated dynamic (that is, actually executed) cost in
// the rolled form. This helps identify cases when the savings from unrolling
// aren't just exposing dead control flows, but actual reduced dynamic
// instructions due to the simplifications which we expect to occur after
// unrolling.
- unsigned RolledDynamicCost = 0;
+ int RolledDynamicCost = 0;
+
+ // Ensure that we don't violate the loop structure invariants relied on by
+ // this analysis.
+ assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
+ assert(L->isLCSSAForm(DT) &&
+ "Must have loops in LCSSA form to track live-out values.");
+
+ DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
// Simulate execution of each iteration of the loop counting instructions,
// which would be simplified.
// Since the same load will take different values on different iterations,
// we literally have to go through all loop's iterations.
for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
+ DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
+
+ // Prepare for the iteration by collecting any simplified entry or backedge
+ // inputs.
+ for (Instruction &I : *L->getHeader()) {
+ auto *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+
+ // The loop header PHI nodes must have exactly two inputs: one from the
+ // loop preheader and one from the loop latch.
+ assert(
+ PHI->getNumIncomingValues() == 2 &&
+ "Must have an incoming value only for the preheader and the latch.");
+
+ Value *V = PHI->getIncomingValueForBlock(
+ Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
+ Constant *C = dyn_cast<Constant>(V);
+ if (Iteration != 0 && !C)
+ C = SimplifiedValues.lookup(V);
+ if (C)
+ SimplifiedInputValues.push_back({PHI, C});
+ }
+
+ // Now clear and re-populate the map for the next iteration.
SimplifiedValues.clear();
- UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE);
+ while (!SimplifiedInputValues.empty())
+ SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
+
+ UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE);
BBWorklist.clear();
BBWorklist.insert(L->getHeader());
@@ -510,21 +601,67 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// it. We don't change the actual IR, just count optimization
// opportunities.
for (Instruction &I : *BB) {
- unsigned InstCost = TTI.getUserCost(&I);
+ int InstCost = TTI.getUserCost(&I);
// Visit the instruction to analyze its loop cost after unrolling,
// and if the visitor returns false, include this instruction in the
// unrolled cost.
if (!Analyzer.visit(I))
UnrolledCost += InstCost;
+ else {
+ DEBUG(dbgs() << " " << I
+ << " would be simplified if loop is unrolled.\n");
+ (void)0;
+ }
// Also track this instructions expected cost when executing the rolled
// loop form.
RolledDynamicCost += InstCost;
// If unrolled body turns out to be too big, bail out.
- if (UnrolledCost > MaxUnrolledLoopSize)
+ if (UnrolledCost > MaxUnrolledLoopSize) {
+ DEBUG(dbgs() << " Exceeded threshold... exiting.\n"
+ << " UnrolledCost: " << UnrolledCost
+ << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
+ << "\n");
return None;
+ }
+ }
+
+ TerminatorInst *TI = BB->getTerminator();
+
+ // Add in the live successors by first checking whether we have terminator
+ // that may be simplified based on the values simplified by this call.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional()) {
+ if (Constant *SimpleCond =
+ SimplifiedValues.lookup(BI->getCondition())) {
+ BasicBlock *Succ = nullptr;
+ // Just take the first successor if condition is undef
+ if (isa<UndefValue>(SimpleCond))
+ Succ = BI->getSuccessor(0);
+ else
+ Succ = BI->getSuccessor(
+ cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0);
+ if (L->contains(Succ))
+ BBWorklist.insert(Succ);
+ continue;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (Constant *SimpleCond =
+ SimplifiedValues.lookup(SI->getCondition())) {
+ BasicBlock *Succ = nullptr;
+ // Just take the first successor if condition is undef
+ if (isa<UndefValue>(SimpleCond))
+ Succ = SI->getSuccessor(0);
+ else
+ Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond))
+ .getCaseSuccessor();
+ if (L->contains(Succ))
+ BBWorklist.insert(Succ);
+ continue;
+ }
}
// Add BB's successors to the worklist.
@@ -535,9 +672,15 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// If we found no optimization opportunities on the first iteration, we
// won't find them on later ones too.
- if (UnrolledCost == RolledDynamicCost)
+ if (UnrolledCost == RolledDynamicCost) {
+ DEBUG(dbgs() << " No opportunities found... exiting.\n"
+ << " UnrolledCost: " << UnrolledCost << "\n");
return None;
+ }
}
+ DEBUG(dbgs() << "Analysis finished:\n"
+ << "UnrolledCost: " << UnrolledCost << ", "
+ << "RolledDynamicCost: " << RolledDynamicCost << "\n");
return {{UnrolledCost, RolledDynamicCost}};
}
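
Downstream, these two numbers are weighed as a percentage of dynamic cost saved against PercentDynamicCostSavedThreshold. A sketch of that arithmetic with invented costs (the exact rounding in canUnrollCompletely may differ):

    #include <cassert>

    int main() {
      int UnrolledCost = 60, RolledDynamicCost = 100;
      int PercentSaved =
          (100 * (RolledDynamicCost - UnrolledCost)) / RolledDynamicCost;
      assert(PercentSaved == 40); // compared against the threshold
      return 0;
    }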
@@ -583,6 +726,12 @@ static bool HasUnrollFullPragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
}
+// Returns true if the loop has an unroll(enable) pragma. This metadata is used
+// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
+static bool HasUnrollEnablePragma(const Loop *L) {
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
+}
+
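
For reference, the two source-level directives the comment above names both lower to this metadata; an illustrative function:

    void scale(int *A, const int *B, int N) {
    #pragma unroll
      for (int I = 0; I < N; ++I)
        A[I] += B[I]; // tagged llvm.loop.unroll.enable when no count is given
    }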
// Returns true if the loop has an unroll(disable) pragma.
static bool HasUnrollDisablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
@@ -708,7 +857,7 @@ unsigned LoopUnroll::selectUnrollCount(
unsigned Count = UserCount ? CurrentCount : 0;
// If there is no user-specified count, unroll pragmas have the next
- // highest precendence.
+ // highest precedence.
if (Count == 0) {
if (PragmaCount) {
Count = PragmaCount;
@@ -737,17 +886,19 @@ unsigned LoopUnroll::selectUnrollCount(
return Count;
}
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
Function &F = *L->getHeader()->getParent();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -757,8 +908,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
bool PragmaFullUnroll = HasUnrollFullPragma(L);
+ bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
unsigned PragmaCount = UnrollCountPragmaValue(L);
- bool HasPragma = PragmaFullUnroll || PragmaCount > 0;
+ bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
TargetTransformInfo::UnrollingPreferences UP;
getUnrollingPreferences(L, TTI, UP);
@@ -806,7 +958,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
unsigned Threshold, PartialThreshold;
unsigned PercentDynamicCostSavedThreshold;
unsigned DynamicCostSavingsDiscount;
- selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
+ // Only use the high pragma threshold when we have a target unroll factor such
+ // as with "#pragma unroll N" or a pragma indicating full unrolling and the
+ // trip count is known. Otherwise we rely on the standard threshold to
+ // heuristically select a reasonable unroll count.
+ bool UsePragmaThreshold =
+ PragmaCount > 0 ||
+ ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
+
+ selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold,
PercentDynamicCostSavedThreshold,
DynamicCostSavingsDiscount);
@@ -824,8 +984,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount))
+ if (Optional<EstimatedUnrollCost> Cost =
+ analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI,
+ Threshold + DynamicCostSavingsDiscount))
if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
DynamicCostSavingsDiscount, Cost->UnrolledCost,
Cost->RolledDynamicCost)) {
@@ -840,14 +1001,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
- bool AllowRuntime =
- (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime);
+ bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) ||
+ (UserRuntime ? CurrentRuntime : UP.Runtime);
// Don't unroll a runtime trip count loop with unroll full pragma.
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
if (Unrolling == Partial) {
- bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
+ bool AllowPartial = PragmaEnableUnroll ||
+ (UserAllowPartial ? CurrentAllowPartial : UP.Partial);
if (!AllowPartial && !CountSetExplicitly) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
@@ -887,23 +1049,27 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
DebugLoc LoopLoc = L->getStartLoc();
Function *F = Header->getParent();
LLVMContext &Ctx = F->getContext();
- if (PragmaFullUnroll && PragmaCount == 0) {
- if (TripCount && Count != TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because unrolled size is too large.");
- } else if (!TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because loop has a runtime trip count.");
- }
- } else if (PragmaCount > 0 && Count != OriginalCount) {
+ if ((PragmaCount > 0) && Count != OriginalCount) {
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
"Unable to unroll loop the number of times directed by "
"unroll_count pragma because unrolled size is too large.");
+ } else if (PragmaFullUnroll && !TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(full) pragma "
+ "because loop has a runtime trip count.");
+ } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to unroll loop as directed by unroll(enable) pragma because "
+ "unrolled size is too large.");
+ } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ Count != TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll pragma because "
+ "unrolled size is too large.");
}
}
@@ -915,7 +1081,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Unroll the loop.
if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
- TripMultiple, LI, this, &LPM, &AC))
+ TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
return false;
return true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index cbc563bd8998..95d7f8a3beda 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -37,6 +38,10 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -70,6 +75,19 @@ static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
+static cl::opt<bool>
+LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to access PGO "
+ "heuristics to minimize code growth in cold regions."));
+
+static cl::opt<unsigned>
+ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden,
+ cl::desc("Coldness threshold in percentage. The loop header frequency "
+ "(relative to the entry frequency) is compared with this "
+ "threshold to determine if non-trivial unswitching should be "
+ "enabled."));
+
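
The gate these options implement reduces to a simple proportion of the function entry frequency. A sketch with raw integers standing in for BlockFrequency (headerIsCold is an illustrative helper, not part of the pass):

    #include <cstdint>

    // True if the loop header runs less often than ColdnessThreshold percent
    // of function entry; such loops skip non-trivial unswitching.
    bool headerIsCold(uint64_t EntryFreq, uint64_t HeaderFreq,
                      unsigned ColdnessThresholdPct) {
      return HeaderFreq < (EntryFreq * ColdnessThresholdPct) / 100;
    }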
namespace {
class LUAnalysisCache {
@@ -148,12 +166,19 @@ namespace {
LPPassManager *LPM;
AssumptionCache *AC;
- // LoopProcessWorklist - Used to check if second loop needs processing
- // after RewriteLoopBodyWithConditionConstant rewrites first loop.
+ // Used to check if second loop needs processing after
+ // RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
LUAnalysisCache BranchesInfo;
+ bool EnabledPGO;
+
+ // BFI and ColdEntryFreq are only used when PGO and
+ // LoopUnswitchWithBlockFrequency are enabled.
+ BlockFrequencyInfo BFI;
+ BlockFrequency ColdEntryFreq;
+
bool OptimizeForSize;
bool redoLoop;
@@ -192,9 +217,11 @@ namespace {
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
private:
@@ -210,7 +237,10 @@ namespace {
/// Split all of the edges from inside the loop to their exit blocks.
/// Update the appropriate Phi nodes as we do so.
- void SplitExitEdges(Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks);
+ void SplitExitEdges(Loop *L,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks);
+
+ bool TryTrivialLoopUnswitch(bool &Changed);
bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
TerminatorInst *TI = nullptr);
@@ -229,9 +259,6 @@ namespace {
TerminatorInst *TI);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
- BasicBlock **LoopExit = nullptr);
-
};
}
@@ -367,9 +394,8 @@ Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
}
-/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
-/// invariant in the loop, or has an invariant piece, return the invariant.
-/// Otherwise, return null.
+/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
+/// an invariant piece, return the invariant. Otherwise, return null.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We started analyze new instruction, increment scanned instructions counter.
@@ -411,11 +437,23 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
*L->getHeader()->getParent());
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPM_Ref;
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
+
+ EnabledPGO = F->getEntryCount().hasValue();
+
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ BranchProbabilityInfo BPI(*F, *LI);
+ BFI.calculate(*L->getHeader()->getParent(), BPI, *LI);
+
+ // Use BranchProbability to compute a minimum frequency based on
+ // function entry baseline frequency. Loops with headers below this
+ // frequency are considered as cold.
+ const BranchProbability ColdProb(ColdnessThreshold, 100);
+ ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
+ }
+
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
@@ -423,16 +461,13 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
Changed |= processCurrentLoop();
} while(redoLoop);
- if (Changed) {
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- if (DT)
- DT->recalculate(*F);
- }
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (Changed)
+ DT->recalculate(*F);
return Changed;
}
-/// processCurrentLoop - Do actual work and unswitch loop if possible
-/// and profitable.
+/// Do actual work and unswitch loop if possible and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
@@ -452,14 +487,48 @@ bool LoopUnswitch::processCurrentLoop() {
LLVMContext &Context = loopHeader->getContext();
- // Probably we reach the quota of branches for this loop. If so
- // stop unswitching.
+ // Analyze loop cost, and stop unswitching if loop content cannot be duplicated.
if (!BranchesInfo.countLoop(
currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*currentLoop->getHeader()->getParent()),
AC))
return false;
+ // Try trivial unswitch first, before looping over other basic blocks in the loop.
+ if (TryTrivialLoopUnswitch(Changed)) {
+ return true;
+ }
+
+ // Do not unswitch loops containing convergent operations, as we might be
+ // making them control dependent on the unswitch value when they were not
+ // before.
+ // FIXME: This could be refined to only bail if the convergent operation is
+ // not already control-dependent on the unswitch value.
+ for (const auto BB : currentLoop->blocks()) {
+ for (auto &I : *BB) {
+ auto CS = CallSite(&I);
+ if (!CS) continue;
+ if (CS.hasFnAttr(Attribute::Convergent))
+ return false;
+ }
+ }
+
+ // Do not do non-trivial unswitch while optimizing for size.
+ // FIXME: Use Function::optForSize().
+ if (OptimizeForSize ||
+ loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ // Compute the weighted frequency of the hottest block in the
+ // loop (loopHeader in this case since inner loops should be
+ // processed before outer loop). If it is less than ColdFrequency,
+ // we should not unswitch.
+ BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
+ if (LoopEntryFreq < ColdEntryFreq)
+ return false;
+ }
+
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
@@ -528,8 +597,8 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
-/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
-/// loop with no side effects (including infinite loops).
+/// Check to see if all paths from BB exit the loop with no side effects
+/// (including infinite loops).
///
/// If true, we return true and set ExitBB to the block we
/// exit through.
@@ -566,9 +635,9 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
return true;
}
-/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
-/// leads to an exit from the specified loop, and has no side-effects in the
-/// process. If so, return the block that is exited to, otherwise return null.
+/// Return true if the specified block unconditionally leads to an exit from
+/// the specified loop, and has no side-effects in the process. If so, return
+/// the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
Visited.insert(L->getHeader()); // Branches to header make infinite loops.
@@ -578,105 +647,11 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
return nullptr;
}
-/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
-/// trivial: that is, that the condition controls whether or not the loop does
-/// anything at all. If this is a trivial condition, unswitching produces no
-/// code duplications (equivalently, it produces a simpler loop and a new empty
-/// loop, which gets deleted).
-///
-/// If this is a trivial condition, return true, otherwise return false. When
-/// returning true, this sets Cond and Val to the condition that controls the
-/// trivial condition: when Cond dynamically equals Val, the loop is known to
-/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
-/// Cond == Val.
-///
-bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
- BasicBlock **LoopExit) {
- BasicBlock *Header = currentLoop->getHeader();
- TerminatorInst *HeaderTerm = Header->getTerminator();
- LLVMContext &Context = Header->getContext();
-
- BasicBlock *LoopExitBB = nullptr;
- if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
- // If the header block doesn't end with a conditional branch on Cond, we
- // can't handle it.
- if (!BI->isConditional() || BI->getCondition() != Cond)
- return false;
-
- // Check to see if a successor of the branch is guaranteed to
- // exit through a unique exit block without having any
- // side-effects. If so, determine the value of Cond that causes it to do
- // this.
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
- BI->getSuccessor(0)))) {
- if (Val) *Val = ConstantInt::getTrue(Context);
- } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
- BI->getSuccessor(1)))) {
- if (Val) *Val = ConstantInt::getFalse(Context);
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
- // If this isn't a switch on Cond, we can't handle it.
- if (SI->getCondition() != Cond) return false;
-
- // Check to see if a successor of the switch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
- // side-effects. If so, determine the value of Cond that causes it to do
- // this.
- // Note that we can't trivially unswitch on the default case or
- // on already unswitched cases.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- BasicBlock *LoopExitCandidate;
- if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
- i.getCaseSuccessor()))) {
- // Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt *CaseVal = i.getCaseValue();
-
- // Check that it was not unswitched before, since already unswitched
- // trivial vals are looks trivial too.
- if (BranchesInfo.isUnswitched(SI, CaseVal))
- continue;
- LoopExitBB = LoopExitCandidate;
- if (Val) *Val = CaseVal;
- break;
- }
- }
- }
-
- // If we didn't find a single unique LoopExit block, or if the loop exit block
- // contains phi nodes, this isn't trivial.
- if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
- return false; // Can't handle this.
-
- if (LoopExit) *LoopExit = LoopExitBB;
-
- // We already know that nothing uses any scalar values defined inside of this
- // loop. As such, we just have to check to see if this loop will execute any
- // side-effecting instructions (e.g. stores, calls, volatile loads) in the
- // part of the loop that the code *would* execute. We already checked the
- // tail, check the header now.
- for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
- if (I->mayHaveSideEffects())
- return false;
- return true;
-}
-
-/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
-/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
+/// We have found that we can unswitch currentLoop when LoopCond == Val to
+/// simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
TerminatorInst *TI) {
- Function *F = loopHeader->getParent();
- Constant *CondVal = nullptr;
- BasicBlock *ExitBlock = nullptr;
-
- if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
- // If the condition is trivial, always unswitch. There is no code growth
- // for this case.
- UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI);
- return true;
- }
-
// Check to see if it would be profitable to unswitch current loop.
if (!BranchesInfo.CostAllowsUnswitching()) {
DEBUG(dbgs() << "NOT unswitching loop %"
@@ -687,32 +662,27 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
return false;
}
- // Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize))
- return false;
-
UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
return true;
}
-/// CloneLoop - Recursively clone the specified loop and all of its children,
+/// Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
- Loop *New = new Loop();
- LPM->insertLoop(New, PL);
+ Loop &New = LPM->addLoop(PL);
// Add all of the blocks in L to the new loop.
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
if (LI->getLoopFor(*I) == L)
- New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
// Add all of the subloops to the new loop.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- CloneLoop(*I, New, VM, LI, LPM);
+ CloneLoop(*I, &New, VM, LI, LPM);
- return New;
+ return &New;
}
static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
@@ -744,15 +714,15 @@ static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
}
}
// fallthrough.
+ case LLVMContext::MD_make_implicit:
case LLVMContext::MD_dbg:
DstInst->setMetadata(MD.first, MD.second);
}
}
}
-/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values
-/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the
-/// code immediately before InsertPt.
+/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
+/// otherwise branch to FalseDest. Insert the code immediately before InsertPt.
void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
@@ -782,11 +752,11 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
SplitCriticalEdge(BI, 1, Options);
}
-/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
-/// condition in it (a cond branch from its header block to its latch block,
-/// where the path through the loop that doesn't execute its body has no
-/// side-effects), unswitch it. This doesn't involve any code duplication, just
-/// moving the conditional branch outside of the loop and updating loop info.
+/// Given a loop that has a trivial unswitchable condition in it (a cond branch
+/// from its header block to its latch block, where the path through the loop
+/// that doesn't execute its body has no side-effects), unswitch it. This
+/// doesn't involve any code duplication, just moving the conditional branch
+/// outside of the loop and updating loop info.
void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock,
TerminatorInst *TI) {
@@ -810,7 +780,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), DT, LI);
+ BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI);
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
@@ -829,8 +799,155 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
++NumTrivial;
}
-/// SplitExitEdges - Split all of the edges from inside the loop to their exit
-/// blocks. Update the appropriate Phi nodes as we do so.
+/// Check if the first non-constant condition starting from the loop header is
+/// a trivial unswitch condition: that is, a condition that controls whether or
+/// not the loop does anything at all. If it is a trivial condition, unswitching
+/// produces no code duplication (equivalently, it produces a simpler loop and
+/// a new empty loop, which gets deleted), so we always unswitch a trivial
+/// condition.
+bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
+ BasicBlock *CurrentBB = currentLoop->getHeader();
+ TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+ LLVMContext &Context = CurrentBB->getContext();
+
+  // If the loop header has only one reachable successor (currently via an
+  // unconditional branch or a constant-foldable conditional branch; constant-
+  // foldable switch instructions should also be handled in the future), we
+  // should keep looking for trivial condition candidates in that successor
+  // as well. An alternative is to constant-fold conditions and merge
+  // successors into the loop header (then we would only need to check the
+  // header's terminator). We do not do that in the LoopUnswitch pass because
+  // it could break LoopPassManager's invariants: folding dead branches could
+  // either eliminate the current loop or make other loops unreachable, and
+  // LCSSA form might not be preserved after deleting branches. Instead, the
+  // following code keeps traversing the loop header's successors until it
+  // finds a trivial condition candidate (a condition that is not a constant).
+  // Since unswitching generates branches with constant conditions, this
+  // scenario is common in practice.
+ SmallSet<BasicBlock*, 8> Visited;
+
+ while (true) {
+    // If we exit the loop or reach a previously visited block, then we
+    // cannot reach any trivial condition candidates (unfoldable branch
+    // or switch instructions) and no unswitch can happen. Exit and
+    // return false.
+ if (!currentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
+ return false;
+
+    // Check if this loop will execute any side-effecting instructions (e.g.
+    // stores, calls, volatile loads) in the part of the loop that the code
+    // *would* execute. Check each block we visit, starting with the header.
+ for (Instruction &I : *CurrentBB)
+ if (I.mayHaveSideEffects())
+ return false;
+
+ // FIXME: add check for constant foldable switch instructions.
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+ if (BI->isUnconditional()) {
+ CurrentBB = BI->getSuccessor(0);
+ } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
+ CurrentBB = BI->getSuccessor(0);
+ } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
+ CurrentBB = BI->getSuccessor(1);
+ } else {
+ // Found a trivial condition candidate: non-foldable conditional branch.
+ break;
+ }
+ } else {
+ break;
+ }
+
+ CurrentTerm = CurrentBB->getTerminator();
+ }
+
+  // CondVal is the constant value of the condition for which the loop takes
+  // the trivial exit. LoopExitBB is the basic block the loop exits to when
+  // the trivial condition is met.
+ Constant *CondVal = nullptr;
+ BasicBlock *LoopExitBB = nullptr;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+ // If this isn't branching on an invariant condition, we can't unswitch it.
+ if (!BI->isConditional())
+ return false;
+
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+
+    // Unswitch only if the trivial condition itself is an LIV (not a
+    // partial LIV, which can occur in an and/or expression).
+ if (!LoopCond || LoopCond != BI->getCondition())
+ return false;
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
+ // side-effects. If so, determine the value of Cond that causes
+ // it to do this.
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
+ CondVal = ConstantInt::getTrue(Context);
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
+ CondVal = ConstantInt::getFalse(Context);
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit
+ // block contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
+ CurrentTerm);
+ ++NumBranches;
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+ // If this isn't switching on an invariant condition, we can't unswitch it.
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+
+    // Unswitch only if the trivial condition itself is an LIV (not a
+    // partial LIV, which can occur in an and/or expression).
+ if (!LoopCond || LoopCond != SI->getCondition())
+ return false;
+
+    // Check to see if a successor of the switch is guaranteed to go to the
+    // latch block or exit through a unique exit block without having any
+    // side-effects. If so, determine the value of Cond that causes it to do
+    // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock *LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
+ i.getCaseSuccessor()))) {
+ // Okay, we found a trivial case, remember the value that is trivial.
+ ConstantInt *CaseVal = i.getCaseValue();
+
+      // Check that it was not unswitched before, since values that were
+      // already unswitched still look trivial.
+ if (BranchesInfo.isUnswitched(SI, CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ CondVal = CaseVal;
+ break;
+ }
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit
+ // block contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
+ nullptr);
+ ++NumSwitches;
+ return true;
+ }
+ return false;
+}
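
For illustration, a minimal C++ sketch (not from this change; names are
hypothetical) of the loop shape TryTrivialLoopUnswitch targets: a
loop-invariant test in the header whose exiting path has no side effects, so
hoisting it out of the loop duplicates no code.

// Hypothetical source-level view of a trivially unswitchable loop.
int sumIfEnabled(const int *A, int N, bool Enabled) {
  int S = 0;
  for (int I = 0; I < N; ++I) {
    if (!Enabled)   // loop-invariant condition in the header
      return S;     // trivial exit: no side effects on this path
    S += A[I];      // the body only runs when Enabled is true
  }
  return S;
}
// After trivial unswitching, the test runs once before the loop:
//   if (!Enabled) return 0;
//   for (int I = 0; I < N; ++I) S += A[I];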
+
+/// Split all of the edges from inside the loop to their exit blocks.
+/// Update the appropriate Phi nodes as we do so.
void LoopUnswitch::SplitExitEdges(Loop *L,
const SmallVectorImpl<BasicBlock *> &ExitBlocks){
@@ -841,15 +958,14 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa",
- /*AliasAnalysis*/ nullptr, DT, LI,
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI,
/*PreserveLCSSA*/ true);
}
}
-/// UnswitchNontrivialCondition - We determined that the loop is profitable
-/// to unswitch when LIC equal Val. Split it into loop versions and test the
-/// condition outside of either loop. Return the loops created as Out1/Out2.
+/// We determined that the loop is profitable to unswitch when LIC equal Val.
+/// Split it into loop versions and test the condition outside of either loop.
+/// Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L, TerminatorInst *TI) {
Function *F = loopHeader->getParent();
@@ -858,8 +974,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
<< " blocks] in Function " << F->getName()
<< " when '" << *Val << "' == " << *LIC << "\n");
- if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
- SE->forgetLoop(L);
+ if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
+ SEWP->getSE().forgetLoop(L);
LoopBlocks.clear();
NewBlocks.clear();
@@ -901,8 +1017,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// Splice the newly inserted blocks into the function right before the
// original preheader.
- F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
- NewBlocks[0], F->end());
+ F->getBasicBlockList().splice(NewPreheader->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F->end());
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
@@ -944,7 +1061,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
- ExitSucc->getFirstInsertionPt());
+ &*ExitSucc->getFirstInsertionPt());
for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
I != E; ++I) {
@@ -960,7 +1077,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ RemapInstruction(&*I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
@@ -994,8 +1112,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
-/// RemoveFromWorklist - Remove all instances of I from the worklist vector
-/// specified.
+/// Remove all instances of I from the worklist vector specified.
static void RemoveFromWorklist(Instruction *I,
std::vector<Instruction*> &Worklist) {
@@ -1003,7 +1120,7 @@ static void RemoveFromWorklist(Instruction *I,
Worklist.end());
}
-/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
+/// When we find that I really equals V, remove I from the
/// program, replacing all uses with V and update the worklist.
static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
@@ -1025,9 +1142,9 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
-// the value specified by Val in the specified loop, or we know it does NOT have
-// that value. Rewrite any uses of LIC or of properties correlated to it.
+/// We know either that the value LIC has the value specified by Val in the
+/// specified loop, or we know it does NOT have that value.
+/// Rewrite any uses of LIC or of properties correlated to it.
void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val,
bool IsEqual) {
@@ -1138,18 +1255,16 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// domtree here -- instead we force it to do a full recomputation
// after the pass is complete -- but we do need to inform it of
// new blocks.
- if (DT)
- DT->addNewBlock(Abort, NewSISucc);
+ DT->addNewBlock(Abort, NewSISucc);
}
SimplifyCode(Worklist, L);
}
-/// SimplifyCode - Okay, now that we have simplified some instructions in the
-/// loop, walk over it and constant prop, dce, and fold control flow where
-/// possible. Note that this is effectively a very simple loop-structure-aware
-/// optimizer. During processing of this loop, L could very well be deleted, so
-/// it must not be used.
+/// Now that we have simplified some instructions in the loop, walk over it and
+/// constant prop, dce, and fold control flow where possible. Note that this is
+/// effectively a very simple loop-structure-aware optimizer. During processing
+/// of this loop, L could very well be deleted, so it must not be used.
///
/// FIXME: When the loop optimizer is more mature, separate this out to a new
/// pass.
@@ -1207,8 +1322,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Succ->replaceAllUsesWith(Pred);
// Move all of the successor contents from Succ to Pred.
- Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
- Succ->end());
+ Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
+ Succ->begin(), Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index 3314e1ed41ab..41511bcb7b04 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
#define DEBUG_TYPE "loweratomic"
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
- IRBuilder<> Builder(CXI->getParent(), CXI);
+ IRBuilder<> Builder(CXI);
Value *Ptr = CXI->getPointerOperand();
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
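
For context, LowerAtomic rewrites atomics into plain memory operations for
targets that need no atomicity; a rough C++ sketch of the effect of the
cmpxchg lowering (hypothetical helper name, assuming single-threaded
semantics, not the pass's actual API):

#include <utility>

// Sketch: cmpxchg(Ptr, Cmp, Val) lowered to load/compare/conditional store.
std::pair<int, bool> lowerCmpXchg(int *Ptr, int Cmp, int Val) {
  int Orig = *Ptr;              // load the current value
  bool Equal = (Orig == Cmp);   // compare against the expected value
  if (Equal)
    *Ptr = Val;                 // store the new value on success
  return {Orig, Equal};         // cmpxchg yields {original, success}
}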
@@ -41,7 +41,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
}
static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
- IRBuilder<> Builder(RMWI->getParent(), RMWI);
+ IRBuilder<> Builder(RMWI);
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
@@ -120,7 +120,7 @@ namespace {
return false;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
- Instruction *Inst = DI++;
+ Instruction *Inst = &*DI++;
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
Changed |= LowerFenceInst(FI);
else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 0c47cbd5bfda..2ace902a7a1b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -139,7 +139,7 @@ static bool lowerExpectIntrinsic(Function &F) {
ExpectIntrinsicsHandled++;
}
- // remove llvm.expect intrinsics.
+ // Remove llvm.expect intrinsics.
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
CallInst *CI = dyn_cast<CallInst>(BI++);
if (!CI)
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 85012afc80ac..0333bf2284e1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -30,7 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <list>
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
@@ -71,9 +72,9 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
return Offset;
}
-/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
-/// constant offset, and return that constant offset. For example, Ptr1 might
-/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
+/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and
+/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2
+/// might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
const DataLayout &DL) {
Ptr1 = Ptr1->stripPointerCasts();
@@ -125,7 +126,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
}
-/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
/// store 0 -> P+1
/// store 0 -> P+0
@@ -164,8 +165,8 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If any of the stores are a memset, then it is always good to extend the
// memset.
- for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
- if (!isa<StoreInst>(TheStores[i]))
+ for (Instruction *SI : TheStores)
+ if (!isa<StoreInst>(SI))
return true;
// Assume that the code generator is capable of merging pairs of stores
@@ -189,7 +190,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
unsigned NumPointerStores = Bytes / MaxIntSize;
// Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
+ unsigned NumByteStores = Bytes % MaxIntSize;
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
@@ -200,15 +201,14 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
namespace {
class MemsetRanges {
- /// Ranges - A sorted list of the memset ranges. We use std::list here
- /// because each element is relatively large and expensive to copy.
- std::list<MemsetRange> Ranges;
- typedef std::list<MemsetRange>::iterator range_iterator;
+ /// A sorted list of the memset ranges.
+ SmallVector<MemsetRange, 8> Ranges;
+ typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
const DataLayout &DL;
public:
MemsetRanges(const DataLayout &DL) : DL(DL) {}
- typedef std::list<MemsetRange>::const_iterator const_iterator;
+ typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
@@ -240,26 +240,20 @@ public:
} // end anon namespace
-/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
-///
-/// Do a linear search of the ranges to see if this can be joined and/or to
-/// find the insertion point in the list. We keep the ranges sorted for
-/// simplicity here. This is a linear search of a linked list, which is ugly,
-/// however the number of ranges is limited, so this won't get crazy slow.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
- range_iterator I = Ranges.begin(), E = Ranges.end();
- while (I != E && Start > I->End)
- ++I;
+ range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
+ [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
// We now know that I == E, in which case we didn't find anything to merge
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
// to insert a new range. Handle this now.
- if (I == E || End < I->Start) {
+ if (I == Ranges.end() || End < I->Start) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
@@ -295,7 +289,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
if (End > I->End) {
I->End = End;
range_iterator NextI = I;
- while (++NextI != E && End >= NextI->Start) {
+ while (++NextI != Ranges.end() && End >= NextI->Start) {
// Merge the range in.
I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
if (NextI->End > I->End)
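
For illustration (not from this change), a self-contained C++ sketch of the
lookup the new code performs: Ranges stays sorted by End, and std::lower_bound
finds the first range whose End reaches Start.

#include <algorithm>
#include <cstdint>
#include <vector>

struct Range { int64_t Start, End; };   // simplified stand-in for MemsetRange

// Return the first range that could merge with [Start, Start + Size), i.e.
// the first one whose End is not strictly below Start.
std::vector<Range>::iterator findCandidate(std::vector<Range> &Ranges,
                                           int64_t Start) {
  return std::lower_bound(
      Ranges.begin(), Ranges.end(), Start,
      [](const Range &LHS, int64_t RHS) { return LHS.End < RHS; });
}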
@@ -331,9 +325,9 @@ namespace {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -357,7 +351,7 @@ namespace {
char MemCpyOpt::ID = 0;
}
-// createMemCpyOptPass - The public interface to this file...
+/// The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
@@ -366,14 +360,15 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-/// tryMergingIntoMemset - When scanning forward over instructions, we look for
-/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consecutive ones, it
-/// attempts to merge them together into a memcpy/memset.
+/// When scanning forward over instructions, we look for some other patterns to
+/// fold away. In particular, this looks for stores to neighboring locations of
+/// memory. If it sees enough consecutive ones, it attempts to merge them
+/// together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
@@ -384,7 +379,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// are stored.
MemsetRanges Ranges(DL);
- BasicBlock::iterator BI = StartInst;
+ BasicBlock::iterator BI(StartInst);
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
@@ -439,14 +434,12 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// If we create any memsets, we put it right before the first instruction that
  // isn't part of the memset block. This ensures that the memset is dominated
// by any addressing instruction needed by the start of the block.
- IRBuilder<> Builder(BI);
+ IRBuilder<> Builder(&*BI);
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
Instruction *AMemSet = nullptr;
- for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
- I != E; ++I) {
- const MemsetRange &Range = *I;
+ for (const MemsetRange &Range : Ranges) {
if (Range.TheStores.size() == 1) continue;
@@ -470,19 +463,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
DEBUG(dbgs() << "Replace stores:\n";
- for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- dbgs() << *Range.TheStores[i] << '\n';
+ for (Instruction *SI : Range.TheStores)
+ dbgs() << *SI << '\n';
dbgs() << "With: " << *AMemSet << '\n');
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
// Zap all the stores.
- for (SmallVectorImpl<Instruction *>::const_iterator
- SI = Range.TheStores.begin(),
- SE = Range.TheStores.end(); SI != SE; ++SI) {
- MD->removeInstruction(*SI);
- (*SI)->eraseFromParent();
+ for (Instruction *SI : Range.TheStores) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
}
++NumMemSetInfer;
}
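
For illustration, a hedged C++ view (not from this change) of the pattern
tryMergingIntoMemset folds: adjacent stores of one byte value collapse into a
single memset once their ranges are merged.

// Hypothetical input: four adjacent byte stores of the same value.
void zeroFour(unsigned char *P) {
  P[0] = 0;
  P[1] = 0;
  P[2] = 0;
  P[3] = 0;
}
// With the ranges merged, MemCpyOpt emits the equivalent of:
//   std::memset(P, 0, 4);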
@@ -493,6 +484,16 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
+
+ // Avoid merging nontemporal stores since the resulting
+ // memcpy/memset would not be able to preserve the nontemporal hint.
+  // In theory we could teach this pass to propagate the !nontemporal
+  // metadata to memset calls. However, that change would force the backend to
+ // conservatively expand !nontemporal memset calls back to sequences of
+ // store instructions (effectively undoing the merging).
+ if (SI->getMetadata(LLVMContext::MD_nontemporal))
+ return false;
+
const DataLayout &DL = SI->getModule()->getDataLayout();
// Detect cases where we're performing call slot forwarding, but
@@ -509,11 +510,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
- E = C; I != E; --I) {
- if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+ for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
+ I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
C = nullptr;
break;
}
@@ -554,7 +555,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
- BBI = I; // Don't invalidate iterator.
+ BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
@@ -567,14 +568,14 @@ bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
MSI->getValue())) {
- BBI = I; // Don't invalidate iterator.
+ BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
return false;
}
-/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
@@ -710,12 +711,12 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
// If necessary, perform additional analysis.
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
return false;
// All the checks have passed, so do the transformation.
@@ -749,11 +750,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- };
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_invariant_group};
combineMetadata(C, cpy, KnownIDs);
// Remove the memcpy.
@@ -763,10 +762,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return true;
}
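
As a rough sketch (not from this change; producer is a hypothetical callee),
the call slot pattern being optimized: a call fills a temporary that is then
copied to the real destination.

#include <cstring>

static void producer(char *Out) {        // hypothetical callee that writes
  std::memset(Out, 0, 64);               // exactly 64 bytes through Out
}

void caller(char *Dest) {
  char Tmp[64];
  producer(Tmp);                         // call writes into the temporary
  std::memcpy(Dest, Tmp, sizeof(Tmp));   // result is copied to Dest
}
// When the checks pass, the call is redirected to write into Dest directly
// and the memcpy is removed.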
-/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
-/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
-/// copy from MDep's input if we can.
-///
+/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
+/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
@@ -788,7 +785,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
@@ -802,8 +799,9 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false, M, M->getParent());
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
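
For illustration (not from this change), the memcpy-memcpy chain this routine
simplifies, assuming the source of the first copy is unmodified in between:

#include <cstddef>
#include <cstring>

void chain(char *X, char *Y, char *Z, std::size_t N) {
  std::memcpy(Y, X, N);   // MDep: X -> Y
  std::memcpy(Z, Y, N);   // M:    Y -> Z, rewritten to copy X -> Z
}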
@@ -860,8 +858,9 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return false;
// Check that there are no other dependencies on the memset destination.
- MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
- MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
+ MemDepResult DstDepInfo =
+ MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
+ MemCpy->getIterator(), MemCpy->getParent());
if (DstDepInfo.getInst() != MemSet)
return false;
@@ -936,7 +935,7 @@ bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
-/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
@@ -998,8 +997,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
}
MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
- M, M->getParent());
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+ SrcLoc, true, M->getIterator(), M->getParent());
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
@@ -1037,10 +1036,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return false;
}
-/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
-/// are guaranteed not to alias.
+/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
+/// not to alias.
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
if (!TLI->has(LibFunc::memmove))
return false;
@@ -1053,12 +1052,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
// If not, then we know we can transform this.
- Module *Mod = M->getParent()->getParent()->getParent();
Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
- ArgTys));
+ M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
+ Intrinsic::memcpy, ArgTys));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
@@ -1068,7 +1066,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
return true;
}
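
A small sketch (not from this change) of the memmove-to-memcpy rewrite: once
alias analysis proves the operands disjoint, the overlap-safe memmove can
become a plain memcpy.

#include <cstddef>
#include <cstring>

void copyDisjoint(char *Dst, const char *Src, std::size_t N) {
  // If AA proves Dst and Src cannot alias, this memmove...
  std::memmove(Dst, Src, N);
  // ...is rewritten to the cheaper std::memcpy(Dst, Src, N).
}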
-/// processByValArgument - This is called on every byval argument in call sites.
+/// This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
// Find out what feeds this byval argument.
@@ -1076,8 +1074,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
- CS.getInstruction()->getParent());
+ MemoryLocation(ByValArg, ByValSize), true,
+ CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
if (!DepInfo.isClobber())
return false;
@@ -1119,9 +1117,9 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- CS.getInstruction(), MDep->getParent());
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(
+ MemoryLocation::getForSource(MDep), false,
+ CS.getInstruction()->getIterator(), MDep->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -1140,7 +1138,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
return true;
}
-/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+/// Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
@@ -1148,7 +1146,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
bool RepeatInstruction = false;
@@ -1177,9 +1175,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
return MadeChange;
}
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
+/// This is the main transformation entry point for a function.
bool MemCpyOpt::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 643f3740eedd..c812d618c16a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -78,6 +78,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -91,6 +92,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mldst-motion"
@@ -106,7 +108,7 @@ class MergedLoadStoreMotion : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit MergedLoadStoreMotion(void)
+ MergedLoadStoreMotion()
: FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
}
@@ -116,10 +118,11 @@ public:
private:
// This transformation requires dominator postdominator info
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
- AU.addPreserved<AliasAnalysis>();
}
// Helper routines
@@ -156,7 +159,7 @@ private:
};
char MergedLoadStoreMotion::ID = 0;
-}
+} // anonymous namespace
///
/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
@@ -169,7 +172,8 @@ INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
@@ -236,12 +240,11 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
/// being loaded or protect against the load from happening
/// it is considered a hoist barrier.
///
-
bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
const Instruction& End,
LoadInst* LI) {
MemoryLocation Loc = MemoryLocation::get(LI);
- return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod);
+ return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
}
///
@@ -256,7 +259,7 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
++BBI) {
- Instruction *Inst = BBI;
+ Instruction *Inst = &*BBI;
    // Only merge and hoist loads when their result is used only in BB
if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
@@ -293,7 +296,7 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
// Intersect optional metadata.
HoistCand->intersectOptionalDataWith(ElseInst);
- HoistCand->dropUnknownMetadata();
+ HoistCand->dropUnknownNonDebugMetadata();
// Prepend point for instruction insert
Instruction *HoistPt = BB->getTerminator();
@@ -363,8 +366,7 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
int NLoads = 0;
for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
BBI != BBE;) {
-
- Instruction *I = BBI;
+ Instruction *I = &*BBI;
++BBI;
// Only move non-simple (atomic, volatile) loads.
@@ -394,11 +396,10 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
/// value being stored or protect against the store from
/// happening it is considered a sink barrier.
///
-
bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
const Instruction &End,
MemoryLocation Loc) {
- return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
+ return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
}
///
@@ -438,23 +439,16 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
StoreInst *S1) {
// Create a phi if the values mismatch.
- PHINode *NewPN = 0;
+ PHINode *NewPN = nullptr;
Value *Opd1 = S0->getValueOperand();
Value *Opd2 = S1->getValueOperand();
if (Opd1 != Opd2) {
NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
- BB->begin());
+ &BB->front());
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
- if (NewPN->getType()->getScalarType()->isPointerTy()) {
- // AA needs to be informed when a PHI-use of the pointer value is added
- for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) {
- unsigned J = PHINode::getOperandNumForIncomingValue(I);
- AA->addEscapingUse(NewPN->getOperandUse(J));
- }
- if (MD)
- MD->invalidateCachedPointerInfo(NewPN);
- }
+ if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(NewPN);
}
return NewPN;
}
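
For illustration (not from this change), the diamond that store sinking plus
getPHIOperand handles: both arms store different values to the same address,
so one store with a phi of the values can live at the join block.

void storeDiamond(int *P, bool C, int A, int B) {
  if (C)
    *P = A;   // store in the then-arm
  else
    *P = B;   // store in the else-arm
}
// After sinking, the equivalent of:
//   int V = C ? A : B;   // phi of the stored values at the join
//   *P = V;              // single merged store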
@@ -479,12 +473,12 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
// Intersect optional metadata.
S0->intersectOptionalDataWith(S1);
- S0->dropUnknownMetadata();
+ S0->dropUnknownNonDebugMetadata();
// Create the new store to be inserted at the join point.
StoreInst *SNew = (StoreInst *)(S0->clone());
Instruction *ANew = A0->clone();
- SNew->insertBefore(InsertPt);
+ SNew->insertBefore(&*InsertPt);
ANew->insertBefore(SNew);
assert(S0->getParent() == A0->getParent());
@@ -566,12 +560,13 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
}
return MergedStores;
}
+
///
/// \brief Run the transformation for each function
///
bool MergedLoadStoreMotion::runOnFunction(Function &F) {
MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool Changed = false;
DEBUG(dbgs() << "Instruction Merger\n");
@@ -579,7 +574,7 @@ bool MergedLoadStoreMotion::runOnFunction(Function &F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
- BasicBlock *BB = FI++;
+ BasicBlock *BB = &*FI++;
// Hoist equivalent loads and sink stores
// outside diamonds when possible
diff --git a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index f42f8306fccc..c8f885e7eec5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -71,8 +71,8 @@
//
// Limitations and TODO items:
//
-// 1) We only considers n-ary adds for now. This should be extended and
-// generalized.
+// 1) We only consider n-ary adds and muls for now. This should be extended
+//    and generalized.
//
//===----------------------------------------------------------------------===//
@@ -110,11 +110,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
@@ -145,12 +145,23 @@ private:
unsigned I, Value *LHS,
Value *RHS, Type *IndexedType);
- // Reassociate Add for better CSE.
- Instruction *tryReassociateAdd(BinaryOperator *I);
- // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
- Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
- // Rewrites I to LHS + RHS if LHS is computed already.
- Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+ // Reassociate binary operators for better CSE.
+ Instruction *tryReassociateBinaryOp(BinaryOperator *I);
+
+ // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
+ // passed.
+ Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
+ BinaryOperator *I);
+ // Rewrites I to (LHS op RHS) if LHS is computed already.
+ Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
+ BinaryOperator *I);
+
+ // Tries to match Op1 and Op2 by using V.
+ bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
+
+ // Gets SCEV for (LHS op RHS).
+ const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+ const SCEV *RHS);
// Returns the closest dominator of \c Dominatee that computes
// \c CandidateExpr. Returns null if not found.
@@ -161,11 +172,6 @@ private:
// GEP's pointer size, i.e., whether Index needs to be sign-extended in order
// to be an index of GEP.
bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
- // Returns whether V is known to be non-negative at context \c Ctxt.
- bool isKnownNonNegative(Value *V, Instruction *Ctxt);
- // Returns whether AO may sign overflow at context \c Ctxt. It computes a
- // conservative result -- it answers true when not sure.
- bool maySignOverflow(AddOperator *AO, Instruction *Ctxt);
AssumptionCache *AC;
const DataLayout *DL;
@@ -182,7 +188,7 @@ private:
// foo(a + b);
// if (p2)
// bar(a + b);
- DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+ DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
};
} // anonymous namespace
@@ -191,7 +197,7 @@ INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
@@ -207,7 +213,7 @@ bool NaryReassociate::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -224,6 +230,7 @@ static bool isPotentiallyNaryReassociable(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::GetElementPtr:
+ case Instruction::Mul:
return true;
default:
return false;
@@ -239,19 +246,21 @@ bool NaryReassociate::doOneIteration(Function &F) {
Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
BasicBlock *BB = Node->getBlock();
for (auto I = BB->begin(); I != BB->end(); ++I) {
- if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) {
- const SCEV *OldSCEV = SE->getSCEV(I);
- if (Instruction *NewI = tryReassociate(I)) {
+ if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
+ const SCEV *OldSCEV = SE->getSCEV(&*I);
+ if (Instruction *NewI = tryReassociate(&*I)) {
Changed = true;
- SE->forgetValue(I);
+ SE->forgetValue(&*I);
I->replaceAllUsesWith(NewI);
- RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
- I = NewI;
+          // If SeenExprs contains I's WeakVH, that entry will be replaced
+          // with nullptr.
+ RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
+ I = NewI->getIterator();
}
// Add the rewritten instruction to SeenExprs; the original instruction
// is deleted.
- const SCEV *NewSCEV = SE->getSCEV(I);
- SeenExprs[NewSCEV].push_back(I);
+ const SCEV *NewSCEV = SE->getSCEV(&*I);
+ SeenExprs[NewSCEV].push_back(WeakVH(&*I));
// Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
// is equivalent to I. However, ScalarEvolution::getSCEV may
// weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
@@ -271,7 +280,7 @@ bool NaryReassociate::doOneIteration(Function &F) {
//
// This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
if (NewSCEV != OldSCEV)
- SeenExprs[OldSCEV].push_back(I);
+ SeenExprs[OldSCEV].push_back(WeakVH(&*I));
}
}
}
@@ -281,7 +290,8 @@ bool NaryReassociate::doOneIteration(Function &F) {
Instruction *NaryReassociate::tryReassociate(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Add:
- return tryReassociateAdd(cast<BinaryOperator>(I));
+ case Instruction::Mul:
+ return tryReassociateBinaryOp(cast<BinaryOperator>(I));
case Instruction::GetElementPtr:
return tryReassociateGEP(cast<GetElementPtrInst>(I));
default:
@@ -352,27 +362,6 @@ bool NaryReassociate::requiresSignExtension(Value *Index,
return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
}
-bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) {
- bool NonNegative, Negative;
- // TODO: ComputeSignBits is expensive. Consider caching the results.
- ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT);
- return NonNegative;
-}
-
-bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) {
- if (AO->hasNoSignedWrap())
- return false;
-
- Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
- // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow.
- // TODO: handle the negative case as well.
- if (isKnownNonNegative(AO, Ctxt) &&
- (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt)))
- return false;
-
- return true;
-}
-
GetElementPtrInst *
NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
Type *IndexedType) {
@@ -381,7 +370,7 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
IndexToSplit = SExt->getOperand(0);
} else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
// zext can be treated as sext if the source is non-negative.
- if (isKnownNonNegative(ZExt->getOperand(0), GEP))
+ if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
IndexToSplit = ZExt->getOperand(0);
}
@@ -389,8 +378,11 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
// If the I-th index needs sext and the underlying add is not equipped with
// nsw, we cannot split the add because
// sext(LHS + RHS) != sext(LHS) + sext(RHS).
- if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP))
+ if (requiresSignExtension(IndexToSplit, GEP) &&
+ computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
+ OverflowResult::NeverOverflows)
return nullptr;
+
Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
// IndexToSplit = LHS + RHS.
if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
@@ -415,7 +407,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
IndexExprs.push_back(SE->getSCEV(*Index));
// Replace the I-th index with LHS.
IndexExprs[I] = SE->getSCEV(LHS);
- if (isKnownNonNegative(LHS, GEP) &&
+ if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
DL->getTypeSizeInBits(LHS->getType()) <
DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
// Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
@@ -429,19 +421,20 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
IndexExprs, GEP->isInBounds());
- auto *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
+ Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
if (Candidate == nullptr)
return nullptr;
- PointerType *TypeOfCandidate = dyn_cast<PointerType>(Candidate->getType());
- // Pretty rare but theoretically possible when a numeric value happens to
- // share CandidateExpr.
- if (TypeOfCandidate == nullptr)
- return nullptr;
+ IRBuilder<> Builder(GEP);
+ // Candidate does not necessarily have the same pointer type as GEP. Use
+ // bitcast or pointer cast to make sure they have the same type, so that the
+ // later RAUW doesn't complain.
+ Candidate = Builder.CreateBitOrPointerCast(Candidate, GEP->getType());
+ assert(Candidate->getType() == GEP->getType());
// NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
- Type *ElementType = TypeOfCandidate->getElementType();
+ Type *ElementType = GEP->getType()->getElementType();
uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
// Another less rare case: because I is not necessarily the last index of the
// GEP, the size of the type at the I-th index (IndexedSize) is not
@@ -461,8 +454,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
return nullptr;
// NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
- IRBuilder<> Builder(GEP);
- Type *IntPtrTy = DL->getIntPtrType(TypeOfCandidate);
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
if (RHS->getType() != IntPtrTy)
RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
if (IndexedSize != ElementSize) {
@@ -476,54 +468,89 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
return NewGEP;
}
-Instruction *NaryReassociate::tryReassociateAdd(BinaryOperator *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
- if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+ if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
return NewI;
- if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+ if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
return NewI;
return nullptr;
}
-Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
- Instruction *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
+ BinaryOperator *I) {
Value *A = nullptr, *B = nullptr;
- // To be conservative, we reassociate I only when it is the only user of A+B.
- if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
- // I = (A + B) + RHS
- // = (A + RHS) + B or (B + RHS) + A
+ // To be conservative, we reassociate I only when it is the only user of (A op
+ // B).
+ if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
+ // I = (A op B) op RHS
+ // = (A op RHS) op B or (B op RHS) op A
const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
const SCEV *RHSExpr = SE->getSCEV(RHS);
if (BExpr != RHSExpr) {
- if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+ if (auto *NewI =
+ tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
return NewI;
}
if (AExpr != RHSExpr) {
- if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+ if (auto *NewI =
+ tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
return NewI;
}
}
return nullptr;
}
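
A hedged C++ view (not from this change) of the CSE this enables, now for mul
as well as add: if a dominating (A op C) already exists, (A op B) op C is
rewritten as (A op C) op B so the existing value is reused.

int reuse(int A, int B, int C) {
  int T = A + C;         // dominating computation of (A + C)
  int U = (A + B) + C;   // reassociated to T + B, reusing T
  return T + U;
}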
-Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
- Value *RHS, Instruction *I) {
- auto Pos = SeenExprs.find(LHSExpr);
- // Bail out if LHSExpr is not previously seen.
- if (Pos == SeenExprs.end())
- return nullptr;
-
+Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
+ Value *RHS,
+ BinaryOperator *I) {
// Look for the closest dominator LHS of I that computes LHSExpr, and replace
- // I with LHS + RHS.
+ // I with LHS op RHS.
auto *LHS = findClosestMatchingDominator(LHSExpr, I);
if (LHS == nullptr)
return nullptr;
- Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ Instruction *NewI = nullptr;
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ break;
+ case Instruction::Mul:
+ NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
+ break;
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
NewI->takeName(I);
return NewI;
}
+bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
+ Value *&Op2) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
+ case Instruction::Mul:
+ return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
+ return false;
+}
+
+const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+ const SCEV *RHS) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ return SE->getAddExpr(LHS, RHS);
+ case Instruction::Mul:
+ return SE->getMulExpr(LHS, RHS);
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
+ return nullptr;
+}
+
Instruction *
NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
Instruction *Dominatee) {
@@ -537,9 +564,13 @@ NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
// future instruction either. Therefore, we pop it out of the stack. This
// optimization makes the algorithm O(n).
while (!Candidates.empty()) {
- Instruction *Candidate = Candidates.back();
- if (DT->dominates(Candidate, Dominatee))
- return Candidate;
+ // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
+ // during rewriting.
+ if (Value *Candidate = Candidates.back()) {
+ Instruction *CandidateInstruction = cast<Instruction>(Candidate);
+ if (DT->dominates(CandidateInstruction, Dominatee))
+ return CandidateInstruction;
+ }
Candidates.pop_back();
}
return nullptr;
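
The rewrite above keys on previously seen SCEV expressions: for I = (A op B) op RHS it asks whether a dominating computation of (A op RHS) or (B op RHS) already exists and, if so, rewrites I to reuse it. A minimal sketch of that lookup idea outside LLVM, with a sorted string key standing in for the canonicalized SCEV and a plain map standing in for SeenExprs (all names here are hypothetical):

#include <cstdio>
#include <map>
#include <string>

using ExprKey = std::string; // stand-in for a canonicalized SCEV

// Canonical key for a commutative binary op: sort the operands so that
// "a+c" and "c+a" map to the same expression.
static ExprKey makeKey(char Op, const ExprKey &L, const ExprKey &R) {
  return L <= R ? (L + Op + R) : (R + Op + L);
}

int main() {
  // "SeenExprs": expressions already computed, mapped to a value name.
  std::map<ExprKey, std::string> Seen;
  Seen[makeKey('+', "a", "c")] = "t0"; // t0 = a + c was computed earlier

  // Incoming instruction: i = (a + b) + c. Try i = (a + c) + b first,
  // then i = (b + c) + a, mirroring the two calls above.
  if (Seen.count(makeKey('+', "a", "c")))
    std::printf("rewrite: i = %s + b\n", Seen[makeKey('+', "a", "c")].c_str());
  else if (Seen.count(makeKey('+', "b", "c")))
    std::printf("rewrite: i = %s + a\n", Seen[makeKey('+', "b", "c")].c_str());
  else
    std::printf("no reuse found\n");
  return 0;
}
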
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 31d7df39c781..9f26f78892c6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -154,7 +154,7 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
Phi->addIncoming(Call, &CurrBB);
Phi->addIncoming(LibCall, LibCallBB);
- BB = JoinBB;
+ BB = JoinBB->getIterator();
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 366301ad731a..28c610c2486a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -27,7 +27,7 @@
// well defined state for inspection by the collector. In the current
// implementation, this is done via the insertion of poll sites at method entry
// and the backedge of most loops. We try to avoid inserting more polls than
-// are neccessary to ensure a finite period between poll sites. This is not
+// are necessary to ensure a finite period between poll sites. This is not
// because the poll itself is expensive in the generated code; it's not. Polls
// do tend to impact the optimizer itself in negative ways; we'd like to avoid
// perturbing the optimization of the method as much as we can.
@@ -91,13 +91,15 @@ STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");
using namespace llvm;
-// Ignore oppurtunities to avoid placing safepoints on backedges, useful for
+// Ignore opportunities to avoid placing safepoints on backedges, useful for
// validation
static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
cl::init(false));
-/// If true, do not place backedge safepoints in counted loops.
-static cl::opt<bool> SkipCounted("spp-counted", cl::Hidden, cl::init(true));
+/// How narrow does the trip count of a loop have to be for the loop to be
+/// considered "counted"? Counted loops do not get safepoints at backedges.
+static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width",
+ cl::Hidden, cl::init(32));
// If true, split the backedge of a loop when placing the safepoint, otherwise
// split the latch block itself. Both are useful to support for
@@ -121,7 +123,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
std::vector<TerminatorInst *> PollLocations;
/// True unless we're running spp-no-calls in which case we need to disable
- /// the call dependend placement opts.
+ /// the call-dependent placement opts.
bool CallSafepointsEnabled;
ScalarEvolution *SE = nullptr;
@@ -142,7 +144,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
@@ -153,7 +155,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
// We no longer modify the IR at all in this pass. Thus all
// analyses are preserved.
@@ -190,10 +192,8 @@ static void
InsertSafepointPoll(Instruction *InsertBefore,
std::vector<CallSite> &ParsePointsNeeded /*rval*/);
-static bool isGCLeafFunction(const CallSite &CS);
-
static bool needsStatepoint(const CallSite &CS) {
- if (isGCLeafFunction(CS))
+ if (callsGCLeafFunction(CS))
return false;
if (CS.isCall()) {
CallInst *call = cast<CallInst>(CS.getInstruction());
@@ -206,7 +206,7 @@ static bool needsStatepoint(const CallSite &CS) {
return true;
}
-static Value *ReplaceWithStatepoint(const CallSite &CS, Pass *P);
+static Value *ReplaceWithStatepoint(const CallSite &CS);
/// Returns true if this loop is known to contain a call safepoint which
/// must unconditionally execute on any iteration of the loop which returns
@@ -220,7 +220,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
// For the moment, we look only for the 'cuts' that consist of a single call
// instruction in a block which is dominated by the Header and dominates the
// loop latch (Pred) block. Somewhat surprisingly, walking the entire chain
- // of such dominating blocks gets substaintially more occurences than just
+ // of such dominating blocks gets substantially more occurrences than just
// checking the Pred and Header blocks themselves. This may be due to the
// density of loop exit conditions caused by range and null checks.
// TODO: structure this as an analysis pass, cache the result for subloops,
@@ -255,18 +255,12 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
/// conservatism in the analysis.
static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
BasicBlock *Pred) {
- // Only used when SkipCounted is off
- const unsigned upperTripBound = 8192;
-
// A conservative bound on the loop as a whole.
const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
- if (MaxTrips != SE->getCouldNotCompute()) {
- if (SE->getUnsignedRange(MaxTrips).getUnsignedMax().ult(upperTripBound))
- return true;
- if (SkipCounted &&
- SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(32))
- return true;
- }
+ if (MaxTrips != SE->getCouldNotCompute() &&
+ SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
+ CountedLoopTripWidth))
+ return true;
// If this is a conditional branch to the header with the alternate path
// being outside the loop, we can ask questions about the execution frequency
@@ -275,13 +269,10 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
// This returns an exact expression only. TODO: We really only need an
// upper bound here, but SE doesn't expose that.
const SCEV *MaxExec = SE->getExitCount(L, Pred);
- if (MaxExec != SE->getCouldNotCompute()) {
- if (SE->getUnsignedRange(MaxExec).getUnsignedMax().ult(upperTripBound))
- return true;
- if (SkipCounted &&
- SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(32))
+ if (MaxExec != SE->getCouldNotCompute() &&
+ SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
+ CountedLoopTripWidth))
return true;
- }
}
return /* not finite */ false;
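
mustBeFiniteCountedLoop above now treats a loop as "counted" exactly when SCEV can compute a maximum backedge-taken count and that count fits in CountedLoopTripWidth unsigned bits (the APInt::isIntN test). A small sketch of the width test with plain integers in place of APInt (fitsInWidth is a hypothetical helper):

#include <cassert>
#include <cstdint>

// Mirrors APInt::isIntN(N): is the value representable in N unsigned bits?
static bool fitsInWidth(uint64_t MaxTrips, unsigned Width) {
  return Width >= 64 || MaxTrips < (UINT64_C(1) << Width);
}

int main() {
  const unsigned CountedLoopTripWidth = 32; // the pass's default
  assert(fitsInWidth(100000, CountedLoopTripWidth));             // counted
  assert(!fitsInWidth(UINT64_C(1) << 40, CountedLoopTripWidth)); // too wide
  return 0;
}
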
@@ -432,14 +423,14 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
assert(hasNextInstruction(I) &&
"first check if there is a next instruction!");
if (I->isTerminator()) {
- return I->getParent()->getUniqueSuccessor()->begin();
+ return &I->getParent()->getUniqueSuccessor()->front();
} else {
- return std::next(BasicBlock::iterator(I));
+ return &*++I->getIterator();
}
};
Instruction *cursor = nullptr;
- for (cursor = F.getEntryBlock().begin(); hasNextInstruction(cursor);
+ for (cursor = &F.getEntryBlock().front(); hasNextInstruction(cursor);
cursor = nextInstruction(cursor)) {
// We need to ensure a safepoint poll occurs before any 'real' call. The
@@ -466,7 +457,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
static void findCallSafepoints(Function &F,
std::vector<CallSite> &Found /*rval*/) {
assert(Found.empty() && "must be empty!");
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
Instruction *inst = &I;
if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
CallSite CS(inst);
@@ -713,7 +704,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
Invoke->getParent());
}
- Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
+ Value *GCResult = ReplaceWithStatepoint(CS);
Results.push_back(GCResult);
}
assert(Results.size() == ParsePointNeeded.size());
@@ -747,7 +738,7 @@ FunctionPass *llvm::createPlaceSafepointsPass() {
INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
"place-backedge-safepoints-impl",
"Place Backedge Safepoints", false, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
@@ -759,31 +750,6 @@ INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
false, false)
-static bool isGCLeafFunction(const CallSite &CS) {
- Instruction *inst = CS.getInstruction();
- if (isa<IntrinsicInst>(inst)) {
- // Most LLVM intrinsics are things which can never take a safepoint.
- // As a result, we don't need to have the stack parsable at the
- // callsite. This is a highly useful optimization since intrinsic
- // calls are fairly prevelent, particularly in debug builds.
- return true;
- }
-
- // If this function is marked explicitly as a leaf call, we don't need to
- // place a safepoint of it. In fact, for correctness we *can't* in many
- // cases. Note: Indirect calls return Null for the called function,
- // these obviously aren't runtime functions with attributes
- // TODO: Support attributes on the call site as well.
- const Function *F = CS.getCalledFunction();
- bool isLeaf =
- F &&
- F->getFnAttribute("gc-leaf-function").getValueAsString().equals("true");
- if (isLeaf) {
- return true;
- }
- return false;
-}
-
static void
InsertSafepointPoll(Instruction *InsertBefore,
std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
@@ -796,6 +762,7 @@ InsertSafepointPoll(Instruction *InsertBefore,
// path call - where we need to insert a safepoint (parsepoint).
auto *F = M->getFunction(GCSafepointPollName);
+ assert(F && "gc.safepoint_poll function is missing");
assert(F->getType()->getElementType() ==
FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
"gc.safepoint_poll declared with wrong type");
@@ -864,10 +831,8 @@ InsertSafepointPoll(Instruction *InsertBefore,
/// Replaces the given call site (Call or Invoke) with a gc.statepoint
/// intrinsic with an empty deoptimization arguments list. This does
/// NOT do explicit relocation for GC support.
-static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
- Pass *P) {
- assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
- "must be set");
+static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) {
+ assert(CS.getInstruction()->getModule() && "must be set");
// TODO: technically, a pass is not allowed to get functions from within a
// function pass since it might trigger a new function addition. Refactor
@@ -917,15 +882,10 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
AttrsToRemove);
- Value *StatepointTarget = NumPatchBytes == 0
- ? CS.getCalledValue()
- : ConstantPointerNull::get(cast<PointerType>(
- CS.getCalledValue()->getType()));
-
if (CS.isCall()) {
CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
CallInst *Call = Builder.CreateGCStatepointCall(
- ID, NumPatchBytes, StatepointTarget,
+ ID, NumPatchBytes, CS.getCalledValue(),
makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
"safepoint_token");
Call->setTailCall(ToReplace->isTailCall());
@@ -938,7 +898,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
Token = Call;
- // Put the following gc_result and gc_relocate calls immediately after the
+ // Put the following gc_result and gc_relocate calls immediately after
// the old call (which we're about to delete).
assert(ToReplace->getNextNode() && "not a terminator, must have next");
Builder.SetInsertPoint(ToReplace->getNextNode());
@@ -951,7 +911,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
// original block.
Builder.SetInsertPoint(ToReplace->getParent());
InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
- ID, NumPatchBytes, StatepointTarget, ToReplace->getNormalDest(),
+ ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(),
ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
None, None, "safepoint_token");
@@ -967,7 +927,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
// We'll insert the gc.result into the normal block
BasicBlock *NormalDest = ToReplace->getNormalDest();
// Cannot insert gc.result in case of phi nodes present.
- // Should have removed this cases prior to runnning this function
+ // Should have removed these cases prior to running this function
assert(!isa<PHINode>(NormalDest->begin()));
Instruction *IP = &*(NormalDest->getFirstInsertionPt());
Builder.SetInsertPoint(IP);
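
The helper deleted above now lives in the shared callsGCLeafFunction utility, but the decision it encodes is unchanged: intrinsic calls never need a poll or statepoint, a callee explicitly tagged with the string attribute gc-leaf-function="true" must not get one, and indirect calls (null callee) are conservatively assumed to need a safepoint. A toy sketch of that decision with hypothetical stand-in types rather than the real CallSite API:

#include <cstdio>
#include <map>
#include <string>

struct Callee {
  bool IsIntrinsic = false;
  std::map<std::string, std::string> Attrs; // string function attributes
};

static bool callsGCLeafFunction(const Callee *F) {
  if (F == nullptr)
    return false; // indirect call: may need a safepoint
  if (F->IsIntrinsic)
    return true; // most intrinsics can never take a safepoint
  auto It = F->Attrs.find("gc-leaf-function");
  return It != F->Attrs.end() && It->second == "true";
}

int main() {
  Callee Leaf;
  Leaf.Attrs["gc-leaf-function"] = "true";
  std::printf("%d\n", callsGCLeafFunction(&Leaf));   // 1: no statepoint
  std::printf("%d\n", callsGCLeafFunction(nullptr)); // 0: needs one
  return 0;
}
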
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index d1acf785d07e..fb970c747ce1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -26,6 +26,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -62,7 +64,7 @@ namespace {
/// Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
- Module *M = I->getParent()->getParent()->getParent();
+ Module *M = I->getModule();
dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
<< *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -82,20 +84,6 @@ namespace {
Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
- /// \brief Sort factors by their Base.
- struct BaseSorter {
- bool operator()(const Factor &LHS, const Factor &RHS) {
- return LHS.Base < RHS.Base;
- }
- };
-
- /// \brief Compare factors for equal bases.
- struct BaseEqual {
- bool operator()(const Factor &LHS, const Factor &RHS) {
- return LHS.Base == RHS.Base;
- }
- };
-
/// \brief Sort factors in descending order by their power.
struct PowerDescendingSorter {
bool operator()(const Factor &LHS, const Factor &RHS) {
@@ -172,6 +160,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
private:
void BuildRankMap(Function &F);
@@ -255,27 +244,6 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
return nullptr;
}
-static bool isUnmovableInstruction(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::PHI:
- case Instruction::LandingPad:
- case Instruction::Alloca:
- case Instruction::Load:
- case Instruction::Invoke:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- return true;
- case Instruction::Call:
- return !isa<DbgInfoIntrinsic>(I);
- default:
- return false;
- }
-}
-
void Reassociate::BuildRankMap(Function &F) {
unsigned i = 2;
@@ -295,7 +263,7 @@ void Reassociate::BuildRankMap(Function &F) {
// we cannot move. This ensures that the ranks for these instructions are
// all different in the block.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (isUnmovableInstruction(I))
+ if (mayBeMemoryDependent(*I))
ValueRankMap[&*I] = ++BBRank;
}
}
@@ -913,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
/// that computes the negative version of the value specified. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add to the redo list any intermediate instructions that are modified
+/// while pushing the negates through adds. These will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+ SetVector<AssertingVH<Instruction>> &ToRedo) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->getType()->isFPOrFPVectorTy()) {
return ConstantExpr::getFNeg(C);
@@ -934,8 +906,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
if (BinaryOperator *I =
isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(I->getOperand(0), BI));
- I->setOperand(1, NegateValue(I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
if (I->getOpcode() == Instruction::Add) {
I->setHasNoUnsignedWrap(false);
I->setHasNoSignedWrap(false);
@@ -948,6 +920,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
//
I->moveBefore(BI);
I->setName(I->getName()+".neg");
+
+ // Add the intermediate negates to the redo list as processing them later
+ // could expose more reassociating opportunities.
+ ToRedo.insert(I);
return I;
}
@@ -972,26 +948,28 @@ static Value *NegateValue(Value *V, Instruction *BI) {
if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
InsertPt = II->getNormalDest()->begin();
} else {
- InsertPt = InstInput;
- ++InsertPt;
+ InsertPt = ++InstInput->getIterator();
}
while (isa<PHINode>(InsertPt)) ++InsertPt;
} else {
InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
}
- TheNeg->moveBefore(InsertPt);
+ TheNeg->moveBefore(&*InsertPt);
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
TheNeg->setHasNoSignedWrap(false);
} else {
TheNeg->andIRFlags(BI);
}
+ ToRedo.insert(TheNeg);
return TheNeg;
}
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
- return CreateNeg(V, V->getName() + ".neg", BI, BI);
+ BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+ ToRedo.insert(NewNeg);
+ return NewNeg;
}
/// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -1025,14 +1003,15 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// If we have (X-Y), and if either X is an add, or if this is only used by an
/// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
// Calculate the negative value of Operand 1 of the sub instruction,
// and set it as the RHS of the add instruction we just made.
//
- Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -1166,7 +1145,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
return nullptr;
}
- BasicBlock::iterator InsertPt = BO; ++InsertPt;
+ BasicBlock::iterator InsertPt = ++BO->getIterator();
// If this was just a single multiply, remove the multiply and return the only
// remaining operand.
@@ -1179,7 +1158,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
}
if (NeedsNegate)
- V = CreateNeg(V, "neg", InsertPt, BO);
+ V = CreateNeg(V, "neg", &*InsertPt, BO);
return V;
}
@@ -1250,7 +1229,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
return nullptr;
}
-/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and
+/// Helper function of CombineXorOpnd(). It creates a bitwise-and
/// instruction with the given two operands, and return the resulting
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
@@ -2083,7 +2062,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
return;
// Don't optimize floating point instructions that don't have unsafe algebra.
- if (I->getType()->isFloatingPointTy() && !I->hasUnsafeAlgebra())
+ if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
return;
// Do not reassociate boolean (i1) expressions. We want to preserve the
@@ -2099,7 +2078,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
// see if we can convert it to X+-Y.
if (I->getOpcode() == Instruction::Sub) {
if (ShouldBreakUpSubtract(I)) {
- Instruction *NI = BreakUpSubtract(I);
+ Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2110,6 +2089,12 @@ void Reassociate::OptimizeInst(Instruction *I) {
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::Mul))) {
Instruction *NI = LowerNegateToMultiply(I);
+ // If the negate was simplified, revisit the users to see if we can
+ // reassociate further.
+ for (User *U : NI->users()) {
+ if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+ RedoInsts.insert(Tmp);
+ }
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2117,7 +2102,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
}
} else if (I->getOpcode() == Instruction::FSub) {
if (ShouldBreakUpSubtract(I)) {
- Instruction *NI = BreakUpSubtract(I);
+ Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2127,7 +2112,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::FMul))) {
+ // If the negate was simplified, revisit the users to see if we can
+ // reassociate further.
Instruction *NI = LowerNegateToMultiply(I);
+ for (User *U : NI->users()) {
+ if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+ RedoInsts.insert(Tmp);
+ }
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2142,8 +2133,14 @@ void Reassociate::OptimizeInst(Instruction *I) {
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
unsigned Opcode = BO->getOpcode();
- if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+ if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+ // During the initial run we will get to the root of the tree.
+ // But if we get here while we are redoing instructions, there is no
+ // guarantee that the root will be visited, so redo it later.
+ if (BO->user_back() != BO)
+ RedoInsts.insert(BO->user_back());
return;
+ }
// If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
@@ -2250,10 +2247,10 @@ bool Reassociate::runOnFunction(Function &F) {
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Optimize every instruction in the basic block.
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
- if (isInstructionTriviallyDead(II)) {
- EraseInst(II++);
+ if (isInstructionTriviallyDead(&*II)) {
+ EraseInst(&*II++);
} else {
- OptimizeInst(II);
+ OptimizeInst(&*II);
assert(II->getParent() == BI && "Moved to a different block!");
++II;
}
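
The NegateValue/BreakUpSubtract changes above thread a ToRedo set through the recursion so that every add the negate is pushed through, and every negate that gets materialized or moved, is revisited for further reassociation. A toy sketch of the shape of that rewrite on a tiny expression tree (hypothetical types, not LLVM IR; leaks nodes for brevity):

#include <cstdio>
#include <vector>

struct Expr {
  char Op;      // 'v' leaf, '+' add, 'n' negate
  int Leaf = 0; // payload for leaves
  Expr *L = nullptr, *R = nullptr;
};

static std::vector<Expr *> ToRedo; // instructions to revisit later

static Expr *negate(Expr *E) {
  if (E->Op == '+') {
    // Push the negate through the add; queue the add for another pass.
    E->L = negate(E->L);
    E->R = negate(E->R);
    ToRedo.push_back(E);
    return E;
  }
  Expr *N = new Expr{'n', 0, E, nullptr}; // materialize a fresh negate
  ToRedo.push_back(N);
  return N;
}

int main() {
  Expr A{'v', 1}, B{'v', 2};
  Expr Sum{'+', 0, &A, &B};
  negate(&Sum); // -(A + B) becomes (-A) + (-B)
  std::printf("%zu nodes queued for redo\n", ToRedo.size()); // prints 3
  return 0;
}
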
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 1b46727c17bb..915f89780c08 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -82,10 +82,9 @@ bool RegToMem::runOnFunction(Function &F) {
BasicBlock::iterator I = BBEntry->begin();
while (isa<AllocaInst>(I)) ++I;
- CastInst *AllocaInsertionPoint =
- new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
- Type::getInt32Ty(F.getContext()),
- "reg2mem alloca point", I);
+ CastInst *AllocaInsertionPoint = new BitCastInst(
+ Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ Type::getInt32Ty(F.getContext()), "reg2mem alloca point", &*I);
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
@@ -95,7 +94,7 @@ bool RegToMem::runOnFunction(Function &F) {
for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
iib != iie; ++iib) {
if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
- valueEscapes(iib)) {
+ valueEscapes(&*iib)) {
WorkList.push_front(&*iib);
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index ae2ae3af0c7a..db127c3f7b4e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -14,12 +14,14 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
@@ -46,10 +48,6 @@
using namespace llvm;
-// Print tracing output
-static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
- cl::init(false));
-
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
cl::init(false));
@@ -74,6 +72,12 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
cl::location(ClobberNonLive),
cl::Hidden);
+static cl::opt<bool> UseDeoptBundles("rs4gc-use-deopt-bundles", cl::Hidden,
+ cl::init(false));
+static cl::opt<bool>
+ AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
+ cl::Hidden, cl::init(true));
+
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -88,10 +92,10 @@ struct RewriteStatepointsForGC : public ModulePass {
Changed |= runOnFunction(F);
if (Changed) {
- // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn
+ // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
// returns true for at least one function in the module. Since at least
// one function changed, we know that the precondition is satisfied.
- stripDereferenceabilityInfo(M);
+ stripNonValidAttributes(M);
}
return Changed;
@@ -108,15 +112,16 @@ struct RewriteStatepointsForGC : public ModulePass {
/// dereferenceability that are no longer valid/correct after
/// RewriteStatepointsForGC has run. This is because semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
- /// heap. stripDereferenceabilityInfo (conservatively) restores correctness
+ /// heap. stripNonValidAttributes (conservatively) restores correctness
/// by erasing all attributes in the module that externally imply
/// dereferenceability.
- ///
- void stripDereferenceabilityInfo(Module &M);
+ /// Similar reasoning also applies to the noalias attributes. gc.statepoint
+ /// can touch the entire heap including noalias objects.
+ void stripNonValidAttributes(Module &M);
- // Helpers for stripDereferenceabilityInfo
- void stripDereferenceabilityInfoFromBody(Function &F);
- void stripDereferenceabilityInfoFromPrototype(Function &F);
+ // Helpers for stripNonValidAttributes
+ void stripNonValidAttributesFromBody(Function &F);
+ void stripNonValidAttributesFromPrototype(Function &F);
};
} // namespace
@@ -160,15 +165,16 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
typedef DenseMap<Value *, Value *> DefiningValueMapTy;
-typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
-typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
+typedef DenseSet<Value *> StatepointLiveSetTy;
+typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
+ RematerializedValueMapTy;
struct PartiallyConstructedSafepointRecord {
- /// The set of values known to be live accross this safepoint
- StatepointLiveSetTy liveset;
+ /// The set of values known to be live across this safepoint
+ StatepointLiveSetTy LiveSet;
/// Mapping from live pointers to a base-defining-value
- DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+ DenseMap<Value *, Value *> PointerToBase;
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
@@ -179,12 +185,26 @@ struct PartiallyConstructedSafepointRecord {
Instruction *UnwindToken;
/// Record live values we are rematerialized instead of relocating.
- /// They are not included into 'liveset' field.
+ /// They are not included into 'LiveSet' field.
/// Maps rematerialized copy to it's original value.
RematerializedValueMapTy RematerializedValues;
};
}
+static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
+ assert(UseDeoptBundles && "Should not be called otherwise!");
+
+ Optional<OperandBundleUse> DeoptBundle = CS.getOperandBundle("deopt");
+
+ if (!DeoptBundle.hasValue()) {
+ assert(AllowStatepointWithNoDeoptInfo &&
+ "Found non-leaf call without deopt info!");
+ return None;
+ }
+
+ return DeoptBundle.getValue().Inputs;
+}
+
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
GCPtrLivenessData &Data);
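
GetDeoptBundleOperands above tolerates a missing "deopt" bundle only when the rs4gc-allow-statepoint-with-no-deopt-info flag is set, in which case the statepoint simply gets an empty deoptimization argument list. A minimal sketch of that contract with standard-library stand-ins (std::optional in place of llvm::Optional<OperandBundleUse>, a vector of ints in place of the bundle's Use inputs):

#include <cassert>
#include <optional>
#include <vector>

using BundleInputs = std::vector<int>;

static const bool AllowStatepointWithNoDeoptInfo = true; // cl::opt default

static BundleInputs
getDeoptBundleOperands(const std::optional<BundleInputs> &DeoptBundle) {
  if (!DeoptBundle.has_value()) {
    assert(AllowStatepointWithNoDeoptInfo &&
           "Found non-leaf call without deopt info!");
    return {}; // no bundle: empty deopt argument list
  }
  return DeoptBundle.value();
}

int main() {
  assert(getDeoptBundleOperands(std::nullopt).empty());
  assert(getDeoptBundleOperands(BundleInputs{1, 2, 3}).size() == 3);
  return 0;
}
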
@@ -195,10 +215,10 @@ static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
StatepointLiveSetTy &out);
// TODO: Once we can get to the GCStrategy, this becomes
-// Optional<bool> isGCManagedPointer(const Value *V) const override {
+// Optional<bool> isGCManagedPointer(const Type *Ty) const override {
-static bool isGCPointerType(const Type *T) {
- if (const PointerType *PT = dyn_cast<PointerType>(T))
+static bool isGCPointerType(Type *T) {
+ if (auto *PT = dyn_cast<PointerType>(T))
// For the sake of this example GC, we arbitrarily pick addrspace(1) as our
// GC managed heap. We know that a pointer into this heap needs to be
// updated and that no other pointer does.
@@ -233,9 +253,8 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return std::any_of(
- ST->subtypes().begin(), ST->subtypes().end(),
- [](Type *SubType) { return containsGCPtrType(SubType); });
+ return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
+ containsGCPtrType);
return false;
}
@@ -247,7 +266,7 @@ static bool isUnhandledGCPointerType(Type *Ty) {
}
#endif
-static bool order_by_name(llvm::Value *a, llvm::Value *b) {
+static bool order_by_name(Value *a, Value *b) {
if (a->hasName() && b->hasName()) {
return -1 == a->getName().compare(b->getName());
} else if (a->hasName() && !b->hasName()) {
@@ -260,6 +279,13 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) {
}
}
+// Return the name of the value suffixed with the provided suffix, or the
+// specified default name if the value is unnamed.
+static std::string suffixed_name_or(Value *V, StringRef Suffix,
+ StringRef DefaultName) {
+ return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
+}
+
// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
@@ -269,30 +295,56 @@ static void analyzeParsePointLiveness(
const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
Instruction *inst = CS.getInstruction();
- StatepointLiveSetTy liveset;
- findLiveSetAtInst(inst, OriginalLivenessData, liveset);
+ StatepointLiveSetTy LiveSet;
+ findLiveSetAtInst(inst, OriginalLivenessData, LiveSet);
if (PrintLiveSet) {
// Note: This output is used by several of the test cases
- // The order of elemtns in a set is not stable, put them in a vec and sort
+ // The order of elements in a set is not stable; put them in a vector and sort
// by name
- SmallVector<Value *, 64> temp;
- temp.insert(temp.end(), liveset.begin(), liveset.end());
- std::sort(temp.begin(), temp.end(), order_by_name);
+ SmallVector<Value *, 64> Temp;
+ Temp.insert(Temp.end(), LiveSet.begin(), LiveSet.end());
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
errs() << "Live Variables:\n";
- for (Value *V : temp) {
- errs() << " " << V->getName(); // no newline
- V->dump();
- }
+ for (Value *V : Temp)
+ dbgs() << " " << V->getName() << " " << *V << "\n";
}
if (PrintLiveSetSize) {
errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
- errs() << "Number live values: " << liveset.size() << "\n";
+ errs() << "Number live values: " << LiveSet.size() << "\n";
+ }
+ result.LiveSet = LiveSet;
+}
+
+static bool isKnownBaseResult(Value *V);
+namespace {
+/// A single base defining value - An immediate base defining value for an
+/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
+/// For instructions which have multiple pointer [vector] inputs or that
+/// transition between vector and scalar types, there is no immediate base
+/// defining value. The 'base defining value' for 'Def' is the transitive
+/// closure of this relation stopping at the first instruction which has no
+/// immediate base defining value. The b.d.v. might itself be a base pointer,
+/// but it can also be an arbitrary derived pointer.
+struct BaseDefiningValueResult {
+ /// Contains the value which is the base defining value.
+ Value * const BDV;
+ /// True if the base defining value is also known to be an actual base
+ /// pointer.
+ const bool IsKnownBase;
+ BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
+ : BDV(BDV), IsKnownBase(IsKnownBase) {
+#ifndef NDEBUG
+ // Check consistency between new and old means of checking whether a BDV is
+ // a base.
+ bool MustBeBase = isKnownBaseResult(BDV);
+ assert(!MustBeBase || MustBeBase == IsKnownBase);
+#endif
}
- result.liveset = liveset;
+};
}
-static Value *findBaseDefiningValue(Value *I);
+static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
@@ -303,8 +355,8 @@ static Value *findBaseDefiningValue(Value *I);
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
-static std::pair<Value *, bool>
-findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
+static BaseDefiningValueResult
+findBaseDefiningValueOfVector(Value *I) {
assert(I->getType()->isVectorTy() &&
cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
@@ -314,7 +366,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
- return std::make_pair(I, true);
+ return BaseDefiningValueResult(I, true);
// We shouldn't see the address of a global as a vector value?
assert(!isa<GlobalVariable>(I) &&
@@ -325,7 +377,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
if (isa<UndefValue>(I))
// utterly meaningless, but useful for dealing with partially optimized
// code.
- return std::make_pair(I, true);
+ return BaseDefiningValueResult(I, true);
// Due to inheritance, this must be _after_ the global variable and undef
// checks
@@ -333,31 +385,17 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
assert(Con->isNullValue() && "null is the only case which makes sense");
- return std::make_pair(Con, true);
+ return BaseDefiningValueResult(Con, true);
}
if (isa<LoadInst>(I))
- return std::make_pair(I, true);
-
- // For an insert element, we might be able to look through it if we know
- // something about the indexes.
- if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
- if (Index) {
- Value *InsertIndex = IEI->getOperand(2);
- // This index is inserting the value, look for its BDV
- if (InsertIndex == Index)
- return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false);
- // Both constant, and can't be equal per above. This insert is definitely
- // not relevant, look back at the rest of the vector and keep trying.
- if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
- return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
- }
-
+ return BaseDefiningValueResult(I, true);
+
+ if (isa<InsertElementInst>(I))
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
- return std::make_pair(IEI, false);
- }
+ return BaseDefiningValueResult(I, false);
if (isa<ShuffleVectorInst>(I))
// We don't know whether this vector contains entirely base pointers or
@@ -365,105 +403,62 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
// duplicate code as needed to construct a parallel vector of bases.
// TODO: There are a number of local optimizations which could be applied here
// for particular shufflevector patterns.
- return std::make_pair(I, false);
+ return BaseDefiningValueResult(I, false);
// A PHI or Select is a base defining value. The outer findBasePointer
// algorithm is responsible for constructing a base value for this BDV.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"unknown vector instruction - no base found for vector element");
- return std::make_pair(I, false);
+ return BaseDefiningValueResult(I, false);
}
-static bool isKnownBaseResult(Value *V);
-
/// Helper function for findBasePointer - Will return a value which either a)
-/// defines the base pointer for the input or b) blocks the simple search
-/// (i.e. a PHI or Select of two derived pointers)
-static Value *findBaseDefiningValue(Value *I) {
+/// defines the base pointer for the input, b) blocks the simple search
+/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
+/// from pointer to vector type or back.
+static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
if (I->getType()->isVectorTy())
- return findBaseDefiningValueOfVector(I).first;
+ return findBaseDefiningValueOfVector(I);
assert(I->getType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
- // This case is a bit of a hack - it only handles extracts from vectors which
- // trivially contain only base pointers or cases where we can directly match
- // the index of the original extract element to an insertion into the vector.
- // See note inside the function for how to improve this.
- if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
- Value *VectorOperand = EEI->getVectorOperand();
- Value *Index = EEI->getIndexOperand();
- std::pair<Value *, bool> pair =
- findBaseDefiningValueOfVector(VectorOperand, Index);
- Value *VectorBase = pair.first;
- if (VectorBase->getType()->isPointerTy())
- // We found a BDV for this specific element with the vector. This is an
- // optimization, but in practice it covers most of the useful cases
- // created via scalarization.
- return VectorBase;
- else {
- assert(VectorBase->getType()->isVectorTy());
- if (pair.second)
- // If the entire vector returned is known to be entirely base pointers,
- // then the extractelement is valid base for this value.
- return EEI;
- else {
- // Otherwise, we have an instruction which potentially produces a
- // derived pointer and we need findBasePointers to clone code for us
- // such that we can create an instruction which produces the
- // accompanying base pointer.
- // Note: This code is currently rather incomplete. We don't currently
- // support the general form of shufflevector of insertelement.
- // Conceptually, these are just 'base defining values' of the same
- // variety as phi or select instructions. We need to update the
- // findBasePointers algorithm to insert new 'base-only' versions of the
- // original instructions. This is relative straight forward to do, but
- // the case which would motivate the work hasn't shown up in real
- // workloads yet.
- assert((isa<PHINode>(VectorBase) || isa<SelectInst>(VectorBase)) &&
- "need to extend findBasePointers for generic vector"
- "instruction cases");
- return VectorBase;
- }
- }
- }
-
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
// We should never have reached here if this argument isn't a gc value
- return I;
+ return BaseDefiningValueResult(I, true);
if (isa<GlobalVariable>(I))
// base case
- return I;
+ return BaseDefiningValueResult(I, true);
// inlining could possibly introduce phi node that contains
// undef if callee has multiple returns
if (isa<UndefValue>(I))
// utterly meaningless, but useful for dealing with
// partially optimized code.
- return I;
+ return BaseDefiningValueResult(I, true);
// Due to inheritance, this must be _after_ the global variable and undef
// checks
- if (Constant *Con = dyn_cast<Constant>(I)) {
+ if (isa<Constant>(I)) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
- // Note: Finding a constant base for something marked for relocation
- // doesn't really make sense. The most likely case is either a) some
- // screwed up the address space usage or b) your validating against
- // compiled C++ code w/o the proper separation. The only real exception
- // is a null pointer. You could have generic code written to index of
- // off a potentially null value and have proven it null. We also use
- // null pointers in dead paths of relocation phis (which we might later
- // want to find a base pointer for).
- assert(isa<ConstantPointerNull>(Con) &&
- "null is the only case which makes sense");
- return Con;
+ // Note: Even for frontends which don't have constant references, we can
+ // see constants appearing after optimizations. A simple example is
+ // specialization of an address computation on null feeding into a merge
+ // point where the actual use of the now-constant input is protected by
+ // another null check. (e.g. test4 in constants.ll)
+ return BaseDefiningValueResult(I, true);
}
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
+ // If stripping pointer casts changes the address space there is an
+ // addrspacecast in between.
+ assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
+ cast<PointerType>(CI->getType())->getAddressSpace() &&
+ "unsupported addrspacecast");
// If we find a cast instruction here, it means we've found a cast which is
// not simply a pointer cast (i.e. an inttoptr). We don't know how to
// handle int->ptr conversion.
@@ -472,7 +467,9 @@ static Value *findBaseDefiningValue(Value *I) {
}
if (isa<LoadInst>(I))
- return I; // The value loaded is an gc base itself
+ // The value loaded is a gc base itself
+ return BaseDefiningValueResult(I, true);
+
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
// The base of this GEP is the base
@@ -480,14 +477,11 @@ static Value *findBaseDefiningValue(Value *I) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
- case Intrinsic::experimental_gc_result_ptr:
default:
// fall through to general call handling
break;
case Intrinsic::experimental_gc_statepoint:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_int:
- llvm_unreachable("these don't produce pointers");
+ llvm_unreachable("statepoints don't produce pointers");
case Intrinsic::experimental_gc_relocate: {
// Rerunning safepoint insertion after safepoints are already
// inserted is not supported. It could probably be made to work,
@@ -506,17 +500,17 @@ static Value *findBaseDefiningValue(Value *I) {
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return I;
+ return BaseDefiningValueResult(I, true);
// I have absolutely no idea how to implement this part yet. It's not
- // neccessarily hard, I just haven't really looked at it yet.
+ // necessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
if (isa<AtomicCmpXchgInst>(I))
// A CAS is effectively an atomic store and load combined under a
// predicate. From the perspective of base pointers, we just treat it
// like a load.
- return I;
+ return BaseDefiningValueResult(I, true);
assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
"binary ops which don't apply to pointers");
@@ -525,34 +519,41 @@ static Value *findBaseDefiningValue(Value *I) {
// stack, but in either case, this is simply a field load. As a result,
// this is a defining definition of the base just like a load is.
if (isa<ExtractValueInst>(I))
- return I;
+ return BaseDefiningValueResult(I, true);
// We should never see an insert vector since that would require we be
// tracing back a struct value not a pointer value.
assert(!isa<InsertValueInst>(I) &&
"Base pointer for a struct is meaningless");
+ // An extractelement produces a base result exactly when its input does.
+ // We may need to insert a parallel instruction to extract the appropriate
+ // element out of the base vector corresponding to the input. Given this,
+ // it's analogous to the phi and select case even though it's not a merge.
+ if (isa<ExtractElementInst>(I))
+ // Note: There are a lot of obvious peephole cases here. These are
+ // deliberately handled after the main base pointer inference algorithm to
+ // make writing test cases to exercise that code easier.
+ return BaseDefiningValueResult(I, false);
+
// The last two cases here don't return a base pointer. Instead, they
- // return a value which dynamically selects from amoung several base
+ // return a value which dynamically selects from among several base
// derived pointers (each with its own base potentially). It's the job of
// the caller to resolve these.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"missing instruction case in findBaseDefiningValing");
- return I;
+ return BaseDefiningValueResult(I, false);
}
/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
Value *&Cached = Cache[I];
if (!Cached) {
- Cached = findBaseDefiningValue(I);
+ Cached = findBaseDefiningValue(I).BDV;
+ DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
+ << Cached->getName() << "\n");
}
assert(Cache[I] != nullptr);
-
- if (TraceLSP) {
- dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
- << "\n";
- }
return Cached;
}
@@ -572,7 +573,9 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
/// is it known to be a base pointer? Or do we need to continue searching.
static bool isKnownBaseResult(Value *V) {
- if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
+ if (!isa<PHINode>(V) && !isa<SelectInst>(V) &&
+ !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
+ !isa<ShuffleVectorInst>(V)) {
// no recursion possible
return true;
}
@@ -587,17 +590,19 @@ static bool isKnownBaseResult(Value *V) {
return false;
}
-// TODO: find a better name for this
namespace {
-class PhiState {
+/// Models the state of a single base defining value in the findBasePointer
+/// algorithm for determining where a new instruction is needed to propagate
+/// the base of this BDV.
+class BDVState {
public:
enum Status { Unknown, Base, Conflict };
- PhiState(Status s, Value *b = nullptr) : status(s), base(b) {
+ BDVState(Status s, Value *b = nullptr) : status(s), base(b) {
assert(status != Base || b);
}
- PhiState(Value *b) : status(Base), base(b) {}
- PhiState() : status(Unknown), base(nullptr) {}
+ explicit BDVState(Value *b) : status(Base), base(b) {}
+ BDVState() : status(Unknown), base(nullptr) {}
Status getStatus() const { return status; }
Value *getBase() const { return base; }
@@ -606,72 +611,80 @@ public:
bool isUnknown() const { return getStatus() == Unknown; }
bool isConflict() const { return getStatus() == Conflict; }
- bool operator==(const PhiState &other) const {
+ bool operator==(const BDVState &other) const {
return base == other.base && status == other.status;
}
- bool operator!=(const PhiState &other) const { return !(*this == other); }
+ bool operator!=(const BDVState &other) const { return !(*this == other); }
- void dump() {
- errs() << status << " (" << base << " - "
- << (base ? base->getName() : "nullptr") << "): ";
+ LLVM_DUMP_METHOD
+ void dump() const { print(dbgs()); dbgs() << '\n'; }
+
+ void print(raw_ostream &OS) const {
+ switch (status) {
+ case Unknown:
+ OS << "U";
+ break;
+ case Base:
+ OS << "B";
+ break;
+ case Conflict:
+ OS << "C";
+ break;
+ };
+ OS << " (" << base << " - "
+ << (base ? base->getName() : "nullptr") << "): ";
}
private:
Status status;
- Value *base; // non null only if status == base
+ AssertingVH<Value> base; // non null only if status == base
};
+}
-typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
-// Values of type PhiState form a lattice, and this is a helper
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
+ State.print(OS);
+ return OS;
+}
+#endif
+
+namespace {
+// Values of type BDVState form a lattice, and this is a helper
// class that implements the meet operation. The meat of the meet
-// operation is implemented in MeetPhiStates::pureMeet
-class MeetPhiStates {
+// operation is implemented in MeetBDVStates::pureMeet
+class MeetBDVStates {
public:
- // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
- explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
- : phiStates(phiStates) {}
-
- // Destructively meet the current result with the base V. V can
- // either be a merge instruction (SelectInst / PHINode), in which
- // case its status is looked up in the phiStates map; or a regular
- // SSA value, in which case it is assumed to be a base.
- void meetWith(Value *V) {
- PhiState otherState = getStateForBDV(V);
- assert((MeetPhiStates::pureMeet(otherState, currentResult) ==
- MeetPhiStates::pureMeet(currentResult, otherState)) &&
- "math is wrong: meet does not commute!");
- currentResult = MeetPhiStates::pureMeet(otherState, currentResult);
+ /// Initializes the currentResult to the TOP state so that it can be met with
+ /// any other state to produce that state.
+ MeetBDVStates() {}
+
+ // Destructively meet the current result with the given BDVState
+ void meetWith(BDVState otherState) {
+ currentResult = meet(otherState, currentResult);
}
- PhiState getResult() const { return currentResult; }
+ BDVState getResult() const { return currentResult; }
private:
- const ConflictStateMapTy &phiStates;
- PhiState currentResult;
-
- /// Return a phi state for a base defining value. We'll generate a new
- /// base state for known bases and expect to find a cached state otherwise
- PhiState getStateForBDV(Value *baseValue) {
- if (isKnownBaseResult(baseValue)) {
- return PhiState(baseValue);
- } else {
- return lookupFromMap(baseValue);
- }
- }
+ BDVState currentResult;
- PhiState lookupFromMap(Value *V) {
- auto I = phiStates.find(V);
- assert(I != phiStates.end() && "lookup failed!");
- return I->second;
+ /// Perform a meet operation on two elements of the BDVState lattice.
+ static BDVState meet(BDVState LHS, BDVState RHS) {
+ assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) &&
+ "math is wrong: meet does not commute!");
+ BDVState Result = pureMeet(LHS, RHS);
+ DEBUG(dbgs() << "meet of " << LHS << " with " << RHS
+ << " produced " << Result << "\n");
+ return Result;
}
- static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) {
+ static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) {
switch (stateA.getStatus()) {
- case PhiState::Unknown:
+ case BDVState::Unknown:
return stateB;
- case PhiState::Base:
+ case BDVState::Base:
assert(stateA.getBase() && "can't be null");
if (stateB.isUnknown())
return stateA;
@@ -681,18 +694,20 @@ private:
assert(stateA == stateB && "equality broken!");
return stateA;
}
- return PhiState(PhiState::Conflict);
+ return BDVState(BDVState::Conflict);
}
assert(stateB.isConflict() && "only three states!");
- return PhiState(PhiState::Conflict);
+ return BDVState(BDVState::Conflict);
- case PhiState::Conflict:
+ case BDVState::Conflict:
return stateA;
}
llvm_unreachable("only three states!");
}
};
}
+
+
/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
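
pureMeet above implements a three-point lattice: Unknown is the top element, two agreeing bases stay a base, and two different bases (or anything met with Conflict) collapse to Conflict, the state in which findBasePointer must insert a new phi or select to merge the bases. A compact sketch of the same meet with plain ints standing in for base pointers:

#include <cassert>

enum Status { Unknown, Base, Conflict };

struct BDVState {
  Status S = Unknown;
  int BasePtr = 0; // meaningful only when S == Base
};

static BDVState meet(BDVState A, BDVState B) {
  if (A.S == Unknown) return B; // top meets anything: the other state
  if (B.S == Unknown) return A;
  if (A.S == Conflict || B.S == Conflict) return {Conflict, 0};
  // Both are Base: equal bases stay Base, different bases conflict.
  return A.BasePtr == B.BasePtr ? A : BDVState{Conflict, 0};
}

int main() {
  BDVState U, B1{Base, 1}, B2{Base, 2}, C{Conflict, 0};
  assert(meet(U, B1).S == Base && meet(U, B1).BasePtr == 1);
  assert(meet(B1, B1).S == Base);     // agreeing inputs keep their base
  assert(meet(B1, B2).S == Conflict); // disagreeing inputs need a new phi
  assert(meet(C, B1).S == Conflict);  // conflict absorbs everything
  return 0;
}
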
@@ -723,171 +738,252 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
//
// Note: A simpler form of this would be to add the conflict form of all
// PHIs without running the optimistic algorithm. This would be
- // analougous to pessimistic data flow and would likely lead to an
+ // analogous to pessimistic data flow and would likely lead to an
// overall worse solution.
- ConflictStateMapTy states;
- states[def] = PhiState();
- // Recursively fill in all phis & selects reachable from the initial one
- // for which we don't already know a definite base value for
- // TODO: This should be rewritten with a worklist
- bool done = false;
- while (!done) {
- done = true;
- // Since we're adding elements to 'states' as we run, we can't keep
- // iterators into the set.
- SmallVector<Value *, 16> Keys;
- Keys.reserve(states.size());
- for (auto Pair : states) {
- Value *V = Pair.first;
- Keys.push_back(V);
- }
- for (Value *v : Keys) {
- assert(!isKnownBaseResult(v) && "why did it get added?");
- if (PHINode *phi = dyn_cast<PHINode>(v)) {
- assert(phi->getNumIncomingValues() > 0 &&
- "zero input phis are illegal");
- for (Value *InVal : phi->incoming_values()) {
- Value *local = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
- }
- } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) {
- Value *local = findBaseOrBDV(sel->getTrueValue(), cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
- local = findBaseOrBDV(sel->getFalseValue(), cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
+#ifndef NDEBUG
+ auto isExpectedBDVType = [](Value *BDV) {
+ return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
+ isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV);
+ };
+#endif
+
+ // Once populated, will contain a mapping from each potentially non-base BDV
+ // to a lattice value (described above) which corresponds to that BDV.
+ // We use the order of insertion (DFS over the def/use graph) to provide a
+ // stable, deterministic order when visiting the map below (a plain DenseMap
+ // is unordered), which is important for deterministic compilation.
+ MapVector<Value *, BDVState> States;
+
+ // Recursively fill in all base defining values reachable from the initial
+ // one for which we don't already know a definite base value for
+ /* scope */ {
+ SmallVector<Value*, 16> Worklist;
+ Worklist.push_back(def);
+ States.insert(std::make_pair(def, BDVState()));
+ while (!Worklist.empty()) {
+ Value *Current = Worklist.pop_back_val();
+ assert(!isKnownBaseResult(Current) && "why did it get added?");
+
+ auto visitIncomingValue = [&](Value *InVal) {
+ Value *Base = findBaseOrBDV(InVal, cache);
+ if (isKnownBaseResult(Base))
+ // Known bases won't need new instructions introduced and can be
+ // ignored safely
+ return;
+ assert(isExpectedBDVType(Base) && "the only non-base values "
+ "we see should be base defining values");
+ if (States.insert(std::make_pair(Base, BDVState())).second)
+ Worklist.push_back(Base);
+ };
+ if (PHINode *Phi = dyn_cast<PHINode>(Current)) {
+ for (Value *InVal : Phi->incoming_values())
+ visitIncomingValue(InVal);
+ } else if (SelectInst *Sel = dyn_cast<SelectInst>(Current)) {
+ visitIncomingValue(Sel->getTrueValue());
+ visitIncomingValue(Sel->getFalseValue());
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
+ visitIncomingValue(EE->getVectorOperand());
+ } else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
+ visitIncomingValue(IE->getOperand(0)); // vector operand
+ visitIncomingValue(IE->getOperand(1)); // scalar operand
+ } else {
+ // There is one class of instructions we know we don't handle.
+ assert(isa<ShuffleVectorInst>(Current));
+ llvm_unreachable("unimplemented instruction case");
}
}
}
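+
+ // As an illustration only (hypothetical IR, not taken from a test): given
+ //   %merge = phi i8 addrspace(1)* [ %a, %left ], [ %sel, %right ]
+ //   %sel = select i1 %c, i8 addrspace(1)* %p, i8 addrspace(1)* %q
+ // the walk above seeds States with %merge and %sel (both BDVs), while the
+ // known-base inputs %a, %p, and %q are skipped by visitIncomingValue.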
- if (TraceLSP) {
- errs() << "States after initialization:\n";
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
- state.dump();
- v->dump();
- }
+#ifndef NDEBUG
+ DEBUG(dbgs() << "States after initialization:\n");
+ for (auto Pair : States) {
+ DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
+#endif
- // TODO: come back and revisit the state transitions around inputs which
- // have reached conflict state. The current version seems too conservative.
+ // Return a BDVState for a base defining value. We'll generate a new
+ // base state for known bases and expect to find a cached state otherwise.
+ auto getStateForBDV = [&](Value *baseValue) {
+ if (isKnownBaseResult(baseValue))
+ return BDVState(baseValue);
+ auto I = States.find(baseValue);
+ assert(I != States.end() && "lookup failed!");
+ return I->second;
+ };
bool progress = true;
while (progress) {
#ifndef NDEBUG
- size_t oldSize = states.size();
+ const size_t oldSize = States.size();
#endif
progress = false;
- // We're only changing keys in this loop, thus safe to keep iterators
- for (auto Pair : states) {
- MeetPhiStates calculateMeet(states);
- Value *v = Pair.first;
- assert(!isKnownBaseResult(v) && "why did it get added?");
- if (SelectInst *select = dyn_cast<SelectInst>(v)) {
- calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache));
- calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache));
- } else
- for (Value *Val : cast<PHINode>(v)->incoming_values())
- calculateMeet.meetWith(findBaseOrBDV(Val, cache));
-
- PhiState oldState = states[v];
- PhiState newState = calculateMeet.getResult();
+ // We're only changing values in this loop, thus safe to keep iterators.
+ // Since this is computing a fixed point, the order of visit does not
+ // affect the result. TODO: We could use a worklist here and make this run
+ // much faster.
+ for (auto Pair : States) {
+ Value *BDV = Pair.first;
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+
+ // Given an input value for the current instruction, return a BDVState
+ // instance which represents the BDV of that value.
+ auto getStateForInput = [&](Value *V) mutable {
+ Value *BDV = findBaseOrBDV(V, cache);
+ return getStateForBDV(BDV);
+ };
+
+ MeetBDVStates calculateMeet;
+ if (SelectInst *select = dyn_cast<SelectInst>(BDV)) {
+ calculateMeet.meetWith(getStateForInput(select->getTrueValue()));
+ calculateMeet.meetWith(getStateForInput(select->getFalseValue()));
+ } else if (PHINode *Phi = dyn_cast<PHINode>(BDV)) {
+ for (Value *Val : Phi->incoming_values())
+ calculateMeet.meetWith(getStateForInput(Val));
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
+ // The 'meet' for an extractelement is essentially trivial, but it's still
+ // useful in that it drives us to conflict if our input is.
+ calculateMeet.meetWith(getStateForInput(EE->getVectorOperand()));
+ } else {
+ // Given there's an inherent type mismatch between the operands, this will
+ // *always* produce Conflict.
+ auto *IE = cast<InsertElementInst>(BDV);
+ calculateMeet.meetWith(getStateForInput(IE->getOperand(0)));
+ calculateMeet.meetWith(getStateForInput(IE->getOperand(1)));
+ }
+
+ BDVState oldState = States[BDV];
+ BDVState newState = calculateMeet.getResult();
if (oldState != newState) {
progress = true;
- states[v] = newState;
+ States[BDV] = newState;
}
}
- assert(oldSize <= states.size());
- assert(oldSize == states.size() || progress);
+ assert(oldSize == States.size() &&
+ "fixed point shouldn't be adding any new nodes to state");
}
- if (TraceLSP) {
- errs() << "States after meet iteration:\n";
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
- state.dump();
- v->dump();
- }
+#ifndef NDEBUG
+ DEBUG(dbgs() << "States after meet iteration:\n");
+ for (auto Pair : States) {
+ DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
-
+#endif
+
// Insert Phis for all conflicts
- // We want to keep naming deterministic in the loop that follows, so
- // sort the keys before iteration. This is useful in allowing us to
- // write stable tests. Note that there is no invalidation issue here.
- SmallVector<Value *, 16> Keys;
- Keys.reserve(states.size());
- for (auto Pair : states) {
- Value *V = Pair.first;
- Keys.push_back(V);
- }
- std::sort(Keys.begin(), Keys.end(), order_by_name);
// TODO: adjust naming patterns to avoid this order of iteration dependency
- for (Value *V : Keys) {
- Instruction *v = cast<Instruction>(V);
- PhiState state = states[V];
- assert(!isKnownBaseResult(v) && "why did it get added?");
- assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
- if (!state.isConflict())
+ for (auto Pair : States) {
+ Instruction *I = cast<Instruction>(Pair.first);
+ BDVState State = Pair.second;
+ assert(!isKnownBaseResult(I) && "why did it get added?");
+ assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
+
+ // extractelement instructions are a bit special in that we may need to
+ // insert an extract even when we know an exact base for the instruction.
+ // The problem is that we need to convert from a vector base to a scalar
+ // base for the particular index we're interested in.
+ if (State.isBase() && isa<ExtractElementInst>(I) &&
+ isa<VectorType>(State.getBase()->getType())) {
+ auto *EE = cast<ExtractElementInst>(I);
+ // TODO: In many cases, the new instruction is just EE itself. We should
+ // exploit this, but can't do it here since it would break the invariant
+ // about the BDV not being known to be a base.
+ auto *BaseInst = ExtractElementInst::Create(State.getBase(),
+ EE->getIndexOperand(),
+ "base_ee", EE);
+ BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
+ States[I] = BDVState(BDVState::Base, BaseInst);
+ }
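+
+ // E.g. (hypothetical IR): if %d = extractelement <2 x i8 addrspace(1)*>
+ // %vec, i32 0 and the known base of %vec is the vector %vec.base, we still
+ // emit
+ //   %base_ee = extractelement <2 x i8 addrspace(1)*> %vec.base, i32 0
+ // so that the scalar %d ends up with a scalar base.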
+
+ // Since we're joining a vector and a scalar base, they can never be the
+ // same. As a result, we should always see an insertelement having reached
+ // the conflict state.
+ if (isa<InsertElementInst>(I)) {
+ assert(State.isConflict());
+ }
+
+ if (!State.isConflict())
continue;
- if (isa<PHINode>(v)) {
- int num_preds =
- std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
- assert(num_preds > 0 && "how did we reach here");
- PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
- // Add metadata marking this as a base value
- auto *const_1 = ConstantInt::get(
- Type::getInt32Ty(
- v->getParent()->getParent()->getParent()->getContext()),
- 1);
- auto MDConst = ConstantAsMetadata::get(const_1);
- MDNode *md = MDNode::get(
- v->getParent()->getParent()->getParent()->getContext(), MDConst);
- phi->setMetadata("is_base_value", md);
- states[v] = PhiState(PhiState::Conflict, phi);
+ /// Create and insert a new instruction which will represent the base of
+ /// the given instruction 'I'.
+ auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* {
+ if (isa<PHINode>(I)) {
+ BasicBlock *BB = I->getParent();
+ int NumPreds = std::distance(pred_begin(BB), pred_end(BB));
+ assert(NumPreds > 0 && "how did we reach here");
+ std::string Name = suffixed_name_or(I, ".base", "base_phi");
+ return PHINode::Create(I->getType(), NumPreds, Name, I);
+ } else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
+ // The undef will be replaced later
+ UndefValue *Undef = UndefValue::get(Sel->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_select");
+ return SelectInst::Create(Sel->getCondition(), Undef,
+ Undef, Name, Sel);
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
+ UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_ee");
+ return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
+ EE);
+ } else {
+ auto *IE = cast<InsertElementInst>(I);
+ UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType());
+ UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_ie");
+ return InsertElementInst::Create(VecUndef, ScalarUndef,
+ IE->getOperand(2), Name, IE);
+ }
+ };
+ Instruction *BaseInst = MakeBaseInstPlaceholder(I);
+ // Add metadata marking this as a base value
+ BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
+ States[I] = BDVState(BDVState::Conflict, BaseInst);
+ }
+
+ // Returns an instruction which produces the base pointer for a given
+ // instruction. The instruction is assumed to be an input to one of the BDVs
+ // seen in the inference algorithm above. As such, we must either already
+ // know its base defining value is a base, or have inserted a new
+ // instruction to propagate the base of its BDV and have entered that newly
+ // introduced instruction into the state table. In either case, we are
+ // assured to be able to determine an instruction which produces its base
+ // pointer.
+ auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
+ Value *BDV = findBaseOrBDV(Input, cache);
+ Value *Base = nullptr;
+ if (isKnownBaseResult(BDV)) {
+ Base = BDV;
} else {
- SelectInst *sel = cast<SelectInst>(v);
- // The undef will be replaced later
- UndefValue *undef = UndefValue::get(sel->getType());
- SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
- undef, "base_select", sel);
- // Add metadata marking this as a base value
- auto *const_1 = ConstantInt::get(
- Type::getInt32Ty(
- v->getParent()->getParent()->getParent()->getContext()),
- 1);
- auto MDConst = ConstantAsMetadata::get(const_1);
- MDNode *md = MDNode::get(
- v->getParent()->getParent()->getParent()->getContext(), MDConst);
- basesel->setMetadata("is_base_value", md);
- states[v] = PhiState(PhiState::Conflict, basesel);
+ // Either conflict or base.
+ assert(States.count(BDV));
+ Base = States[BDV].getBase();
}
- }
+ assert(Base && "can't be null");
+ // The cast is needed since base traversal may strip away bitcasts
+ if (Base->getType() != Input->getType() &&
+ InsertPt) {
+ Base = new BitCastInst(Base, Input->getType(), "cast",
+ InsertPt);
+ }
+ return Base;
+ };
- // Fixup all the inputs of the new PHIs
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
+ // Fixup all the inputs of the new PHIs. Visit order needs to be
+ // deterministic and predictable because we're naming newly created
+ // instructions.
+ for (auto Pair : States) {
+ Instruction *BDV = cast<Instruction>(Pair.first);
+ BDVState State = Pair.second;
- assert(!isKnownBaseResult(v) && "why did it get added?");
- assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
- if (!state.isConflict())
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+ assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
+ if (!State.isConflict())
continue;
- if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
- PHINode *phi = cast<PHINode>(v);
+ if (PHINode *basephi = dyn_cast<PHINode>(State.getBase())) {
+ PHINode *phi = cast<PHINode>(BDV);
unsigned NumPHIValues = phi->getNumIncomingValues();
for (unsigned i = 0; i < NumPHIValues; i++) {
Value *InVal = phi->getIncomingValue(i);
@@ -906,104 +1002,145 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
if (blockIndex != -1) {
Value *oldBase = basephi->getIncomingValue(blockIndex);
basephi->addIncoming(oldBase, InBB);
+
#ifndef NDEBUG
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
-
- // In essense this assert states: the only way two
+ Value *Base = getBaseForInput(InVal, nullptr);
+ // In essence this assert states: the only way two
// values incoming from the same basic block may be
// different is by being different bitcasts of the same
// value. A cleanup that remains TODO is changing
// findBaseOrBDV to return an llvm::Value of the correct
// type (and still remain pure). This will remove the
// need to add bitcasts.
- assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
+ assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() &&
"sanity -- findBaseOrBDV should be pure!");
#endif
continue;
}
- // Find either the defining value for the PHI or the normal base for
- // a non-phi node
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
- assert(base && "can't be null");
- // Must use original input BB since base may not be Instruction
- // The cast is needed since base traversal may strip away bitcasts
- if (base->getType() != basephi->getType()) {
- base = new BitCastInst(base, basephi->getType(), "cast",
- InBB->getTerminator());
- }
- basephi->addIncoming(base, InBB);
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast in the incoming block.
+ // TODO: Need to split critical edges if insertion is needed
+ Value *Base = getBaseForInput(InVal, InBB->getTerminator());
+ basephi->addIncoming(Base, InBB);
}
assert(basephi->getNumIncomingValues() == NumPHIValues);
- } else {
- SelectInst *basesel = cast<SelectInst>(state.getBase());
- SelectInst *sel = cast<SelectInst>(v);
+ } else if (SelectInst *BaseSel = dyn_cast<SelectInst>(State.getBase())) {
+ SelectInst *Sel = cast<SelectInst>(BDV);
// Operands 1 & 2 are the true and false paths respectively. TODO: refactor
// to something safer and less hacky.
for (int i = 1; i <= 2; i++) {
- Value *InVal = sel->getOperand(i);
- // Find either the defining value for the PHI or the normal base for
- // a non-phi node
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
- assert(base && "can't be null");
- // Must use original input BB since base may not be Instruction
- // The cast is needed since base traversal may strip away bitcasts
- if (base->getType() != basesel->getType()) {
- base = new BitCastInst(base, basesel->getType(), "cast", basesel);
- }
- basesel->setOperand(i, base);
+ Value *InVal = Sel->getOperand(i);
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast.
+ Value *Base = getBaseForInput(InVal, BaseSel);
+ BaseSel->setOperand(i, Base);
}
+ } else if (auto *BaseEE = dyn_cast<ExtractElementInst>(State.getBase())) {
+ Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast.
+ Value *Base = getBaseForInput(InVal, BaseEE);
+ BaseEE->setOperand(0, Base);
+ } else {
+ auto *BaseIE = cast<InsertElementInst>(State.getBase());
+ auto *BdvIE = cast<InsertElementInst>(BDV);
+ auto UpdateOperand = [&](int OperandIdx) {
+ Value *InVal = BdvIE->getOperand(OperandIdx);
+ Value *Base = getBaseForInput(InVal, BaseIE);
+ BaseIE->setOperand(OperandIdx, Base);
+ };
+ UpdateOperand(0); // vector operand
+ UpdateOperand(1); // scalar operand
+ }
+
+ }
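+
+ // Net effect on a conflicting phi, as a sketch (hypothetical IR): for
+ //   %d = phi i8 addrspace(1)* [ %d1, %left ], [ %d2, %right ]
+ // the pass has now materialized a parallel base phi
+ //   %d.base = phi i8 addrspace(1)* [ %b1, %left ], [ %b2, %right ]
+ // where %b1/%b2 are the bases getBaseForInput found for %d1/%d2, with
+ // bitcasts inserted in the incoming blocks where the types disagreed.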
+
+ // Now that we're done with the algorithm, see if we can optimize the
+ // results slightly by reducing the number of new instructions needed.
+ // Arguably, this should be integrated into the algorithm above, but
+ // doing it as a post-process step is easier to reason about for the moment.
+ DenseMap<Value *, Value *> ReverseMap;
+ SmallPtrSet<Instruction *, 16> NewInsts;
+ SmallSetVector<AssertingVH<Instruction>, 16> Worklist;
+ // Note: We need to visit the states in a deterministic order. We use the
+ // insertion order of the States MapVector above for this purpose. Note that
+ // we are papering over a bigger problem with the algorithm above - its
+ // visit order is not deterministic. A larger change is needed to fix this.
+ for (auto Pair : States) {
+ auto *BDV = Pair.first;
+ auto State = Pair.second;
+ Value *Base = State.getBase();
+ assert(BDV && Base);
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+ assert(isKnownBaseResult(Base) &&
+ "must be something we 'know' is a base pointer");
+ if (!State.isConflict())
+ continue;
+
+ ReverseMap[Base] = BDV;
+ if (auto *BaseI = dyn_cast<Instruction>(Base)) {
+ NewInsts.insert(BaseI);
+ Worklist.insert(BaseI);
+ }
+ }
+ auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI,
+ Value *Replacement) {
+ // Add users which are new instructions (excluding self references)
+ for (User *U : BaseI->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (NewInsts.count(UI) && UI != BaseI)
+ Worklist.insert(UI);
+ // Then do the actual replacement
+ NewInsts.erase(BaseI);
+ ReverseMap.erase(BaseI);
+ BaseI->replaceAllUsesWith(Replacement);
+ assert(States.count(BDV));
+ assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI);
+ States[BDV] = BDVState(BDVState::Conflict, Replacement);
+ BaseI->eraseFromParent();
+ };
+ const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
+ while (!Worklist.empty()) {
+ Instruction *BaseI = Worklist.pop_back_val();
+ assert(NewInsts.count(BaseI));
+ Value *Bdv = ReverseMap[BaseI];
+ if (auto *BdvI = dyn_cast<Instruction>(Bdv))
+ if (BaseI->isIdenticalTo(BdvI)) {
+ DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
+ ReplaceBaseInstWith(Bdv, BaseI, Bdv);
+ continue;
+ }
+ if (Value *V = SimplifyInstruction(BaseI, DL)) {
+ DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
+ ReplaceBaseInstWith(Bdv, BaseI, V);
+ continue;
}
}
// Cache all of our results so we can cheaply reuse them
// NOTE: This is actually two caches: one of the base defining value
// relation and one of the base pointer relation! FIXME
- for (auto item : states) {
- Value *v = item.first;
- Value *base = item.second.getBase();
- assert(v && base);
- assert(!isKnownBaseResult(v) && "why did it get added?");
-
- if (TraceLSP) {
- std::string fromstr =
- cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "")
- : "none";
- errs() << "Updating base value cache"
- << " for: " << (v->hasName() ? v->getName() : "")
- << " from: " << fromstr
- << " to: " << (base->hasName() ? base->getName() : "") << "\n";
- }
-
- assert(isKnownBaseResult(base) &&
- "must be something we 'know' is a base pointer");
- if (cache.count(v)) {
+ for (auto Pair : States) {
+ auto *BDV = Pair.first;
+ Value *base = Pair.second.getBase();
+ assert(BDV && base);
+
+ std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none";
+ DEBUG(dbgs() << "Updating base value cache"
+ << " for: " << BDV->getName()
+ << " from: " << fromstr
+ << " to: " << base->getName() << "\n");
+
+ if (cache.count(BDV)) {
// Once we transition from the BDV relation being stored in the cache to
// the base relation being stored, it must be stable
- assert((!isKnownBaseResult(cache[v]) || cache[v] == base) &&
+ assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) &&
"base relation should be stable");
}
- cache[v] = base;
+ cache[BDV] = base;
}
- assert(cache.find(def) != cache.end());
+ assert(cache.count(def));
return cache[def];
}
@@ -1024,7 +1161,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// pointer was a base pointer.
static void
findBasePointers(const StatepointLiveSetTy &live,
- DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
+ DenseMap<Value *, Value *> &PointerToBase,
DominatorTree *DT, DefiningValueMapTy &DVCache) {
// For the naming of values inserted to be deterministic - which makes for
// much cleaner and more stable tests - we need to assign an order to the
@@ -1043,7 +1180,7 @@ findBasePointers(const StatepointLiveSetTy &live,
// If you see this trip and like to live really dangerously, the code should
// be correct, just with idioms the verifier can't handle. You can try
- // disabling the verifier at your own substaintial risk.
+ // disabling the verifier at your own substantial risk.
assert(!isa<ConstantPointerNull>(base) &&
"the relocation code needs adjustment to handle the relocation of "
"a null pointer constant without causing false positives in the "
@@ -1056,8 +1193,8 @@ findBasePointers(const StatepointLiveSetTy &live,
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
const CallSite &CS,
PartiallyConstructedSafepointRecord &result) {
- DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
- findBasePointers(result.liveset, PointerToBase, &DT, DVCache);
+ DenseMap<Value *, Value *> PointerToBase;
+ findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
if (PrintBasePointers) {
// Note: Need to print these in a stable order since this is checked in
@@ -1071,8 +1208,11 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
std::sort(Temp.begin(), Temp.end(), order_by_name);
for (Value *Ptr : Temp) {
Value *Base = PointerToBase[Ptr];
- errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
- << "\n";
+ errs() << " derived ";
+ Ptr->printAsOperand(errs(), false);
+ errs() << " base ";
+ Base->printAsOperand(errs(), false);
+ errs() << "\n";
}
}
@@ -1086,10 +1226,10 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
PartiallyConstructedSafepointRecord &result);
static void recomputeLiveInValues(
- Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
- // again. The old values are still live and will help it stablize quickly.
+ // again. The old values are still live and will help it stabilize quickly.
GCPtrLivenessData RevisedLivenessData;
computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
@@ -1099,69 +1239,66 @@ static void recomputeLiveInValues(
}
}
-// When inserting gc.relocate calls, we need to ensure there are no uses
-// of the original value between the gc.statepoint and the gc.relocate call.
-// One case which can arise is a phi node starting one of the successor blocks.
-// We also need to be able to insert the gc.relocates only on the path which
-// goes through the statepoint. We might need to split an edge to make this
-// possible.
+// When inserting gc.relocate and gc.result calls, we need to ensure there are
+// no uses of the original value / return value between the gc.statepoint and
+// the gc.relocate / gc.result call. One case which can arise is a phi node
+// starting one of the successor blocks. We also need to be able to insert the
+// gc.relocates only on the path which goes through the statepoint. We might
+// need to split an edge to make this possible.
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
DominatorTree &DT) {
BasicBlock *Ret = BB;
- if (!BB->getUniquePredecessor()) {
- Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, &DT);
- }
+ if (!BB->getUniquePredecessor())
+ Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);
- // Now that 'ret' has unique predecessor we can safely remove all phi nodes
+ // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes
// from it
FoldSingleEntryPHINodes(Ret);
- assert(!isa<PHINode>(Ret->begin()));
+ assert(!isa<PHINode>(Ret->begin()) &&
+ "All PHI nodes should have been removed!");
- // At this point, we can safely insert a gc.relocate as the first instruction
- // in Ret if needed.
+ // At this point, we can safely insert a gc.relocate or gc.result as the first
+ // instruction in Ret if needed.
return Ret;
}
-static int find_index(ArrayRef<Value *> livevec, Value *val) {
- auto itr = std::find(livevec.begin(), livevec.end(), val);
- assert(livevec.end() != itr);
- size_t index = std::distance(livevec.begin(), itr);
- assert(index < livevec.size());
- return index;
-}
-
-// Create new attribute set containing only attributes which can be transfered
+// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
static AttributeSet legalizeCallAttributes(AttributeSet AS) {
- AttributeSet ret;
+ AttributeSet Ret;
for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
- unsigned index = AS.getSlotIndex(Slot);
+ unsigned Index = AS.getSlotIndex(Slot);
- if (index == AttributeSet::ReturnIndex ||
- index == AttributeSet::FunctionIndex) {
+ if (Index == AttributeSet::ReturnIndex ||
+ Index == AttributeSet::FunctionIndex) {
- for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end;
- ++it) {
- Attribute attr = *it;
+ for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
// Do not allow certain attributes - just skip them
// Safepoint can not be read only or read none.
- if (attr.hasAttribute(Attribute::ReadNone) ||
- attr.hasAttribute(Attribute::ReadOnly))
+ if (Attr.hasAttribute(Attribute::ReadNone) ||
+ Attr.hasAttribute(Attribute::ReadOnly))
+ continue;
+
+ // These attributes control the generation of the gc.statepoint call /
+ // invoke itself; and once the gc.statepoint is in place, they're of no
+ // use.
+ if (Attr.hasAttribute("statepoint-num-patch-bytes") ||
+ Attr.hasAttribute("statepoint-id"))
continue;
- ret = ret.addAttributes(
- AS.getContext(), index,
- AttributeSet::get(AS.getContext(), index, AttrBuilder(attr)));
+ Ret = Ret.addAttributes(
+ AS.getContext(), Index,
+ AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr)));
}
}
// Just skip parameter attributes for now
}
- return ret;
+ return Ret;
}
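
// E.g. (hypothetical attribute set): from a call marked
//   readonly noinline "statepoint-id"="7"
// only noinline survives: readonly/readnone are illegal on a safepoint, and
// the statepoint-* attributes are consumed when forming the gc.statepoint.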
/// Helper function to place all gc relocates necessary for the given
@@ -1173,225 +1310,290 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) {
/// statepointToken - statepoint instruction to which relocates should be
/// bound.
/// Builder - LLVM IR builder to be used to construct new calls.
-static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables,
+static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
const int LiveStart,
- ArrayRef<llvm::Value *> BasePtrs,
+ ArrayRef<Value *> BasePtrs,
Instruction *StatepointToken,
IRBuilder<> Builder) {
- SmallVector<Instruction *, 64> NewDefs;
- NewDefs.reserve(LiveVariables.size());
+ if (LiveVariables.empty())
+ return;
- Module *M = StatepointToken->getParent()->getParent()->getParent();
+ auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
+ auto ValIt = std::find(LiveVec.begin(), LiveVec.end(), Val);
+ assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
+ size_t Index = std::distance(LiveVec.begin(), ValIt);
+ assert(Index < LiveVec.size() && "Bug in std::find?");
+ return Index;
+ };
- for (unsigned i = 0; i < LiveVariables.size(); i++) {
- // We generate a (potentially) unique declaration for every pointer type
- // combination. This results is some blow up the function declarations in
- // the IR, but removes the need for argument bitcasts which shrinks the IR
- // greatly and makes it much more readable.
- SmallVector<Type *, 1> Types; // one per 'any' type
- // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid
- // cases where the actual value's type mangling is not supported by llvm. A
- // bitcast is added later to convert gc_relocate to the actual value's type.
- Types.push_back(Type::getInt8PtrTy(M->getContext(), 1));
- Value *GCRelocateDecl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_gc_relocate, Types);
+ // All gc_relocates are set to i8 addrspace(1)* type. We originally generated
+ // unique declarations for each pointer type, but this proved problematic
+ // because the intrinsic mangling code is incomplete and fragile. Since
+ // we're moving towards a single unified pointer type anyway, we can just
+ // cast everything to an i8* of the right address space. A bitcast is added
+ // later to convert gc_relocate to the actual value's type.
+ Module *M = StatepointToken->getModule();
+ auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
+ Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
+ Value *GCRelocateDecl =
+ Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+ for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
Value *BaseIdx =
- ConstantInt::get(Type::getInt32Ty(M->getContext()),
- LiveStart + find_index(LiveVariables, BasePtrs[i]));
- Value *LiveIdx = ConstantInt::get(
- Type::getInt32Ty(M->getContext()),
- LiveStart + find_index(LiveVariables, LiveVariables[i]));
+ Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
+ Value *LiveIdx = Builder.getInt32(LiveStart + i);
// only specify a debug name if we can give a useful one
- Value *Reloc = Builder.CreateCall(
+ CallInst *Reloc = Builder.CreateCall(
GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
- LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated"
- : "");
+ suffixed_name_or(LiveVariables[i], ".relocated", ""));
// Trick CodeGen into thinking there are lots of free registers at this
// fake call.
- cast<CallInst>(Reloc)->setCallingConv(CallingConv::Cold);
+ Reloc->setCallingConv(CallingConv::Cold);
+ }
+}
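+
+// Roughly the shape of each emitted relocation (hypothetical names; the
+// operand indices come from LiveStart plus positions in the live vector):
+//   %p.relocated = call coldcc i8 addrspace(1)*
+//     @llvm.experimental.gc.relocate.p1i8(token %safepoint_token,
+//                                         i32 <base idx>, i32 <derived idx>)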
- NewDefs.push_back(cast<Instruction>(Reloc));
+namespace {
+
+/// This struct is used to defer RAUWs and `eraseFromParent`s. Using this
+/// avoids having to worry about keeping around dangling pointers to Values.
+class DeferredReplacement {
+ AssertingVH<Instruction> Old;
+ AssertingVH<Instruction> New;
+
+public:
+ explicit DeferredReplacement(Instruction *Old, Instruction *New) :
+ Old(Old), New(New) {
+ assert(Old != New && "Not allowed!");
}
- assert(NewDefs.size() == LiveVariables.size() &&
- "missing or extra redefinition at safepoint");
+
+ /// Does the task represented by this instance.
+ void doReplacement() {
+ Instruction *OldI = Old;
+ Instruction *NewI = New;
+
+ assert(OldI != NewI && "Disallowed at construction?!");
+
+ Old = nullptr;
+ New = nullptr;
+
+ if (NewI)
+ OldI->replaceAllUsesWith(NewI);
+ OldI->eraseFromParent();
+ }
+};
}
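
// A sketch of the intended use (see makeStatepointExplicitImpl below):
//   Replacements.emplace_back(OldCall, GCResult);
// is recorded while safepoint records may still hold raw pointers to OldCall;
// the caller later invokes doReplacement() on each entry, once the live sets
// have been made explicit and dangling pointers are no longer a concern.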
static void
-makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
- const SmallVectorImpl<llvm::Value *> &basePtrs,
- const SmallVectorImpl<llvm::Value *> &liveVariables,
- Pass *P,
- PartiallyConstructedSafepointRecord &result) {
- assert(basePtrs.size() == liveVariables.size());
- assert(isStatepoint(CS) &&
+makeStatepointExplicitImpl(const CallSite CS, /* to replace */
+ const SmallVectorImpl<Value *> &BasePtrs,
+ const SmallVectorImpl<Value *> &LiveVariables,
+ PartiallyConstructedSafepointRecord &Result,
+ std::vector<DeferredReplacement> &Replacements) {
+ assert(BasePtrs.size() == LiveVariables.size());
+ assert((UseDeoptBundles || isStatepoint(CS)) &&
"This method expects to be rewriting a statepoint");
- BasicBlock *BB = CS.getInstruction()->getParent();
- assert(BB);
- Function *F = BB->getParent();
- assert(F && "must be set");
- Module *M = F->getParent();
- (void)M;
- assert(M && "must be set");
-
- // We're not changing the function signature of the statepoint since the gc
- // arguments go into the var args section.
- Function *gc_statepoint_decl = CS.getCalledFunction();
-
// Then go ahead and use the builder to actually do the inserts. We insert
// immediately before the previous instruction under the assumption that all
// arguments will be available here. We can't insert afterwards since we may
// be replacing a terminator.
- Instruction *insertBefore = CS.getInstruction();
- IRBuilder<> Builder(insertBefore);
- // Copy all of the arguments from the original statepoint - this includes the
- // target, call args, and deopt args
- SmallVector<llvm::Value *, 64> args;
- args.insert(args.end(), CS.arg_begin(), CS.arg_end());
- // TODO: Clear the 'needs rewrite' flag
-
- // add all the pointers to be relocated (gc arguments)
- // Capture the start of the live variable list for use in the gc_relocates
- const int live_start = args.size();
- args.insert(args.end(), liveVariables.begin(), liveVariables.end());
+ Instruction *InsertBefore = CS.getInstruction();
+ IRBuilder<> Builder(InsertBefore);
+
+ ArrayRef<Value *> GCArgs(LiveVariables);
+ uint64_t StatepointID = 0xABCDEF00;
+ uint32_t NumPatchBytes = 0;
+ uint32_t Flags = uint32_t(StatepointFlags::None);
+
+ ArrayRef<Use> CallArgs;
+ ArrayRef<Use> DeoptArgs;
+ ArrayRef<Use> TransitionArgs;
+
+ Value *CallTarget = nullptr;
+
+ if (UseDeoptBundles) {
+ CallArgs = {CS.arg_begin(), CS.arg_end()};
+ DeoptArgs = GetDeoptBundleOperands(CS);
+ // TODO: we don't fill in TransitionArgs or Flags in this branch, but we
+ // could have an operand bundle for that too.
+ AttributeSet OriginalAttrs = CS.getAttributes();
+
+ Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "statepoint-id");
+ if (AttrID.isStringAttribute())
+ AttrID.getValueAsString().getAsInteger(10, StatepointID);
+
+ Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
+ AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
+ if (AttrNumPatchBytes.isStringAttribute())
+ AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);
+
+ CallTarget = CS.getCalledValue();
+ } else {
+ // This branch will be gone soon, and we will soon only support the
+ // UseDeoptBundles == true configuration.
+ Statepoint OldSP(CS);
+ StatepointID = OldSP.getID();
+ NumPatchBytes = OldSP.getNumPatchBytes();
+ Flags = OldSP.getFlags();
+
+ CallArgs = {OldSP.arg_begin(), OldSP.arg_end()};
+ DeoptArgs = {OldSP.vm_state_begin(), OldSP.vm_state_end()};
+ TransitionArgs = {OldSP.gc_transition_args_begin(),
+ OldSP.gc_transition_args_end()};
+ CallTarget = OldSP.getCalledValue();
+ }
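+
+ // E.g. (hypothetical attributes): a call site carrying
+ //   "statepoint-id"="42" "statepoint-num-patch-bytes"="4"
+ // yields StatepointID == 42 and NumPatchBytes == 4 in the bundle-based
+ // branch above; missing or non-integer values leave the defaults in place.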
// Create the statepoint given all the arguments
- Instruction *token = nullptr;
- AttributeSet return_attributes;
+ Instruction *Token = nullptr;
+ AttributeSet ReturnAttrs;
if (CS.isCall()) {
- CallInst *toReplace = cast<CallInst>(CS.getInstruction());
- CallInst *call =
- Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token");
- call->setTailCall(toReplace->isTailCall());
- call->setCallingConv(toReplace->getCallingConv());
+ CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
+ CallInst *Call = Builder.CreateGCStatepointCall(
+ StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
+ TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
+
+ Call->setTailCall(ToReplace->isTailCall());
+ Call->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
- // In case if we can handle this set of sttributes - set up function attrs
+ AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+ // In case we can handle this set of attributes - set up function attrs
// directly on statepoint and return attrs later for gc_result intrinsic.
- call->setAttributes(new_attrs.getFnAttributes());
- return_attributes = new_attrs.getRetAttributes();
+ Call->setAttributes(NewAttrs.getFnAttributes());
+ ReturnAttrs = NewAttrs.getRetAttributes();
- token = call;
+ Token = Call;
// Put the following gc_result and gc_relocate calls immediately after
// the old call (which we're about to delete)
- BasicBlock::iterator next(toReplace);
- assert(BB->end() != next && "not a terminator, must have next");
- next++;
- Instruction *IP = &*(next);
- Builder.SetInsertPoint(IP);
- Builder.SetCurrentDebugLocation(IP->getDebugLoc());
-
+ assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
+ Builder.SetInsertPoint(ToReplace->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
} else {
- InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());
+ InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
// Insert the new invoke into the old block. We'll remove the old one in a
// moment at which point this will become the new terminator for the
// original block.
- InvokeInst *invoke = InvokeInst::Create(
- gc_statepoint_decl, toReplace->getNormalDest(),
- toReplace->getUnwindDest(), args, "", toReplace->getParent());
- invoke->setCallingConv(toReplace->getCallingConv());
+ InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
+ StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
+ ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
+ GCArgs, "statepoint_token");
+
+ Invoke->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
- // In case if we can handle this set of sttributes - set up function attrs
+ AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+ // In case we can handle this set of attributes - set up function attrs
// directly on statepoint and return attrs later for gc_result intrinsic.
- invoke->setAttributes(new_attrs.getFnAttributes());
- return_attributes = new_attrs.getRetAttributes();
+ Invoke->setAttributes(NewAttrs.getFnAttributes());
+ ReturnAttrs = NewAttrs.getRetAttributes();
- token = invoke;
+ Token = Invoke;
// Generate gc relocates in exceptional path
- BasicBlock *unwindBlock = toReplace->getUnwindDest();
- assert(!isa<PHINode>(unwindBlock->begin()) &&
- unwindBlock->getUniquePredecessor() &&
+ BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
+ assert(!isa<PHINode>(UnwindBlock->begin()) &&
+ UnwindBlock->getUniquePredecessor() &&
"can't safely insert in this block!");
- Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
- Builder.SetInsertPoint(IP);
- Builder.SetCurrentDebugLocation(toReplace->getDebugLoc());
+ Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
- // Extract second element from landingpad return value. We will attach
- // exceptional gc relocates to it.
- const unsigned idx = 1;
- Instruction *exceptional_token =
- cast<Instruction>(Builder.CreateExtractValue(
- unwindBlock->getLandingPadInst(), idx, "relocate_token"));
- result.UnwindToken = exceptional_token;
+ // Attach exceptional gc relocates to the landingpad.
+ Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
+ Result.UnwindToken = ExceptionalToken;
- // Just throw away return value. We will use the one we got for normal
- // block.
- (void)CreateGCRelocates(liveVariables, live_start, basePtrs,
- exceptional_token, Builder);
+ const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
+ CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken,
+ Builder);
// Generate gc relocates and returns for normal block
- BasicBlock *normalDest = toReplace->getNormalDest();
- assert(!isa<PHINode>(normalDest->begin()) &&
- normalDest->getUniquePredecessor() &&
+ BasicBlock *NormalDest = ToReplace->getNormalDest();
+ assert(!isa<PHINode>(NormalDest->begin()) &&
+ NormalDest->getUniquePredecessor() &&
"can't safely insert in this block!");
- IP = &*(normalDest->getFirstInsertionPt());
- Builder.SetInsertPoint(IP);
+ Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());
// gc relocates will be generated later as if it were a regular call
// statepoint
}
- assert(token);
-
- // Take the name of the original value call if it had one.
- token->takeName(CS.getInstruction());
+ assert(Token && "Should be set in one of the above branches!");
+
+ if (UseDeoptBundles) {
+ Token->setName("statepoint_token");
+ if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
+ StringRef Name =
+ CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
+ CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
+ GCResult->setAttributes(CS.getAttributes().getRetAttributes());
+
+ // We cannot RAUW or delete CS.getInstruction() because it could be in the
+ // live set of some other safepoint, in which case that safepoint's
+ // PartiallyConstructedSafepointRecord will hold a raw pointer to this
+ // llvm::Instruction. Instead, we defer the replacement and deletion to
+ // after the live sets have been made explicit in the IR, and we no longer
+ // have raw pointers to worry about.
+ Replacements.emplace_back(CS.getInstruction(), GCResult);
+ } else {
+ Replacements.emplace_back(CS.getInstruction(), nullptr);
+ }
+ } else {
+ assert(!CS.getInstruction()->hasNUsesOrMore(2) &&
+ "only valid use before rewrite is gc.result");
+ assert(!CS.getInstruction()->hasOneUse() ||
+ isGCResult(cast<Instruction>(*CS.getInstruction()->user_begin())));
-// The GCResult is already inserted, we just need to find it
-#ifndef NDEBUG
- Instruction *toReplace = CS.getInstruction();
- assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
- "only valid use before rewrite is gc.result");
- assert(!toReplace->hasOneUse() ||
- isGCResult(cast<Instruction>(*toReplace->user_begin())));
-#endif
+ // Take the name of the original statepoint token if there was one.
+ Token->takeName(CS.getInstruction());
- // Update the gc.result of the original statepoint (if any) to use the newly
- // inserted statepoint. This is safe to do here since the token can't be
- // considered a live reference.
- CS.getInstruction()->replaceAllUsesWith(token);
+ // Update the gc.result of the original statepoint (if any) to use the newly
+ // inserted statepoint. This is safe to do here since the token can't be
+ // considered a live reference.
+ CS.getInstruction()->replaceAllUsesWith(Token);
+ CS.getInstruction()->eraseFromParent();
+ }
- result.StatepointToken = token;
+ Result.StatepointToken = Token;
// Second, create a gc.relocate for every live variable
- CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
+ const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
+ CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder);
}
namespace {
-struct name_ordering {
- Value *base;
- Value *derived;
- bool operator()(name_ordering const &a, name_ordering const &b) {
- return -1 == a.derived->getName().compare(b.derived->getName());
+struct NameOrdering {
+ Value *Base;
+ Value *Derived;
+
+ bool operator()(NameOrdering const &a, NameOrdering const &b) {
+ return -1 == a.Derived->getName().compare(b.Derived->getName());
}
};
}
-static void stablize_order(SmallVectorImpl<Value *> &basevec,
- SmallVectorImpl<Value *> &livevec) {
- assert(basevec.size() == livevec.size());
-
- SmallVector<name_ordering, 64> temp;
- for (size_t i = 0; i < basevec.size(); i++) {
- name_ordering v;
- v.base = basevec[i];
- v.derived = livevec[i];
- temp.push_back(v);
- }
- std::sort(temp.begin(), temp.end(), name_ordering());
- for (size_t i = 0; i < basevec.size(); i++) {
- basevec[i] = temp[i].base;
- livevec[i] = temp[i].derived;
+
+static void StabilizeOrder(SmallVectorImpl<Value *> &BaseVec,
+ SmallVectorImpl<Value *> &LiveVec) {
+ assert(BaseVec.size() == LiveVec.size());
+
+ SmallVector<NameOrdering, 64> Temp;
+ for (size_t i = 0; i < BaseVec.size(); i++) {
+ NameOrdering v;
+ v.Base = BaseVec[i];
+ v.Derived = LiveVec[i];
+ Temp.push_back(v);
+ }
+
+ std::sort(Temp.begin(), Temp.end(), NameOrdering());
+ for (size_t i = 0; i < BaseVec.size(); i++) {
+ BaseVec[i] = Temp[i].Base;
+ LiveVec[i] = Temp[i].Derived;
}
}
@@ -1401,40 +1603,39 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec,
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the caller's responsibility.
static void
-makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
- PartiallyConstructedSafepointRecord &result) {
- auto liveset = result.liveset;
- auto PointerToBase = result.PointerToBase;
+makeStatepointExplicit(DominatorTree &DT, const CallSite &CS,
+ PartiallyConstructedSafepointRecord &Result,
+ std::vector<DeferredReplacement> &Replacements) {
+ const auto &LiveSet = Result.LiveSet;
+ const auto &PointerToBase = Result.PointerToBase;
// Convert to vector for efficient cross referencing.
- SmallVector<Value *, 64> basevec, livevec;
- livevec.reserve(liveset.size());
- basevec.reserve(liveset.size());
- for (Value *L : liveset) {
- livevec.push_back(L);
-
- assert(PointerToBase.find(L) != PointerToBase.end());
- Value *base = PointerToBase[L];
- basevec.push_back(base);
+ SmallVector<Value *, 64> BaseVec, LiveVec;
+ LiveVec.reserve(LiveSet.size());
+ BaseVec.reserve(LiveSet.size());
+ for (Value *L : LiveSet) {
+ LiveVec.push_back(L);
+ assert(PointerToBase.count(L));
+ Value *Base = PointerToBase.find(L)->second;
+ BaseVec.push_back(Base);
}
- assert(livevec.size() == basevec.size());
+ assert(LiveVec.size() == BaseVec.size());
// To make the output IR slightly more stable (for use in diffs), ensure a
// fixed order of the values in the safepoint (by sorting the value name).
// The order is otherwise meaningless.
- stablize_order(basevec, livevec);
+ StabilizeOrder(BaseVec, LiveVec);
// Do the actual rewriting and delete the old statepoint
- makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
- CS.getInstruction()->eraseFromParent();
+ makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
}
// Helper function for the relocationViaAlloca.
-// It receives iterator to the statepoint gc relocates and emits store to the
-// assigned
-// location (via allocaMap) for the each one of them.
-// Add visited values into the visitedLiveValues set we will later use them
-// for sanity check.
+//
+// It receives an iterator to the statepoint gc relocates and emits a store
+// to the assigned location (via allocaMap) for each one of them. It adds the
+// visited values into the visitedLiveValues set, which we will later use for
+// sanity checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseMap<Value *, Value *> &AllocaMap,
@@ -1459,13 +1660,15 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
Value *Alloca = AllocaMap[OriginalValue];
// Emit store into the related alloca
- // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to
+ // All gc_relocates are i8 addrspace(1)* typed, and each must be bitcast to
// the correct type according to its alloca.
- assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator");
+ assert(RelocatedValue->getNextNode() &&
+ "Should always have one since it's not a terminator");
IRBuilder<> Builder(RelocatedValue->getNextNode());
Value *CastedRelocatedValue =
- Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
- RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : "");
+ Builder.CreateBitCast(RelocatedValue,
+ cast<AllocaInst>(Alloca)->getAllocatedType(),
+ suffixed_name_or(RelocatedValue, ".casted", ""));
StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
@@ -1501,10 +1704,10 @@ insertRematerializationStores(
}
}
-/// do all the relocation update via allocas and mem2reg
+/// Do all the relocation update via allocas and mem2reg
static void relocationViaAlloca(
Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
- ArrayRef<struct PartiallyConstructedSafepointRecord> Records) {
+ ArrayRef<PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
// record initial number of (static) allocas; we'll check we have the same
// number when we get done.
@@ -1531,15 +1734,12 @@ static void relocationViaAlloca(
PromotableAllocas.push_back(Alloca);
};
- // emit alloca for each live gc pointer
- for (unsigned i = 0; i < Live.size(); i++) {
- emitAllocaFor(Live[i]);
- }
-
- // emit allocas for rematerialized values
- for (size_t i = 0; i < Records.size(); i++) {
- const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+ // Emit alloca for each live gc pointer
+ for (Value *V : Live)
+ emitAllocaFor(V);
+ // Emit allocas for rematerialized values
+ for (const auto &Info : Records)
for (auto RematerializedValuePair : Info.RematerializedValues) {
Value *OriginalValue = RematerializedValuePair.second;
if (AllocaMap.count(OriginalValue) != 0)
@@ -1548,20 +1748,17 @@ static void relocationViaAlloca(
emitAllocaFor(OriginalValue);
++NumRematerializedValues;
}
- }
// The next two loops are part of the same conceptual operation. We need to
// insert a store to the alloca after the original def and at each
// redefinition. We need to insert a load before each use. These are split
// into distinct loops for performance reasons.
- // update gc pointer after each statepoint
- // either store a relocated value or null (if no relocated value found for
- // this gc pointer and it is not a gc_result)
- // this must happen before we update the statepoint with load of alloca
- // otherwise we lose the link between statepoint and old def
- for (size_t i = 0; i < Records.size(); i++) {
- const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+ // Update gc pointer after each statepoint: either store a relocated value or
+ // null (if no relocated value was found for this gc pointer and it is not a
+ // gc_result). This must happen before we update the statepoint with a load
+ // of the alloca, otherwise we lose the link between statepoint and old def.
+ for (const auto &Info : Records) {
Value *Statepoint = Info.StatepointToken;
// This will be used for a consistency check
@@ -1582,7 +1779,7 @@ static void relocationViaAlloca(
VisitedLiveValues);
if (ClobberNonLive) {
- // As a debuging aid, pretend that an unrelocated pointer becomes null at
+ // As a debugging aid, pretend that an unrelocated pointer becomes null at
// the gc.statepoint. This will turn some subtle GC problems into
// slightly easier to debug SEGVs. Note that on large IR files with
// lots of gc.statepoints this is extremely costly both memory and time
@@ -1612,23 +1809,22 @@ static void relocationViaAlloca(
// Insert the clobbering stores. These may get intermixed with the
// gc.results and gc.relocates, but that's fine.
if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
- InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
- InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+ InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
+ InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
} else {
- BasicBlock::iterator Next(cast<CallInst>(Statepoint));
- Next++;
- InsertClobbersAt(Next);
+ InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
}
}
}
- // update use with load allocas and add store for gc_relocated
+
+ // Update uses with loads from the allocas and add stores for gc_relocates.
for (auto Pair : AllocaMap) {
Value *Def = Pair.first;
Value *Alloca = Pair.second;
- // we pre-record the uses of allocas so that we dont have to worry about
- // later update
- // that change the user information.
+ // We pre-record the uses of allocas so that we don't have to worry about
+ // later updates that change the user information.
+
SmallVector<Instruction *, 20> Uses;
// PERF: trade a linear scan for repeated reallocation
Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
@@ -1663,9 +1859,9 @@ static void relocationViaAlloca(
}
}
- // emit store for the initial gc value
- // store must be inserted after load, otherwise store will be in alloca's
- // use list and an extra load will be inserted before it
+ // Emit store for the initial gc value. Store must be inserted after load,
+ // otherwise store will be in alloca's use list and an extra load will be
+ // inserted before it.
StoreInst *Store = new StoreInst(Def, Alloca);
if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
@@ -1688,14 +1884,13 @@ static void relocationViaAlloca(
assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
"we must have the same allocas with lives");
if (!PromotableAllocas.empty()) {
- // apply mem2reg to promote alloca to SSA
+ // Apply mem2reg to promote alloca to SSA
PromoteMemToReg(PromotableAllocas, DT);
}
#ifndef NDEBUG
- for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
- I++)
- if (isa<AllocaInst>(*I))
+ for (auto &I : F.getEntryBlock())
+ if (isa<AllocaInst>(I))
InitialAllocaNum--;
assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
@@ -1719,28 +1914,27 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
// No values to hold live, might as well not insert the empty holder
return;
- Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
+ Module *M = CS.getInstruction()->getModule();
// Use a dummy vararg function to actually hold the values live
Function *Func = cast<Function>(M->getOrInsertFunction(
"__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
if (CS.isCall()) {
// For call safepoints insert dummy calls right after safepoint
- BasicBlock::iterator Next(CS.getInstruction());
- Next++;
- Holders.push_back(CallInst::Create(Func, Values, "", Next));
+ Holders.push_back(CallInst::Create(Func, Values, "",
+ &*++CS.getInstruction()->getIterator()));
return;
}
// For invoke safepoints insert dummy calls both in normal and
// exceptional destination blocks
auto *II = cast<InvokeInst>(CS.getInstruction());
Holders.push_back(CallInst::Create(
- Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
+ Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
Holders.push_back(CallInst::Create(
- Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
+ Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
}
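
// The holder is nothing more than a call along the lines of (hypothetical IR)
//   call void (...) @__tmp_use(i8 addrspace(1)* %p, i8 addrspace(1)* %q)
// whose sole purpose is to keep %p and %q visibly live while liveness is
// computed; the holders are expected to be deleted again by the caller.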
static void findLiveReferences(
- Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
GCPtrLivenessData OriginalLivenessData;
computeLiveInValues(DT, F, OriginalLivenessData);
@@ -1751,12 +1945,12 @@ static void findLiveReferences(
}
}
-/// Remove any vector of pointers from the liveset by scalarizing them over the
-/// statepoint instruction. Adds the scalarized pieces to the liveset. It
-/// would be preferrable to include the vector in the statepoint itself, but
+/// Remove any vector of pointers from the live set by scalarizing them over the
+/// statepoint instruction. Adds the scalarized pieces to the live set. It
+/// would be preferable to include the vector in the statepoint itself, but
/// the lowering code currently does not handle that. Extending it would be
/// slightly non-trivial since it requires a format change. Given how rare
-/// such cases are (for the moment?) scalarizing is an acceptable comprimise.
+/// such cases are (for the moment?) scalarizing is an acceptable compromise.
static void splitVectorValues(Instruction *StatepointInst,
StatepointLiveSetTy &LiveSet,
DenseMap<Value *, Value *>& PointerToBase,
@@ -1887,7 +2081,7 @@ static void splitVectorValues(Instruction *StatepointInst,
// Helper function for "rematerializeLiveValues". It walks the use chain
// starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
// values are visited (currently it is GEP's and casts). Returns true if it
-// sucessfully reached "BaseValue" and false otherwise.
+// successfully reached "BaseValue" and false otherwise.
// Fills "ChainToBase" array with all visited values. "BaseValue" is not
// recorded.
static bool findRematerializableChainToBasePointer(
@@ -1907,16 +2101,12 @@ static bool findRematerializableChainToBasePointer(
}
if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
- Value *Def = CI->stripPointerCasts();
-
- // This two checks are basically similar. First one is here for the
- // consistency with findBasePointers logic.
- assert(!isa<CastInst>(Def) && "not a pointer cast found");
if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
return false;
ChainToBase.push_back(CI);
- return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+ return findRematerializableChainToBasePointer(ChainToBase,
+ CI->getOperand(0), BaseValue);
}
// Unsupported instruction in the chain
@@ -1957,8 +2147,8 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
return Cost;
}
-// From the statepoint liveset pick values that are cheaper to recompute then to
-// relocate. Remove this values from the liveset, rematerialize them after
+// From the statepoint live set pick values that are cheaper to recompute than
+// to relocate. Remove these values from the live set, rematerialize them after
// statepoint and record them in "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallSite CS,
@@ -1970,10 +2160,10 @@ static void rematerializeLiveValues(CallSite CS,
// We cannot do this in the following loop due to iterator invalidation.
SmallVector<Value *, 32> LiveValuesToBeDeleted;
- for (Value *LiveValue: Info.liveset) {
+ for (Value *LiveValue: Info.LiveSet) {
// For each live pointer find its defining chain
SmallVector<Instruction *, 3> ChainToBase;
- assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+ assert(Info.PointerToBase.count(LiveValue));
bool FoundChain =
findRematerializableChainToBasePointer(ChainToBase,
LiveValue,
@@ -2059,9 +2249,9 @@ static void rematerializeLiveValues(CallSite CS,
InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
Instruction *NormalInsertBefore =
- Invoke->getNormalDest()->getFirstInsertionPt();
+ &*Invoke->getNormalDest()->getFirstInsertionPt();
Instruction *UnwindInsertBefore =
- Invoke->getUnwindDest()->getFirstInsertionPt();
+ &*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue =
rematerializeChain(NormalInsertBefore);
@@ -2075,22 +2265,23 @@ static void rematerializeLiveValues(CallSite CS,
// Remove rematerialized values from the live set
for (auto LiveValue: LiveValuesToBeDeleted) {
- Info.liveset.erase(LiveValue);
+ Info.LiveSet.erase(LiveValue);
}
}
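The decision this function implements reduces to a cost comparison between
replaying a value's defining chain and relocating it. A self-contained sketch
with an illustrative cost model (not LLVM's actual TargetTransformInfo
numbers):

#include <cstdio>
#include <vector>

struct ChainStep { int Cost; };

// True when re-deriving the pointer from its base is estimated to be
// cheaper than keeping it live across the statepoint and relocating it.
static bool shouldRematerialize(const std::vector<ChainStep> &Chain,
                                int RelocationCost) {
  int Recompute = 0;
  for (const ChainStep &S : Chain)
    Recompute += S.Cost; // cost to replay the defining chain after the call
  return Recompute < RelocationCost;
}

int main() {
  std::vector<ChainStep> Chain = {{1}, {0}}; // e.g. one GEP plus a noop cast
  std::printf("%d\n", shouldRematerialize(Chain, 2)); // prints 1
  return 0;
}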
-static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
- SmallVectorImpl<CallSite> &toUpdate) {
+static bool insertParsePoints(Function &F, DominatorTree &DT,
+ TargetTransformInfo &TTI,
+ SmallVectorImpl<CallSite> &ToUpdate) {
#ifndef NDEBUG
// sanity check the input
- std::set<CallSite> uniqued;
- uniqued.insert(toUpdate.begin(), toUpdate.end());
- assert(uniqued.size() == toUpdate.size() && "no duplicates please!");
+ std::set<CallSite> Uniqued;
+ Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
+ assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
- for (size_t i = 0; i < toUpdate.size(); i++) {
- CallSite &CS = toUpdate[i];
+ for (CallSite CS : ToUpdate) {
assert(CS.getInstruction()->getParent()->getParent() == &F);
- assert(isStatepoint(CS) && "expected to already be a deopt statepoint");
+ assert((UseDeoptBundles || isStatepoint(CS)) &&
+ "expected to already be a deopt statepoint");
}
#endif
@@ -2098,50 +2289,45 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// the top of the successor blocks. See the comment on
// normalizeForInvokeSafepoint for exactly what is needed. Note that this step
// may restructure the CFG.
- for (CallSite CS : toUpdate) {
+ for (CallSite CS : ToUpdate) {
if (!CS.isInvoke())
continue;
- InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
- normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
- DT);
- normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
- DT);
+ auto *II = cast<InvokeInst>(CS.getInstruction());
+ normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
+ normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
}
// A list of dummy calls added to the IR to keep various values obviously
// live in the IR. We'll remove all of these when done.
- SmallVector<CallInst *, 64> holders;
+ SmallVector<CallInst *, 64> Holders;
// Insert a dummy call with all of the arguments to the vm_state we'll need
// for the actual safepoint insertion. This ensures reference arguments in
// the deopt argument list are considered live through the safepoint (and
// thus makes sure they get relocated.)
- for (size_t i = 0; i < toUpdate.size(); i++) {
- CallSite &CS = toUpdate[i];
- Statepoint StatepointCS(CS);
-
+ for (CallSite CS : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
- for (Use &U : StatepointCS.vm_state_args()) {
- Value *Arg = cast<Value>(&U);
+
+ iterator_range<const Use *> DeoptStateRange =
+ UseDeoptBundles
+ ? iterator_range<const Use *>(GetDeoptBundleOperands(CS))
+ : iterator_range<const Use *>(Statepoint(CS).vm_state_args());
+
+ for (Value *Arg : DeoptStateRange) {
assert(!isUnhandledGCPointerType(Arg->getType()) &&
"support for FCA unimplemented");
if (isHandledGCPointerType(Arg->getType()))
DeoptValues.push_back(Arg);
}
- insertUseHolderAfter(CS, DeoptValues, holders);
- }
- SmallVector<struct PartiallyConstructedSafepointRecord, 64> records;
- records.reserve(toUpdate.size());
- for (size_t i = 0; i < toUpdate.size(); i++) {
- struct PartiallyConstructedSafepointRecord info;
- records.push_back(info);
+ insertUseHolderAfter(CS, DeoptValues, Holders);
}
- assert(records.size() == toUpdate.size());
- // A) Identify all gc pointers which are staticly live at the given call
+ SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
+
+ // A) Identify all gc pointers which are statically live at the given call
// site.
- findLiveReferences(F, DT, P, toUpdate, records);
+ findLiveReferences(F, DT, ToUpdate, Records);
// B) Find the base pointers for each live pointer
/* scope for caching */ {
@@ -2150,10 +2336,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// large numbers of duplicate base_phis.
DefiningValueMapTy DVCache;
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
- findBasePointers(DT, DVCache, CS, info);
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &info = Records[i];
+ findBasePointers(DT, DVCache, ToUpdate[i], info);
}
} // end of cache scope
@@ -2170,63 +2355,75 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// the base pointers which were identified for that safepoint. We'll then
// ask liveness for _every_ base inserted to see what is now live. Then we
// remove the dummy calls.
- holders.reserve(holders.size() + records.size());
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
+ Holders.reserve(Holders.size() + Records.size());
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
SmallVector<Value *, 128> Bases;
- for (auto Pair : info.PointerToBase) {
+ for (auto Pair : Info.PointerToBase)
Bases.push_back(Pair.second);
- }
- insertUseHolderAfter(CS, Bases, holders);
+
+ insertUseHolderAfter(ToUpdate[i], Bases, Holders);
}
// By selecting base pointers, we've effectively inserted new uses. Thus, we
// need to rerun liveness. We may *also* have inserted new defs, but that's
// not the key issue.
- recomputeLiveInValues(F, DT, P, toUpdate, records);
+ recomputeLiveInValues(F, DT, ToUpdate, Records);
if (PrintBasePointers) {
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
+ for (auto &Info : Records) {
errs() << "Base Pairs: (w/Relocation)\n";
- for (auto Pair : info.PointerToBase) {
- errs() << " derived %" << Pair.first->getName() << " base %"
- << Pair.second->getName() << "\n";
+ for (auto Pair : Info.PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+ errs() << "\n";
}
}
}
- for (size_t i = 0; i < holders.size(); i++) {
- holders[i]->eraseFromParent();
- holders[i] = nullptr;
- }
- holders.clear();
+
+ // It is possible that non-constant live variables have a constant base. For
+ // example, a GEP with a variable offset from a global. In this case we can
+  // remove it from the live set. We already don't add constants to the live set
+ // because we assume they won't move at runtime and the GC doesn't need to be
+ // informed about them. The same reasoning applies if the base is constant.
+ // Note that the relocation placement code relies on this filtering for
+  // correctness as it expects the base to be in the live set, which isn't true
+ // if the base is constant.
+ for (auto &Info : Records)
+ for (auto &BasePair : Info.PointerToBase)
+ if (isa<Constant>(BasePair.second))
+ Info.LiveSet.erase(BasePair.first);
+
+ for (CallInst *CI : Holders)
+ CI->eraseFromParent();
+
+ Holders.clear();
// Do a limited scalarization of any vector values live at a safepoint which
// contain pointers. This enables this pass to run after vectorization at
// the cost of some possible performance loss. TODO: it would be nice to
// natively support vectors all the way through the backend so we don't need
// to scalarize here.
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- Instruction *statepoint = toUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(statepoint), info.liveset,
- info.PointerToBase, DT);
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+ Instruction *Statepoint = ToUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+ Info.PointerToBase, DT);
}
// In order to reduce the live set of a statepoint we might choose to rematerialize
- // some values instead of relocating them. This is purelly an optimization and
+ // some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
- TargetTransformInfo &TTI =
- P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ for (size_t i = 0; i < Records.size(); i++)
+ rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
-
- rematerializeLiveValues(CS, info, TTI);
- }
+ // We need this to safely RAUW and delete call or invoke return values that
+ // may themselves be live over a statepoint. For details, please see usage in
+ // makeStatepointExplicitImpl.
+ std::vector<DeferredReplacement> Replacements;
// Now run through and replace the existing statepoints with new ones with
// the live variables listed. We do not yet update uses of the values being
@@ -2234,61 +2431,77 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// survive to the last iteration of this loop. (By construction, the
// previous statepoint can not be a live variable, thus we can and do remove
// the old statepoint calls as we go.)
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
- makeStatepointExplicit(DT, CS, P, info);
+ for (size_t i = 0; i < Records.size(); i++)
+ makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
+
+  ToUpdate.clear(); // prevent accidental use of invalid CallSites
+
+ for (auto &PR : Replacements)
+ PR.doReplacement();
+
+ Replacements.clear();
+
+ for (auto &Info : Records) {
+  // These live sets may contain stale Value pointers, since we replaced calls
+ // with operand bundles with calls wrapped in gc.statepoint, and some of
+ // those calls may have been def'ing live gc pointers. Clear these out to
+ // avoid accidentally using them.
+ //
+ // TODO: We should create a separate data structure that does not contain
+ // these live sets, and migrate to using that data structure from this point
+ // onward.
+ Info.LiveSet.clear();
+ Info.PointerToBase.clear();
}
- toUpdate.clear(); // prevent accident use of invalid CallSites
// Do all the fixups of the original live variables to their relocated selves
- SmallVector<Value *, 128> live;
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
+ SmallVector<Value *, 128> Live;
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+
// We can't simply save the live set from the original insertion. One of
// the live values might be the result of a call which needs a safepoint.
// That Value* no longer exists and we need to use the new gc_result.
- // Thankfully, the liveset is embedded in the statepoint (and updated), so
+ // Thankfully, the live set is embedded in the statepoint (and updated), so
// we just grab that.
- Statepoint statepoint(info.StatepointToken);
- live.insert(live.end(), statepoint.gc_args_begin(),
- statepoint.gc_args_end());
+ Statepoint Statepoint(Info.StatepointToken);
+ Live.insert(Live.end(), Statepoint.gc_args_begin(),
+ Statepoint.gc_args_end());
#ifndef NDEBUG
// Do some basic sanity checks on our liveness results before performing
// relocation. Relocation can and will turn mistakes in liveness results
// into nonsensical code which is much harder to debug.
// TODO: It would be nice to test consistency as well
- assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+ assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
"statepoint must be reachable or liveness is meaningless");
- for (Value *V : statepoint.gc_args()) {
+ for (Value *V : Statepoint.gc_args()) {
if (!isa<Instruction>(V))
// Non-instruction values trivially dominate all possible uses
continue;
- auto LiveInst = cast<Instruction>(V);
+ auto *LiveInst = cast<Instruction>(V);
assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
"unreachable values should never be live");
- assert(DT.dominates(LiveInst, info.StatepointToken) &&
+ assert(DT.dominates(LiveInst, Info.StatepointToken) &&
"basic SSA liveness expectation violated by liveness analysis");
}
#endif
}
- unique_unsorted(live);
+ unique_unsorted(Live);
#ifndef NDEBUG
// sanity check
- for (auto ptr : live) {
- assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");
- }
+ for (auto *Ptr : Live)
+ assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
#endif
- relocationViaAlloca(F, DT, live, records);
- return !records.empty();
+ relocationViaAlloca(F, DT, Live, Records);
+ return !Records.empty();
}
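The DeferredReplacement records introduced above queue replace-and-delete
work so that no CallSite is invalidated while the rewrite loop is still
running. A plain-C++ sketch of the pattern (the callback body is
illustrative; the real class performs RAUW and instruction deletion):

#include <functional>
#include <vector>

struct DeferredReplacement {
  std::function<void()> Apply; // captured mutation, not yet performed
  void doReplacement() { Apply(); }
};

int main() {
  std::vector<DeferredReplacement> Replacements;
  int OldUses = 3;
  Replacements.push_back({[&OldUses] { OldUses = 0; }}); // queued only
  for (auto &PR : Replacements)
    PR.doReplacement(); // applied after the rewrite loop completes
  return OldUses;       // 0: the replacement ran exactly once, at the end
}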
// Handles both return values and arguments for Functions and CallSites.
template <typename AttrHolder>
-static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
- unsigned Index) {
+static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
+ unsigned Index) {
AttrBuilder R;
if (AH.getDereferenceableBytes(Index))
R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
@@ -2296,6 +2509,8 @@ static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
if (AH.getDereferenceableOrNullBytes(Index))
R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
AH.getDereferenceableOrNullBytes(Index)));
+ if (AH.doesNotAlias(Index))
+ R.addAttribute(Attribute::NoAlias);
if (!R.empty())
AH.setAttributes(AH.getAttributes().removeAttributes(
@@ -2303,25 +2518,25 @@ static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
}
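The helper first collects whichever of the offending attributes are present,
then strips them in a single removeAttributes call. The same
collect-then-remove shape in self-contained C++ (string names stand in for
LLVM's Attribute kinds):

#include <initializer_list>
#include <set>
#include <string>

static void removeNonValidAttrs(std::set<std::string> &Attrs) {
  std::set<std::string> R; // the attributes we found and must strip
  for (const char *Name :
       {"dereferenceable", "dereferenceable_or_null", "noalias"})
    if (Attrs.count(Name))
      R.insert(Name);
  for (const std::string &Name : R)
    Attrs.erase(Name);
}

int main() {
  std::set<std::string> Attrs = {"noalias", "nonnull"};
  removeNonValidAttrs(Attrs);
  return static_cast<int>(Attrs.count("noalias")); // 0; "nonnull" survives
}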
void
-RewriteStatepointsForGC::stripDereferenceabilityInfoFromPrototype(Function &F) {
+RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
LLVMContext &Ctx = F.getContext();
for (Argument &A : F.args())
if (isa<PointerType>(A.getType()))
- RemoveDerefAttrAtIndex(Ctx, F, A.getArgNo() + 1);
+ RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
if (isa<PointerType>(F.getReturnType()))
- RemoveDerefAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
}
-void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
+void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
if (F.empty())
return;
LLVMContext &Ctx = F.getContext();
MDBuilder Builder(Ctx);
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
bool IsImmutableTBAA =
@@ -2344,9 +2559,9 @@ void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
if (CallSite CS = CallSite(&I)) {
for (int i = 0, e = CS.arg_size(); i != e; i++)
if (isa<PointerType>(CS.getArgument(i)->getType()))
- RemoveDerefAttrAtIndex(Ctx, CS, i + 1);
+ RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
if (isa<PointerType>(CS.getType()))
- RemoveDerefAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
}
}
}
@@ -2365,17 +2580,17 @@ static bool shouldRewriteStatepointsIn(Function &F) {
return false;
}
-void RewriteStatepointsForGC::stripDereferenceabilityInfo(Module &M) {
+void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
#ifndef NDEBUG
assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) &&
"precondition!");
#endif
for (Function &F : M)
- stripDereferenceabilityInfoFromPrototype(F);
+ stripNonValidAttributesFromPrototype(F);
for (Function &F : M)
- stripDereferenceabilityInfoFromBody(F);
+ stripNonValidAttributesFromBody(F);
}
bool RewriteStatepointsForGC::runOnFunction(Function &F) {
@@ -2389,15 +2604,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
return false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ auto NeedsRewrite = [](Instruction &I) {
+ if (UseDeoptBundles) {
+ if (ImmutableCallSite CS = ImmutableCallSite(&I))
+ return !callsGCLeafFunction(CS);
+ return false;
+ }
+
+ return isStatepoint(I);
+ };
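The lambda reads as a two-mode decision table: under deopt bundles, any call
that might reach the GC needs a statepoint; otherwise only pre-inserted
statepoints are rewritten. A self-contained sketch (InstInfo is a
hypothetical stand-in for the queries made on a real Instruction):

struct InstInfo { bool IsCall, CallsGCLeaf, IsStatepoint; };

static bool needsRewrite(const InstInfo &I, bool UseDeoptBundles) {
  if (UseDeoptBundles)
    return I.IsCall && !I.CallsGCLeaf; // every non-GC-leaf call qualifies
  return I.IsStatepoint;               // legacy mode: pre-marked statepoints
}

int main() {
  InstInfo Call = {true, false, false};
  return needsRewrite(Call, true) ? 0 : 1; // 0: the call gets rewritten
}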
// Gather all the statepoints which need to be rewritten. Be careful to only
// consider those in reachable code since we need to ask dominance queries
// when rewriting. We'll delete the unreachable ones in a moment.
SmallVector<CallSite, 64> ParsePointNeeded;
bool HasUnreachableStatepoint = false;
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
// TODO: only the ones with the flag set!
- if (isStatepoint(I)) {
+ if (NeedsRewrite(I)) {
if (DT.isReachableFromEntry(I.getParent()))
ParsePointNeeded.push_back(CallSite(&I));
else
@@ -2428,7 +2655,38 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
FoldSingleEntryPHINodes(&BB);
}
- MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+ // Before we start introducing relocations, we want to tweak the IR a bit to
+ // avoid unfortunate code generation effects. The main example is that we
+ // want to try to make sure the comparison feeding a branch is after any
+ // safepoints. Otherwise, we end up with a comparison of pre-relocation
+ // values feeding a branch after relocation. This is semantically correct,
+ // but results in extra register pressure since both the pre-relocation and
+ // post-relocation copies must be available in registers. For code without
+ // relocations this is handled elsewhere, but teaching the scheduler to
+ // reverse the transform we're about to do would be slightly complex.
+ // Note: This may extend the live range of the inputs to the icmp and thus
+  // increase the live set of any statepoint we move over.  This is profitable
+ // as long as all statepoints are in rare blocks. If we had in-register
+ // lowering for live values this would be a much safer transform.
+ auto getConditionInst = [](TerminatorInst *TI) -> Instruction* {
+ if (auto *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional())
+ return dyn_cast<Instruction>(BI->getCondition());
+ // TODO: Extend this to handle switches
+ return nullptr;
+ };
+ for (BasicBlock &BB : F) {
+ TerminatorInst *TI = BB.getTerminator();
+ if (auto *Cond = getConditionInst(TI))
+ // TODO: Handle more than just ICmps here. We should be able to move
+ // most instructions without side effects or memory access.
+ if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
+ MadeChange = true;
+ Cond->moveBefore(TI);
+ }
+ }
+
+ MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
return MadeChange;
}
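The icmp reordering above amounts to rotating the compare down so it sits
immediately before the terminator, after any safepoint between them. A toy,
self-contained illustration using strings for instructions:

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> BB = {"icmp", "statepoint", "br"};
  auto Cmp = std::find(BB.begin(), BB.end(), "icmp");
  // Move the compare to just before the terminator.
  std::rotate(Cmp, std::next(Cmp), std::prev(BB.end()));
  for (const auto &I : BB)
    std::printf("%s\n", I.c_str()); // statepoint, icmp, br
  return 0;
}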
@@ -2461,7 +2719,7 @@ static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
"support for FCA unimplemented");
if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
// The choice to exclude all things constant here is slightly subtle.
- // There are two idependent reasons:
+ // There are two independent reasons:
// - We assume that things which are constant (from LLVM's definition)
// do not move at runtime. For example, the address of a global
// variable is fixed, even though its contents may not be.
@@ -2599,7 +2857,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
} // while( !worklist.empty() )
#ifndef NDEBUG
- // Sanity check our ouput against SSA properties. This helps catch any
+ // Sanity check our output against SSA properties. This helps catch any
// missing kills during the above iteration.
for (BasicBlock &BB : F) {
checkBasicSSA(DT, Data, BB);
@@ -2620,7 +2878,7 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
// call result is not live (normal), nor are its arguments
// (unless they're used again later). This adjustment is
// specifically what we need to relocate
- BasicBlock::reverse_iterator rend(Inst);
+ BasicBlock::reverse_iterator rend(Inst->getIterator());
computeLiveInValues(BB->rbegin(), rend, LiveOut);
LiveOut.erase(Inst);
Out.insert(LiveOut.begin(), LiveOut.end());
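The reverse walk here is classic backwards liveness: at each instruction,
kill its definition, then gen its operands. A self-contained miniature
(MiniInst is a hypothetical stand-in for real instructions):

#include <set>
#include <string>
#include <vector>

struct MiniInst {
  std::string Def;               // empty when nothing is defined
  std::vector<std::string> Uses;
};

int main() {
  // %a = ...; %b = f(%a); use(%b)
  std::vector<MiniInst> BB = {{"a", {}}, {"b", {"a"}}, {"", {"b"}}};
  std::set<std::string> Live;    // live-out of the block: empty
  for (auto It = BB.rbegin(); It != BB.rend(); ++It) {
    if (!It->Def.empty())
      Live.erase(It->Def);       // kill: the def ends its liveness
    for (const auto &U : It->Uses)
      Live.insert(U);            // gen: operands become live
  }
  return static_cast<int>(Live.size()); // 0: every use is defined in-block
}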
@@ -2669,5 +2927,5 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
assert(Updated.count(KVPair.first) && "record for non-live value");
#endif
- Info.liveset = Updated;
+ Info.LiveSet = Updated;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 4d3a708fa20e..2fca803adde8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
@@ -479,6 +480,13 @@ private:
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
+ void visitFuncletPadInst(FuncletPadInst &FPI) {
+ markAnythingOverdefined(&FPI);
+ }
+ void visitCatchSwitchInst(CatchSwitchInst &CPI) {
+ markAnythingOverdefined(&CPI);
+ visitTerminatorInst(CPI);
+ }
// Instructions that cannot be folded away.
void visitStoreInst (StoreInst &I);
@@ -539,9 +547,9 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
- if (isa<InvokeInst>(TI)) {
- // Invoke instructions successors are always executable.
- Succs[0] = Succs[1] = true;
+  // Unwinding instructions' successors are always executable.
+ if (TI.isExceptional()) {
+ Succs.assign(TI.getNumSuccessors(), true);
return;
}
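In miniature, the rule is simply "an exceptional terminator makes every
successor edge feasible" (stand-alone sketch, not the SCCPSolver API):

#include <vector>

int main() {
  std::vector<bool> Succs(3, false);
  bool IsExceptional = true;          // stand-in for TI.isExceptional()
  if (IsExceptional)
    Succs.assign(Succs.size(), true); // every unwind edge is executable
  return Succs[2] ? 0 : 1;
}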
@@ -605,8 +613,8 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
return BI->getSuccessor(CI->isZero()) == To;
}
- // Invoke instructions successors are always executable.
- if (isa<InvokeInst>(TI))
+  // Unwinding instructions' successors are always executable.
+ if (TI->isExceptional())
return true;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
@@ -630,7 +638,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
- llvm_unreachable(nullptr);
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
// visit Implementations - Something changed in this instruction, either an
@@ -1126,7 +1134,7 @@ CallOverdefined:
// entry block executable and merge in the actual arguments to the call into
// the formal arguments of the function.
if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
- MarkBlockExecutable(F->begin());
+ MarkBlockExecutable(&F->front());
// Propagate information from this call site into the callee.
CallSite::arg_iterator CAI = CS.arg_begin();
@@ -1135,17 +1143,17 @@ CallOverdefined:
// If this argument is byval, and if the function is not readonly, there
// will be an implicit copy formed of the input aggregate.
if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
- markOverdefined(AI);
+ markOverdefined(&*AI);
continue;
}
if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal CallArg = getStructValueState(*CAI, i);
- mergeInValue(getStructValueState(AI, i), AI, CallArg);
+ mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg);
}
} else {
- mergeInValue(AI, getValueState(*CAI));
+ mergeInValue(&*AI, getValueState(*CAI));
}
}
}
@@ -1246,18 +1254,18 @@ void SCCPSolver::Solve() {
/// even if X isn't defined.
bool SCCPSolver::ResolvedUndefsIn(Function &F) {
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!BBExecutable.count(BB))
+ if (!BBExecutable.count(&*BB))
continue;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (Instruction &I : *BB) {
// Look for instructions which produce undef values.
- if (I->getType()->isVoidTy()) continue;
+ if (I.getType()->isVoidTy()) continue;
- if (StructType *STy = dyn_cast<StructType>(I->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(I.getType())) {
// Only a few things that can be structs matter for undef.
// Tracked calls must never be marked overdefined in ResolvedUndefsIn.
- if (CallSite CS = CallSite(I))
+ if (CallSite CS = CallSite(&I))
if (Function *F = CS.getCalledFunction())
if (MRVFunctionsTracked.count(F))
continue;
@@ -1270,14 +1278,14 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Send the results of everything else to overdefined. We could be
// more precise than this but it isn't worth bothering.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- LatticeVal &LV = getStructValueState(I, i);
+ LatticeVal &LV = getStructValueState(&I, i);
if (LV.isUndefined())
- markOverdefined(LV, I);
+ markOverdefined(LV, &I);
}
continue;
}
- LatticeVal &LV = getValueState(I);
+ LatticeVal &LV = getValueState(&I);
if (!LV.isUndefined()) continue;
// extractvalue is safe; check here because the argument is a struct.
@@ -1287,24 +1295,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Compute the operand LatticeVals, for convenience below.
// Anything taking a struct is conservatively assumed to require
// overdefined markings.
- if (I->getOperand(0)->getType()->isStructTy()) {
- markOverdefined(I);
+ if (I.getOperand(0)->getType()->isStructTy()) {
+ markOverdefined(&I);
return true;
}
- LatticeVal Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op0LV = getValueState(I.getOperand(0));
LatticeVal Op1LV;
- if (I->getNumOperands() == 2) {
- if (I->getOperand(1)->getType()->isStructTy()) {
- markOverdefined(I);
+ if (I.getNumOperands() == 2) {
+ if (I.getOperand(1)->getType()->isStructTy()) {
+ markOverdefined(&I);
return true;
}
- Op1LV = getValueState(I->getOperand(1));
+ Op1LV = getValueState(I.getOperand(1));
}
// If this is an instruction whose result is defined even if the input is
// not fully defined, propagate the information.
- Type *ITy = I->getType();
- switch (I->getOpcode()) {
+ Type *ITy = I.getType();
+ switch (I.getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Trunc:
@@ -1318,9 +1326,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::FRem:
// Floating-point binary operation: be conservative.
if (Op0LV.isUndefined() && Op1LV.isUndefined())
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
else
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::ZExt:
case Instruction::SExt:
@@ -1332,7 +1340,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::SIToFP:
case Instruction::UIToFP:
// undef -> 0; some outputs are impossible
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
case Instruction::And:
@@ -1341,7 +1349,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
break;
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Or:
@@ -1349,7 +1357,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (Op0LV.isUndefined() && Op1LV.isUndefined())
break;
// undef | X -> -1. X could be -1.
- markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ markForcedConstant(&I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::Xor:
@@ -1357,7 +1365,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// necessary, but we try to be nice to people who expect this
// behavior in simple cases
if (Op0LV.isUndefined() && Op1LV.isUndefined()) {
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
}
// undef ^ X -> undef
@@ -1373,7 +1381,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::AShr:
@@ -1381,7 +1389,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (Op1LV.isUndefined()) break;
// undef >>a X -> all ones
- markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ markForcedConstant(&I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::LShr:
case Instruction::Shl:
@@ -1391,17 +1399,17 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// undef << X -> 0
// undef >> X -> 0
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Select:
- Op1LV = getValueState(I->getOperand(1));
+ Op1LV = getValueState(I.getOperand(1));
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
if (Op0LV.isUndefined()) {
if (!Op1LV.isConstant()) // Pick the constant one if there is any.
- Op1LV = getValueState(I->getOperand(2));
+ Op1LV = getValueState(I.getOperand(2));
} else if (Op1LV.isUndefined()) {
// c ? undef : undef -> undef. No change.
- Op1LV = getValueState(I->getOperand(2));
+ Op1LV = getValueState(I.getOperand(2));
if (Op1LV.isUndefined())
break;
// Otherwise, c ? undef : x -> x.
@@ -1410,9 +1418,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
if (Op1LV.isConstant())
- markForcedConstant(I, Op1LV.getConstant());
+ markForcedConstant(&I, Op1LV.getConstant());
else
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::Load:
// A load here means one of two things: a load of undef from a global,
@@ -1421,9 +1429,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
break;
case Instruction::ICmp:
// X == undef -> undef. Other comparisons get more complicated.
- if (cast<ICmpInst>(I)->isEquality())
+ if (cast<ICmpInst>(&I)->isEquality())
break;
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::Call:
case Instruction::Invoke: {
@@ -1432,19 +1440,19 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// 2. It could be constant-foldable.
// Because of the way we solve return values, tracked calls must
// never be marked overdefined in ResolvedUndefsIn.
- if (Function *F = CallSite(I).getCalledFunction())
+ if (Function *F = CallSite(&I).getCalledFunction())
if (TrackedRetVals.count(F))
break;
// If the call is constant-foldable, we mark it overdefined because
// we do not know what return values are valid.
- markOverdefined(I);
+ markOverdefined(&I);
return true;
}
default:
// If we don't know what should happen here, conservatively mark it
// overdefined.
- markOverdefined(I);
+ markOverdefined(&I);
return true;
}
}
@@ -1462,7 +1470,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// false.
if (isa<UndefValue>(BI->getCondition())) {
BI->setCondition(ConstantInt::getFalse(BI->getContext()));
- markEdgeExecutable(BB, TI->getSuccessor(1));
+ markEdgeExecutable(&*BB, TI->getSuccessor(1));
return true;
}
@@ -1484,7 +1492,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
SI->setCondition(SI->case_begin().getCaseValue());
- markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor());
+ markEdgeExecutable(&*BB, SI->case_begin().getCaseSuccessor());
return true;
}
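The undef-resolution cases above boil down to a small forced-constant table.
A simplified, self-contained rendering of those rules, where 0 and -1 stand
for getNullValue and getAllOnesValue (two-operand integer ops only):

#include <cstdio>

static long long forcedConstantForUndef(char Op) {
  switch (Op) {
  case '*': case '&': case '/': case '%':
    return 0;  // undef * X, undef & X, undef / X, undef % X -> 0
  case '|':
    return -1; // undef | X -> -1, since X could be all ones
  case 'a':    // 'a' marks an arithmetic shift right
    return -1; // undef >>a X -> all ones
  default:
    return 0;  // shl/lshr of undef, and integer casts of undef, fold to 0
  }
}

int main() {
  std::printf("%lld\n", forcedConstantForUndef('|')); // prints -1
  return 0;
}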
@@ -1506,6 +1514,7 @@ namespace {
struct SCCP : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
@@ -1541,11 +1550,10 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
while (EndInst != BB->begin()) {
// Delete the next to last instruction.
- BasicBlock::iterator I = EndInst;
- Instruction *Inst = --I;
+ Instruction *Inst = &*--EndInst->getIterator();
if (!Inst->use_empty())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (isa<LandingPadInst>(Inst)) {
+ if (Inst->isEHPad()) {
EndInst = Inst;
continue;
}
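The loop deletes backwards from the terminator, stopping early at EH pads so
they stay at the head of the block. A stand-alone sketch of that traversal
over a std::list of opcode names (the RAUW-to-undef step is elided):

#include <iterator>
#include <list>
#include <string>

int main() {
  std::list<std::string> BB = {"landingpad", "add", "mul", "ret"};
  auto End = std::prev(BB.end());   // the terminator is the last survivor
  while (End != BB.begin()) {
    auto Prev = std::prev(End);
    if (*Prev == "landingpad") {    // EH pads are kept, like Inst->isEHPad()
      End = Prev;
      continue;
    }
    BB.erase(Prev);                 // the real pass RAUWs uses to undef first
  }
  return static_cast<int>(BB.size()); // 2: landingpad and ret remain
}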
@@ -1568,11 +1576,11 @@ bool SCCP::runOnFunction(Function &F) {
SCCPSolver Solver(DL, TLI);
// Mark the first block of the function as being executable.
- Solver.MarkBlockExecutable(F.begin());
+ Solver.MarkBlockExecutable(&F.front());
// Mark all arguments to the function as being overdefined.
- for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
- Solver.markAnythingOverdefined(AI);
+ for (Argument &AI : F.args())
+ Solver.markAnythingOverdefined(&AI);
// Solve for constants.
bool ResolvedUndefs = true;
@@ -1589,8 +1597,8 @@ bool SCCP::runOnFunction(Function &F) {
// as we cannot modify the CFG of the function.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!Solver.isBlockExecutable(BB)) {
- DeleteInstructionInBlock(BB);
+ if (!Solver.isBlockExecutable(&*BB)) {
+ DeleteInstructionInBlock(&*BB);
MadeChanges = true;
continue;
}
@@ -1599,7 +1607,7 @@ bool SCCP::runOnFunction(Function &F) {
// constants if we have found them to be of constant values.
//
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
- Instruction *Inst = BI++;
+ Instruction *Inst = &*BI++;
if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
continue;
@@ -1713,36 +1721,34 @@ bool IPSCCP::runOnModule(Module &M) {
// If this is a strong or ODR definition of this function, then we can
// propagate information about its result into callsites of it.
if (!F->mayBeOverridden())
- Solver.AddTrackedFunction(F);
+ Solver.AddTrackedFunction(&*F);
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
if (F->hasLocalLinkage()) {
- if (AddressIsTaken(F))
- AddressTakenFunctions.insert(F);
+ if (AddressIsTaken(&*F))
+ AddressTakenFunctions.insert(&*F);
else {
- Solver.AddArgumentTrackedFunction(F);
+ Solver.AddArgumentTrackedFunction(&*F);
continue;
}
}
// Assume the function is called.
- Solver.MarkBlockExecutable(F->begin());
+ Solver.MarkBlockExecutable(&F->front());
// Assume nothing about the incoming arguments.
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI)
- Solver.markAnythingOverdefined(AI);
+ for (Argument &AI : F->args())
+ Solver.markAnythingOverdefined(&AI);
}
// Loop over global variables. We inform the solver about any internal global
// variables that do not have their 'addresses taken'. If they don't have
// their addresses taken, we can propagate constants through them.
- for (Module::global_iterator G = M.global_begin(), E = M.global_end();
- G != E; ++G)
- if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G))
- Solver.TrackValueOfGlobalVariable(G);
+ for (GlobalVariable &G : M.globals())
+ if (!G.isConstant() && G.hasLocalLinkage() && !AddressIsTaken(&G))
+ Solver.TrackValueOfGlobalVariable(&G);
// Solve for constants.
bool ResolvedUndefs = true;
@@ -1763,7 +1769,10 @@ bool IPSCCP::runOnModule(Module &M) {
SmallVector<BasicBlock*, 512> BlocksToErase;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (Solver.isBlockExecutable(F->begin())) {
+ if (F->isDeclaration())
+ continue;
+
+ if (Solver.isBlockExecutable(&F->front())) {
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
if (AI->use_empty() || AI->getType()->isStructTy()) continue;
@@ -1771,7 +1780,7 @@ bool IPSCCP::runOnModule(Module &M) {
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
- LatticeVal IV = Solver.getLatticeValueFor(AI);
+ LatticeVal IV = Solver.getLatticeValueFor(&*AI);
if (IV.isOverdefined()) continue;
Constant *CST = IV.isConstant() ?
@@ -1786,28 +1795,27 @@ bool IPSCCP::runOnModule(Module &M) {
}
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- if (!Solver.isBlockExecutable(BB)) {
- DeleteInstructionInBlock(BB);
+ if (!Solver.isBlockExecutable(&*BB)) {
+ DeleteInstructionInBlock(&*BB);
MadeChanges = true;
TerminatorInst *TI = BB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = TI->getSuccessor(i);
+ for (BasicBlock *Succ : TI->successors()) {
if (!Succ->empty() && isa<PHINode>(Succ->begin()))
- TI->getSuccessor(i)->removePredecessor(BB);
+ Succ->removePredecessor(&*BB);
}
if (!TI->use_empty())
TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
TI->eraseFromParent();
- new UnreachableInst(M.getContext(), BB);
+ new UnreachableInst(M.getContext(), &*BB);
if (&*BB != &F->front())
- BlocksToErase.push_back(BB);
+ BlocksToErase.push_back(&*BB);
continue;
}
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
- Instruction *Inst = BI++;
+ Instruction *Inst = &*BI++;
if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
continue;
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index 947513a36572..a7361b5fe083 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -23,12 +23,12 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -37,8 +37,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -53,9 +51,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#if __cplusplus >= 201103L && !defined(NDEBUG)
// We only use this for a debug check in C++11
@@ -63,6 +61,7 @@
#endif
using namespace llvm;
+using namespace llvm::sroa;
#define DEBUG_TYPE "sroa"
@@ -77,11 +76,6 @@ STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
-/// Hidden option to force the pass to not use DomTree and mem2reg, instead
-/// forming SSA values through the SSAUpdater infrastructure.
-static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false),
- cl::Hidden);
-
/// Hidden option to enable randomly shuffling the slices to help uncover
/// instability in their order.
static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
@@ -205,7 +199,6 @@ template <typename T> struct isPodLike;
template <> struct isPodLike<Slice> { static const bool value = true; };
}
-namespace {
/// \brief Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
@@ -213,7 +206,7 @@ namespace {
/// for the slices used and we reflect that in this structure. The uses are
/// stored, sorted by increasing beginning offset and with unsplittable slices
/// starting at a particular offset before splittable slices.
-class AllocaSlices {
+class llvm::sroa::AllocaSlices {
public:
/// \brief Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
@@ -253,281 +246,10 @@ public:
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
}
- // Forward declare an iterator to befriend it.
+ // Forward declare the iterator and range accessor for walking the
+ // partitions.
class partition_iterator;
-
- /// \brief A partition of the slices.
- ///
- /// An ephemeral representation for a range of slices which can be viewed as
- /// a partition of the alloca. This range represents a span of the alloca's
- /// memory which cannot be split, and provides access to all of the slices
- /// overlapping some part of the partition.
- ///
- /// Objects of this type are produced by traversing the alloca's slices, but
- /// are only ephemeral and not persistent.
- class Partition {
- private:
- friend class AllocaSlices;
- friend class AllocaSlices::partition_iterator;
-
- /// \brief The begining and ending offsets of the alloca for this partition.
- uint64_t BeginOffset, EndOffset;
-
- /// \brief The start end end iterators of this partition.
- iterator SI, SJ;
-
- /// \brief A collection of split slice tails overlapping the partition.
- SmallVector<Slice *, 4> SplitTails;
-
- /// \brief Raw constructor builds an empty partition starting and ending at
- /// the given iterator.
- Partition(iterator SI) : SI(SI), SJ(SI) {}
-
- public:
- /// \brief The start offset of this partition.
- ///
- /// All of the contained slices start at or after this offset.
- uint64_t beginOffset() const { return BeginOffset; }
-
- /// \brief The end offset of this partition.
- ///
- /// All of the contained slices end at or before this offset.
- uint64_t endOffset() const { return EndOffset; }
-
- /// \brief The size of the partition.
- ///
- /// Note that this can never be zero.
- uint64_t size() const {
- assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
- return EndOffset - BeginOffset;
- }
-
- /// \brief Test whether this partition contains no slices, and merely spans
- /// a region occupied by split slices.
- bool empty() const { return SI == SJ; }
-
- /// \name Iterate slices that start within the partition.
- /// These may be splittable or unsplittable. They have a begin offset >= the
- /// partition begin offset.
- /// @{
- // FIXME: We should probably define a "concat_iterator" helper and use that
- // to stitch together pointee_iterators over the split tails and the
- // contiguous iterators of the partition. That would give a much nicer
- // interface here. We could then additionally expose filtered iterators for
- // split, unsplit, and unsplittable splices based on the usage patterns.
- iterator begin() const { return SI; }
- iterator end() const { return SJ; }
- /// @}
-
- /// \brief Get the sequence of split slice tails.
- ///
- /// These tails are of slices which start before this partition but are
- /// split and overlap into the partition. We accumulate these while forming
- /// partitions.
- ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
- };
-
- /// \brief An iterator over partitions of the alloca's slices.
- ///
- /// This iterator implements the core algorithm for partitioning the alloca's
- /// slices. It is a forward iterator as we don't support backtracking for
- /// efficiency reasons, and re-use a single storage area to maintain the
- /// current set of split slices.
- ///
- /// It is templated on the slice iterator type to use so that it can operate
- /// with either const or non-const slice iterators.
- class partition_iterator
- : public iterator_facade_base<partition_iterator,
- std::forward_iterator_tag, Partition> {
- friend class AllocaSlices;
-
- /// \brief Most of the state for walking the partitions is held in a class
- /// with a nice interface for examining them.
- Partition P;
-
- /// \brief We need to keep the end of the slices to know when to stop.
- AllocaSlices::iterator SE;
-
- /// \brief We also need to keep track of the maximum split end offset seen.
- /// FIXME: Do we really?
- uint64_t MaxSplitSliceEndOffset;
-
- /// \brief Sets the partition to be empty at given iterator, and sets the
- /// end iterator.
- partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
- : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
- // If not already at the end, advance our state to form the initial
- // partition.
- if (SI != SE)
- advance();
- }
-
- /// \brief Advance the iterator to the next partition.
- ///
- /// Requires that the iterator not be at the end of the slices.
- void advance() {
- assert((P.SI != SE || !P.SplitTails.empty()) &&
- "Cannot advance past the end of the slices!");
-
- // Clear out any split uses which have ended.
- if (!P.SplitTails.empty()) {
- if (P.EndOffset >= MaxSplitSliceEndOffset) {
- // If we've finished all splits, this is easy.
- P.SplitTails.clear();
- MaxSplitSliceEndOffset = 0;
- } else {
- // Remove the uses which have ended in the prior partition. This
- // cannot change the max split slice end because we just checked that
- // the prior partition ended prior to that max.
- P.SplitTails.erase(
- std::remove_if(
- P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
- P.SplitTails.end());
- assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() == MaxSplitSliceEndOffset;
- }) &&
- "Could not find the current max split slice offset!");
- assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() <= MaxSplitSliceEndOffset;
- }) &&
- "Max split slice end offset is not actually the max!");
- }
- }
-
- // If P.SI is already at the end, then we've cleared the split tail and
- // now have an end iterator.
- if (P.SI == SE) {
- assert(P.SplitTails.empty() && "Failed to clear the split slices!");
- return;
- }
-
- // If we had a non-empty partition previously, set up the state for
- // subsequent partitions.
- if (P.SI != P.SJ) {
- // Accumulate all the splittable slices which started in the old
- // partition into the split list.
- for (Slice &S : P)
- if (S.isSplittable() && S.endOffset() > P.EndOffset) {
- P.SplitTails.push_back(&S);
- MaxSplitSliceEndOffset =
- std::max(S.endOffset(), MaxSplitSliceEndOffset);
- }
-
- // Start from the end of the previous partition.
- P.SI = P.SJ;
-
- // If P.SI is now at the end, we at most have a tail of split slices.
- if (P.SI == SE) {
- P.BeginOffset = P.EndOffset;
- P.EndOffset = MaxSplitSliceEndOffset;
- return;
- }
-
- // If the we have split slices and the next slice is after a gap and is
- // not splittable immediately form an empty partition for the split
- // slices up until the next slice begins.
- if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
- !P.SI->isSplittable()) {
- P.BeginOffset = P.EndOffset;
- P.EndOffset = P.SI->beginOffset();
- return;
- }
- }
-
- // OK, we need to consume new slices. Set the end offset based on the
- // current slice, and step SJ past it. The beginning offset of the
- // parttion is the beginning offset of the next slice unless we have
- // pre-existing split slices that are continuing, in which case we begin
- // at the prior end offset.
- P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
- P.EndOffset = P.SI->endOffset();
- ++P.SJ;
-
- // There are two strategies to form a partition based on whether the
- // partition starts with an unsplittable slice or a splittable slice.
- if (!P.SI->isSplittable()) {
- // When we're forming an unsplittable region, it must always start at
- // the first slice and will extend through its end.
- assert(P.BeginOffset == P.SI->beginOffset());
-
- // Form a partition including all of the overlapping slices with this
- // unsplittable slice.
- while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
- if (!P.SJ->isSplittable())
- P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
- ++P.SJ;
- }
-
- // We have a partition across a set of overlapping unsplittable
- // partitions.
- return;
- }
-
- // If we're starting with a splittable slice, then we need to form
- // a synthetic partition spanning it and any other overlapping splittable
- // splices.
- assert(P.SI->isSplittable() && "Forming a splittable partition!");
-
- // Collect all of the overlapping splittable slices.
- while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
- P.SJ->isSplittable()) {
- P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
- ++P.SJ;
- }
-
- // Back upiP.EndOffset if we ended the span early when encountering an
- // unsplittable slice. This synthesizes the early end offset of
- // a partition spanning only splittable slices.
- if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
- assert(!P.SJ->isSplittable());
- P.EndOffset = P.SJ->beginOffset();
- }
- }
-
- public:
- bool operator==(const partition_iterator &RHS) const {
- assert(SE == RHS.SE &&
- "End iterators don't match between compared partition iterators!");
-
- // The observed positions of partitions is marked by the P.SI iterator and
- // the emptyness of the split slices. The latter is only relevant when
- // P.SI == SE, as the end iterator will additionally have an empty split
- // slices list, but the prior may have the same P.SI and a tail of split
- // slices.
- if (P.SI == RHS.P.SI &&
- P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
- assert(P.SJ == RHS.P.SJ &&
- "Same set of slices formed two different sized partitions!");
- assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
- "Same slice position with differently sized non-empty split "
- "slice tails!");
- return true;
- }
- return false;
- }
-
- partition_iterator &operator++() {
- advance();
- return *this;
- }
-
- Partition &operator*() { return P; }
- };
-
- /// \brief A forward range over the partitions of the alloca's slices.
- ///
- /// This accesses an iterator range over the partitions of the alloca's
- /// slices. It computes these partitions on the fly based on the overlapping
- /// offsets of the slices and the ability to split them. It will visit "empty"
- /// partitions to cover regions of the alloca only accessed via split
- /// slices.
- iterator_range<partition_iterator> partitions() {
- return make_range(partition_iterator(begin(), end()),
- partition_iterator(end(), end()));
- }
+ iterator_range<partition_iterator> partitions();
/// \brief Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
@@ -595,6 +317,280 @@ private:
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
};
+
+/// \brief A partition of the slices.
+///
+/// An ephemeral representation for a range of slices which can be viewed as
+/// a partition of the alloca. This range represents a span of the alloca's
+/// memory which cannot be split, and provides access to all of the slices
+/// overlapping some part of the partition.
+///
+/// Objects of this type are produced by traversing the alloca's slices, but
+/// are only ephemeral and not persistent.
+class llvm::sroa::Partition {
+private:
+ friend class AllocaSlices;
+ friend class AllocaSlices::partition_iterator;
+
+ typedef AllocaSlices::iterator iterator;
+
+ /// \brief The beginning and ending offsets of the alloca for this
+ /// partition.
+ uint64_t BeginOffset, EndOffset;
+
+  /// \brief The start and end iterators of this partition.
+ iterator SI, SJ;
+
+ /// \brief A collection of split slice tails overlapping the partition.
+ SmallVector<Slice *, 4> SplitTails;
+
+ /// \brief Raw constructor builds an empty partition starting and ending at
+ /// the given iterator.
+ Partition(iterator SI) : SI(SI), SJ(SI) {}
+
+public:
+ /// \brief The start offset of this partition.
+ ///
+ /// All of the contained slices start at or after this offset.
+ uint64_t beginOffset() const { return BeginOffset; }
+
+ /// \brief The end offset of this partition.
+ ///
+ /// All of the contained slices end at or before this offset.
+ uint64_t endOffset() const { return EndOffset; }
+
+ /// \brief The size of the partition.
+ ///
+ /// Note that this can never be zero.
+ uint64_t size() const {
+ assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
+ return EndOffset - BeginOffset;
+ }
+
+ /// \brief Test whether this partition contains no slices, and merely spans
+ /// a region occupied by split slices.
+ bool empty() const { return SI == SJ; }
+
+ /// \name Iterate slices that start within the partition.
+ /// These may be splittable or unsplittable. They have a begin offset >= the
+ /// partition begin offset.
+ /// @{
+ // FIXME: We should probably define a "concat_iterator" helper and use that
+ // to stitch together pointee_iterators over the split tails and the
+ // contiguous iterators of the partition. That would give a much nicer
+ // interface here. We could then additionally expose filtered iterators for
+  // split, unsplit, and unsplittable slices based on the usage patterns.
+ iterator begin() const { return SI; }
+ iterator end() const { return SJ; }
+ /// @}
+
+ /// \brief Get the sequence of split slice tails.
+ ///
+ /// These tails are of slices which start before this partition but are
+ /// split and overlap into the partition. We accumulate these while forming
+ /// partitions.
+ ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
+};
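Conceptually, the partition walk implemented by the iterator below coalesces
overlapping slices and closes a partition at every gap. A heavily simplified,
self-contained sketch that ignores splittability entirely:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Slices sorted by begin offset, as AllocaSlices guarantees.
  std::vector<std::pair<int, int>> Slices = {{0, 4}, {2, 8}, {10, 12}};
  int Begin = Slices[0].first, End = Slices[0].second;
  for (std::size_t I = 1; I < Slices.size(); ++I) {
    if (Slices[I].first < End) {
      End = std::max(End, Slices[I].second); // overlap: grow the partition
    } else {
      std::printf("[%d,%d)\n", Begin, End);  // gap: emit and start anew
      Begin = Slices[I].first;
      End = Slices[I].second;
    }
  }
  std::printf("[%d,%d)\n", Begin, End);      // prints [0,8) then [10,12)
  return 0;
}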
+
+/// \brief An iterator over partitions of the alloca's slices.
+///
+/// This iterator implements the core algorithm for partitioning the alloca's
+/// slices. It is a forward iterator as we don't support backtracking for
+/// efficiency reasons, and re-use a single storage area to maintain the
+/// current set of split slices.
+///
+/// It is templated on the slice iterator type to use so that it can operate
+/// with either const or non-const slice iterators.
+class AllocaSlices::partition_iterator
+ : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
+ Partition> {
+ friend class AllocaSlices;
+
+ /// \brief Most of the state for walking the partitions is held in a class
+ /// with a nice interface for examining them.
+ Partition P;
+
+ /// \brief We need to keep the end of the slices to know when to stop.
+ AllocaSlices::iterator SE;
+
+ /// \brief We also need to keep track of the maximum split end offset seen.
+ /// FIXME: Do we really?
+ uint64_t MaxSplitSliceEndOffset;
+
+  /// \brief Sets the partition to be empty at the given iterator, and sets the
+ /// end iterator.
+ partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
+ : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
+ // If not already at the end, advance our state to form the initial
+ // partition.
+ if (SI != SE)
+ advance();
+ }
+
+ /// \brief Advance the iterator to the next partition.
+ ///
+ /// Requires that the iterator not be at the end of the slices.
+ void advance() {
+ assert((P.SI != SE || !P.SplitTails.empty()) &&
+ "Cannot advance past the end of the slices!");
+
+ // Clear out any split uses which have ended.
+ if (!P.SplitTails.empty()) {
+ if (P.EndOffset >= MaxSplitSliceEndOffset) {
+ // If we've finished all splits, this is easy.
+ P.SplitTails.clear();
+ MaxSplitSliceEndOffset = 0;
+ } else {
+ // Remove the uses which have ended in the prior partition. This
+ // cannot change the max split slice end because we just checked that
+ // the prior partition ended prior to that max.
+ P.SplitTails.erase(
+ std::remove_if(
+ P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
+ P.SplitTails.end());
+ assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) {
+ return S->endOffset() == MaxSplitSliceEndOffset;
+ }) &&
+ "Could not find the current max split slice offset!");
+ assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) {
+ return S->endOffset() <= MaxSplitSliceEndOffset;
+ }) &&
+ "Max split slice end offset is not actually the max!");
+ }
+ }
+
+ // If P.SI is already at the end, then we've cleared the split tail and
+ // now have an end iterator.
+ if (P.SI == SE) {
+ assert(P.SplitTails.empty() && "Failed to clear the split slices!");
+ return;
+ }
+
+ // If we had a non-empty partition previously, set up the state for
+ // subsequent partitions.
+ if (P.SI != P.SJ) {
+ // Accumulate all the splittable slices which started in the old
+ // partition into the split list.
+ for (Slice &S : P)
+ if (S.isSplittable() && S.endOffset() > P.EndOffset) {
+ P.SplitTails.push_back(&S);
+ MaxSplitSliceEndOffset =
+ std::max(S.endOffset(), MaxSplitSliceEndOffset);
+ }
+
+ // Start from the end of the previous partition.
+ P.SI = P.SJ;
+
+ // If P.SI is now at the end, we at most have a tail of split slices.
+ if (P.SI == SE) {
+ P.BeginOffset = P.EndOffset;
+ P.EndOffset = MaxSplitSliceEndOffset;
+ return;
+ }
+
+ // If we have split slices and the next slice is after a gap and is
+ // not splittable, immediately form an empty partition for the split
+ // slices up until the next slice begins.
+ if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
+ !P.SI->isSplittable()) {
+ P.BeginOffset = P.EndOffset;
+ P.EndOffset = P.SI->beginOffset();
+ return;
+ }
+ }
+
+ // OK, we need to consume new slices. Set the end offset based on the
+ // current slice, and step SJ past it. The beginning offset of the
+ // partition is the beginning offset of the next slice unless we have
+ // pre-existing split slices that are continuing, in which case we begin
+ // at the prior end offset.
+ P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
+ P.EndOffset = P.SI->endOffset();
+ ++P.SJ;
+
+ // There are two strategies to form a partition based on whether the
+ // partition starts with an unsplittable slice or a splittable slice.
+ if (!P.SI->isSplittable()) {
+ // When we're forming an unsplittable region, it must always start at
+ // the first slice and will extend through its end.
+ assert(P.BeginOffset == P.SI->beginOffset());
+
+ // Form a partition including all of the overlapping slices with this
+ // unsplittable slice.
+ while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
+ if (!P.SJ->isSplittable())
+ P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
+ ++P.SJ;
+ }
+
+ // We have a partition across a set of overlapping unsplittable
+ // slices.
+ return;
+ }
+
+ // If we're starting with a splittable slice, then we need to form
+ // a synthetic partition spanning it and any other overlapping splittable
+ // slices.
+ assert(P.SI->isSplittable() && "Forming a splittable partition!");
+
+ // Collect all of the overlapping splittable slices.
+ while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
+ P.SJ->isSplittable()) {
+ P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
+ ++P.SJ;
+ }
+
+ // Back up P.EndOffset if we ended the span early when encountering an
+ // unsplittable slice. This synthesizes the early end offset of
+ // a partition spanning only splittable slices.
+ if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
+ assert(!P.SJ->isSplittable());
+ P.EndOffset = P.SJ->beginOffset();
+ }
+ }
+
+public:
+ bool operator==(const partition_iterator &RHS) const {
+ assert(SE == RHS.SE &&
+ "End iterators don't match between compared partition iterators!");
+
+ // The observed position of a partition is marked by the P.SI iterator and
+ // the emptiness of the split slices. The latter is only relevant when
+ // P.SI == SE, as the end iterator will additionally have an empty split
+ // slices list, but the prior may have the same P.SI and a tail of split
+ // slices.
+ if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
+ assert(P.SJ == RHS.P.SJ &&
+ "Same set of slices formed two different sized partitions!");
+ assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
+ "Same slice position with differently sized non-empty split "
+ "slice tails!");
+ return true;
+ }
+ return false;
+ }
+
+ partition_iterator &operator++() {
+ advance();
+ return *this;
+ }
+
+ Partition &operator*() { return P; }
+};
+
+/// \brief A forward range over the partitions of the alloca's slices.
+///
+/// This accesses an iterator range over the partitions of the alloca's
+/// slices. It computes these partitions on the fly based on the overlapping
+/// offsets of the slices and the ability to split them. It will visit "empty"
+/// partitions to cover regions of the alloca only accessed via split
+/// slices.
+iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
+ return make_range(partition_iterator(begin(), end()),
+ partition_iterator(end(), end()));
}
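A minimal consumer sketch (editorial illustration, not part of this change) of how the range above is meant to be walked, assuming an AllocaSlices instance named AS:

  void visitPartitions(AllocaSlices &AS) {
    for (Partition &P : AS.partitions()) {
      if (P.empty()) {
        // An "empty" partition carries no slices of its own; it only covers
        // a region spanned by split slice tails.
        assert(!P.splitSliceTails().empty());
        continue;
      }
      // Non-empty partitions span at least one byte and contain every slice
      // that begins in [beginOffset(), endOffset()).
      for (Slice &S : P)
        assert(S.beginOffset() >= P.beginOffset() &&
               S.beginOffset() < P.endOffset());
    }
  }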
static Value *foldSelectInst(SelectInst &SI) {
@@ -1072,217 +1068,6 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-namespace {
-/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
-///
-/// This subclass of LoadAndStorePromoter adds overrides to handle promoting
-/// the loads and stores of an alloca instruction, as well as updating its
-/// debug information. This is used when a domtree is unavailable and thus
-/// mem2reg in its full form can't be used to handle promotion of allocas to
-/// scalar values.
-class AllocaPromoter : public LoadAndStorePromoter {
- AllocaInst &AI;
- DIBuilder &DIB;
-
- SmallVector<DbgDeclareInst *, 4> DDIs;
- SmallVector<DbgValueInst *, 4> DVIs;
-
-public:
- AllocaPromoter(ArrayRef<const Instruction *> Insts,
- SSAUpdater &S,
- AllocaInst &AI, DIBuilder &DIB)
- : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
-
- void run(const SmallVectorImpl<Instruction *> &Insts) {
- // Retain the debug information attached to the alloca for use when
- // rewriting loads and stores.
- if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
- if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
- for (User *U : DINode->users())
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
- DDIs.push_back(DDI);
- else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
- DVIs.push_back(DVI);
- }
- }
-
- LoadAndStorePromoter::run(Insts);
-
- // While we have the debug information, clear it off of the alloca. The
- // caller takes care of deleting the alloca.
- while (!DDIs.empty())
- DDIs.pop_back_val()->eraseFromParent();
- while (!DVIs.empty())
- DVIs.pop_back_val()->eraseFromParent();
- }
-
- bool
- isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction *> &Insts) const override {
- Value *Ptr;
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- Ptr = LI->getOperand(0);
- else
- Ptr = cast<StoreInst>(I)->getPointerOperand();
-
- // Only used to detect cycles, which will be rare and quickly found as
- // we're walking up a chain of defs rather than down through uses.
- SmallPtrSet<Value *, 4> Visited;
-
- do {
- if (Ptr == &AI)
- return true;
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
- Ptr = BCI->getOperand(0);
- else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
- Ptr = GEPI->getPointerOperand();
- else
- return false;
-
- } while (Visited.insert(Ptr).second);
-
- return false;
- }
-
- void updateDebugInfo(Instruction *Inst) const override {
- for (DbgDeclareInst *DDI : DDIs)
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- for (DbgValueInst *DVI : DVIs) {
- Value *Arg = nullptr;
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- // If an argument is zero extended then use argument directly. The ZExt
- // may be zapped by an optimization pass in future.
- if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- Arg = dyn_cast<Argument>(ZExt->getOperand(0));
- else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- Arg = dyn_cast<Argument>(SExt->getOperand(0));
- if (!Arg)
- Arg = SI->getValueOperand();
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Arg = LI->getPointerOperand();
- } else {
- continue;
- }
- DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
- DVI->getExpression(), DVI->getDebugLoc(),
- Inst);
- }
- }
-};
-} // end anon namespace
-
-namespace {
-/// \brief An optimization pass providing Scalar Replacement of Aggregates.
-///
-/// This pass takes allocations which can be completely analyzed (that is, they
-/// don't escape) and tries to turn them into scalar SSA values. There are
-/// a few steps to this process.
-///
-/// 1) It takes allocations of aggregates and analyzes the ways in which they
-/// are used to try to split them into smaller allocations, ideally of
-/// a single scalar data type. It will split up memcpy and memset accesses
-/// as necessary and try to isolate individual scalar accesses.
-/// 2) It will transform accesses into forms which are suitable for SSA value
-/// promotion. This can be replacing a memset with a scalar store of an
-/// integer value, or it can involve speculating operations on a PHI or
-/// select to be a PHI or select of the results.
-/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
-/// onto insert and extract operations on a vector value, and convert them to
-/// this form. By doing so, it will enable promotion of vector aggregates to
-/// SSA vector values.
-class SROA : public FunctionPass {
- const bool RequiresDomTree;
-
- LLVMContext *C;
- DominatorTree *DT;
- AssumptionCache *AC;
-
- /// \brief Worklist of alloca instructions to simplify.
- ///
- /// Each alloca in the function is added to this. Each new alloca formed gets
- /// added to it as well to recursively simplify unless that alloca can be
- /// directly promoted. Finally, each time we rewrite a use of an alloca other
- /// the one being actively rewritten, we add it back onto the list if not
- /// already present to ensure it is re-visited.
- SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist;
-
- /// \brief A collection of instructions to delete.
- /// We try to batch deletions to simplify code and make things a bit more
- /// efficient.
- SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts;
-
- /// \brief Post-promotion worklist.
- ///
- /// Sometimes we discover an alloca which has a high probability of becoming
- /// viable for SROA after a round of promotion takes place. In those cases,
- /// the alloca is enqueued here for re-processing.
- ///
- /// Note that we have to be very careful to clear allocas out of this list in
- /// the event they are deleted.
- SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist;
-
- /// \brief A collection of alloca instructions we can directly promote.
- std::vector<AllocaInst *> PromotableAllocas;
-
- /// \brief A worklist of PHIs to speculate prior to promoting allocas.
- ///
- /// All of these PHIs have been checked for the safety of speculation and by
- /// being speculated will allow promoting allocas currently in the promotable
- /// queue.
- SetVector<PHINode *, SmallVector<PHINode *, 2>> SpeculatablePHIs;
-
- /// \brief A worklist of select instructions to speculate prior to promoting
- /// allocas.
- ///
- /// All of these select instructions have been checked for the safety of
- /// speculation and by being speculated will allow promoting allocas
- /// currently in the promotable queue.
- SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
-
-public:
- SROA(bool RequiresDomTree = true)
- : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
- DT(nullptr) {
- initializeSROAPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- const char *getPassName() const override { return "SROA"; }
- static char ID;
-
-private:
- friend class PHIOrSelectSpeculator;
- friend class AllocaSliceRewriter;
-
- bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
- AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P);
- bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
- bool runOnAlloca(AllocaInst &AI);
- void clobberUse(Use &U);
- void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
- bool promoteAllocas(Function &F);
-};
-}
-
-char SROA::ID = 0;
-
-FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
- return new SROA(RequiresDomTree);
-}
-
-INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
- false)
-
/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
static Type *findCommonType(AllocaSlices::const_iterator B,
@@ -1373,7 +1158,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// Ensure that there are no instructions between the PHI and the load that
// could store.
- for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
+ for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
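To illustrate the hazard this scan rejects, here is a hypothetical IR fragment (not from this change): the call between the PHI and the load may write memory, so hoisting the load into the predecessors would be unsafe.

  %phi = phi i32* [ %p, %then ], [ %q, %else ]
  call void @may_store()              ; mayWriteToMemory() is true
  %v = load i32, i32* %phi            ; cannot be speculated above the call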
@@ -1934,10 +1719,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
/// \brief Test whether the given slice use can be promoted to a vector.
///
-/// This function is called to test each entry in a partioning which is slated
+/// This function is called to test each entry in a partition which is slated
/// for a single slice.
-static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
- const Slice &S, VectorType *Ty,
+static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
+ VectorType *Ty,
uint64_t ElementSize,
const DataLayout &DL) {
// First validate the slice offsets.
@@ -2012,8 +1797,7 @@ static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
/// SSA value. We only can ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
-static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P,
- const DataLayout &DL) {
+static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Collect the candidate types for vector-based promotion. Also track whether
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
@@ -2130,7 +1914,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
// We can't reasonably handle cases where the load or store extends past
- // the end of the aloca's type and into its padding.
+ // the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
@@ -2199,7 +1983,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
/// This is a quick test to check whether we can rewrite the integer loads and
/// stores to a particular alloca into wider loads and stores and be able to
/// promote the resulting alloca.
-static bool isIntegerWideningViable(AllocaSlices::Partition &P, Type *AllocaTy,
+static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
const DataLayout &DL) {
uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
// Don't create integer types larger than the maximum bitwidth.
@@ -2368,14 +2152,14 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
return V;
}
-namespace {
/// \brief Visitor to rewrite instructions using a particular slice of an alloca
/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
-class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
+class llvm::sroa::AllocaSliceRewriter
+ : public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
@@ -2583,9 +2367,19 @@ private:
V = convertValue(DL, IRB, V, IntTy);
assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
- if (Offset > 0 || NewEndOffset < NewAllocaEndOffset)
- V = extractInteger(DL, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- "extract");
+ if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
+ IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
+ V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
+ }
+ // It is possible that the extracted type is not the load type. This
+ // happens if there is a load past the end of the alloca, and as
+ // a consequence the slice is narrower but still a candidate for integer
+ // lowering. To handle this case, we just zero extend the extracted
+ // integer.
+ assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
+ "Can only handle an extract for an overly wide load");
+ if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
+ V = IRB.CreateZExt(V, LI.getType());
return V;
}
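A hypothetical illustration of the widening case handled just above: with a 4-byte load whose slice covers only 3 bytes of the alloca, the rewriter extracts an i24 and zero-extends it back to the load's i32 type (the trunc here merely stands in for the extractInteger result):

  %partial = trunc i32 %alloca.val to i24    ; the 24 bits actually backed by the slice
  %v       = zext i24 %partial to i32        ; widened back to the load's type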
@@ -2648,7 +2442,7 @@ private:
DL.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
- IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
+ IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
@@ -3126,7 +2920,7 @@ private:
// dominate the PHI.
IRBuilderTy PtrBuilder(IRB);
if (isa<PHINode>(OldPtr))
- PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
+ PtrBuilder.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
else
PtrBuilder.SetInsertPoint(OldPtr);
PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
@@ -3169,7 +2963,6 @@ private:
return true;
}
};
-}
namespace {
/// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3181,8 +2974,6 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
- const DataLayout &DL;
-
/// Queue of pointer uses to analyze and potentially rewrite.
SmallVector<Use *, 8> Queue;
@@ -3194,8 +2985,6 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
Use *U;
public:
- AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
-
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
@@ -3711,7 +3500,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
return true;
}),
Stores.end());
- // Now we have to go *back* through all te stores, because a later store may
+ // Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
@@ -3773,7 +3562,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
"Cannot represent alloca access size using 64-bit integers!");
Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
- IRB.SetInsertPoint(BasicBlock::iterator(LI));
+ IRB.SetInsertPoint(LI);
DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
@@ -3825,7 +3614,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
Value *StoreBasePtr = SI->getPointerOperand();
- IRB.SetInsertPoint(BasicBlock::iterator(SI));
+ IRB.SetInsertPoint(SI);
DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
@@ -3914,7 +3703,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
if (SplitLoads) {
PLoad = (*SplitLoads)[Idx];
} else {
- IRB.SetInsertPoint(BasicBlock::iterator(LI));
+ IRB.SetInsertPoint(LI);
PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
@@ -3924,7 +3713,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
// And store this partition.
- IRB.SetInsertPoint(BasicBlock::iterator(SI));
+ IRB.SetInsertPoint(SI);
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
@@ -3972,7 +3761,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Mark the original store as dead now that we've split it up and kill its
// slice. Note that we leave the original load in place unless this store
- // was its ownly use. It may in turn be split up if it is an alloca load
+ // was its only use. It may in turn be split up if it is an alloca load
// for some other alloca, but it may be a normal load. This may introduce
// redundant loads, but where those can be merged the rest of the optimizer
// should handle the merging, and this uncovers SSA splits which is more
@@ -4024,7 +3813,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P) {
+ Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
@@ -4230,12 +4019,11 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
// Migrate debug information from the old alloca to the new alloca(s)
- // and the individial partitions.
+ // and the individual partitions.
if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
auto *Var = DbgDecl->getVariable();
auto *Expr = DbgDecl->getExpression();
- DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
- /*AllowUnresolved*/ false);
+ DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
bool IsSplit = Pieces.size() > 1;
for (auto Piece : Pieces) {
// Create a piece expression describing the new partition or reuse AI's
@@ -4308,7 +4096,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter(DL);
+ AggLoadStoreRewriter AggRewriter;
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
@@ -4388,107 +4176,29 @@ void SROA::deleteDeadInstructions(
}
}
-static void enqueueUsersInWorklist(Instruction &I,
- SmallVectorImpl<Instruction *> &Worklist,
- SmallPtrSetImpl<Instruction *> &Visited) {
- for (User *U : I.users())
- if (Visited.insert(cast<Instruction>(U)).second)
- Worklist.push_back(cast<Instruction>(U));
-}
-
/// \brief Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
-/// If there is a domtree available, we attempt to promote using the full power
-/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
-/// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occurred.
+/// This function returns whether any promotion occurred.
bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
NumPromoted += PromotableAllocas.size();
- if (DT && !ForceSSAUpdater) {
- DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
- PromotableAllocas.clear();
- return true;
- }
-
- DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
- SSAUpdater SSA;
- DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
- SmallVector<Instruction *, 64> Insts;
-
- // We need a worklist to walk the uses of each alloca.
- SmallVector<Instruction *, 8> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- SmallVector<Instruction *, 32> DeadInsts;
-
- for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
- AllocaInst *AI = PromotableAllocas[Idx];
- Insts.clear();
- Worklist.clear();
- Visited.clear();
-
- enqueueUsersInWorklist(*AI, Worklist, Visited);
-
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
-
- // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
- // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
- // leading to them) here. Eventually it should use them to optimize the
- // scalar values produced.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end);
- II->eraseFromParent();
- continue;
- }
-
- // Push the loads and stores we find onto the list. SROA will already
- // have validated that all loads and stores are viable candidates for
- // promotion.
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- assert(LI->getType() == AI->getAllocatedType());
- Insts.push_back(LI);
- continue;
- }
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
- Insts.push_back(SI);
- continue;
- }
-
- // For everything else, we know that only no-op bitcasts and GEPs will
- // make it this far, just recurse through them and recall them for later
- // removal.
- DeadInsts.push_back(I);
- enqueueUsersInWorklist(*I, Worklist, Visited);
- }
- AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
- while (!DeadInsts.empty())
- DeadInsts.pop_back_val()->eraseFromParent();
- AI->eraseFromParent();
- }
-
+ DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+ PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
PromotableAllocas.clear();
return true;
}
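For reference, PromoteMemToReg is the standard mem2reg entry point from llvm/Transforms/Utils/PromoteMemToReg.h; a sketch of the contract as used here (illustrative only):

  // Every alloca handed to PromoteMemToReg must satisfy isAllocaPromotable.
  for (AllocaInst *AI : PromotableAllocas)
    assert(isAllocaPromotable(AI));
  // nullptr: no AliasSetTracker to keep updated; AC lets mem2reg preserve
  // assumption information while building PHIs.
  PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);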
-bool SROA::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
+PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
+ AssumptionCache &RunAC) {
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DT = &RunDT;
+ AC = &RunAC;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
@@ -4527,12 +4237,55 @@ bool SROA::runOnFunction(Function &F) {
PostPromotionWorklist.clear();
} while (!Worklist.empty());
- return Changed;
+ // FIXME: Even when promoting allocas we should preserve some abstract set of
+ // CFG-specific analyses.
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
-void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AssumptionCacheTracker>();
- if (RequiresDomTree)
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
+PreservedAnalyses SROA::run(Function &F, AnalysisManager<Function> *AM) {
+ return runImpl(F, AM->getResult<DominatorTreeAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F));
}
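A sketch of driving the pass through the new pass manager interface above (hypothetical driver; analysis registration is abbreviated, and exact helper names vary between LLVM revisions):

  FunctionAnalysisManager FAM;   // an AnalysisManager<Function>
  // ... register DominatorTreeAnalysis, AssumptionAnalysis, etc. ...
  SROA Pass;
  PreservedAnalyses PA = Pass.run(F, &FAM);
  if (!PA.areAllPreserved())
    FAM.invalidate(F, PA);       // drop whatever the pass did not preserve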
+
+/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
+///
+/// This is in the llvm namespace purely to allow it to be a friend of the \c
+/// SROA pass.
+class llvm::sroa::SROALegacyPass : public FunctionPass {
+ /// The SROA implementation.
+ SROA Impl;
+
+public:
+ SROALegacyPass() : FunctionPass(ID) {
+ initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ auto PA = Impl.runImpl(
+ F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+ return !PA.areAllPreserved();
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ const char *getPassName() const override { return "SROA"; }
+ static char ID;
+};
+
+char SROALegacyPass::ID = 0;
+
+FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); }
+
+INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
+ "Scalar Replacement Of Aggregates", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
+ false, false)
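And the legacy side, as a hypothetical usage sketch for a Module M:

  legacy::FunctionPassManager FPM(&M);
  FPM.add(createSROAPass());     // wraps the shared SROA implementation above
  FPM.doInitialization();
  for (Function &F : M)
    FPM.run(F);
  FPM.doFinalization();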
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index d5d360571f88..52d477cc9573 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -16,7 +16,10 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -27,10 +30,9 @@ using namespace llvm;
/// initializeScalarOptsPasses - Initialize all passes linked into the
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
- initializeADCEPass(Registry);
+ initializeADCELegacyPassPass(Registry);
initializeBDCEPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
- initializeSampleProfileLoaderPass(Registry);
initializeConstantHoistingPass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
@@ -66,7 +68,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeRewriteStatepointsForGCPass(Registry);
initializeSCCPPass(Registry);
initializeIPSCCPPass(Registry);
- initializeSROAPass(Registry);
+ initializeSROALegacyPassPass(Registry);
initializeSROA_DTPass(Registry);
initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
@@ -81,6 +83,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializePlaceSafepointsPass(Registry);
initializeFloat2IntPass(Registry);
initializeLoopDistributePass(Registry);
+ initializeLoopLoadEliminationPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -225,15 +228,15 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
}
void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+ unwrap(PM)->add(createTypeBasedAAWrapperPass());
}
void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createScopedNoAliasAAPass());
+ unwrap(PM)->add(createScopedNoAliasAAWrapperPass());
}
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createBasicAliasAnalysisPass());
+ unwrap(PM)->add(createBasicAAWrapperPass());
}
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index d955da7ce75d..114d22ddf2e4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -60,6 +60,7 @@ STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
namespace {
+#define SROA SROA_
struct SROA : public FunctionPass {
SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
: FunctionPass(ID), HasDomTree(hasDT) {
@@ -382,8 +383,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
- AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "",
- AI->getParent()->begin());
+ AllocaInst *NewAI =
+ new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front());
ConvertUsesToScalar(AI, NewAI, 0, nullptr);
return NewAI;
}
@@ -1195,7 +1196,7 @@ static bool isSafePHIToSpeculate(PHINode *PN) {
// Ensure that there are no instructions between the PHI and the load that
// could store.
- for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 049300350857..054bacdc706b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -253,10 +253,10 @@ bool Scalarizer::doInitialization(Module &M) {
}
bool Scalarizer::runOnFunction(Function &F) {
- for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
- BasicBlock *BB = BBI;
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
- Instruction *I = II;
+ assert(Gathered.empty() && Scattered.empty());
+ for (BasicBlock &BB : F) {
+ for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
+ Instruction *I = &*II;
bool Done = visit(I);
++II;
if (Done && I->getType()->isVoidTy())
@@ -285,7 +285,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
- return Scatterer(Point->getParent(), Point, V);
+ return Scatterer(Point->getParent(), Point->getIterator(), V);
}
// Replace Op with the gathered form of the components in CV. Defer the
@@ -377,7 +377,7 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(I.getParent(), &I);
+ IRBuilder<> Builder(&I);
Scatterer Op0 = scatter(&I, I.getOperand(0));
Scatterer Op1 = scatter(&I, I.getOperand(1));
assert(Op0.size() == NumElems && "Mismatched binary operation");
@@ -397,7 +397,7 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(SI.getParent(), &SI);
+ IRBuilder<> Builder(&SI);
Scatterer Op1 = scatter(&SI, SI.getOperand(1));
Scatterer Op2 = scatter(&SI, SI.getOperand(2));
assert(Op1.size() == NumElems && "Mismatched select");
@@ -438,7 +438,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (!VT)
return false;
- IRBuilder<> Builder(GEPI.getParent(), &GEPI);
+ IRBuilder<> Builder(&GEPI);
unsigned NumElems = VT->getNumElements();
unsigned NumIndices = GEPI.getNumIndices();
@@ -472,7 +472,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(CI.getParent(), &CI);
+ IRBuilder<> Builder(&CI);
Scatterer Op0 = scatter(&CI, CI.getOperand(0));
assert(Op0.size() == NumElems && "Mismatched cast");
ValueVector Res;
@@ -492,7 +492,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
unsigned DstNumElems = DstVT->getNumElements();
unsigned SrcNumElems = SrcVT->getNumElements();
- IRBuilder<> Builder(BCI.getParent(), &BCI);
+ IRBuilder<> Builder(&BCI);
Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
ValueVector Res;
Res.resize(DstNumElems);
@@ -569,7 +569,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(PHI.getParent(), &PHI);
+ IRBuilder<> Builder(&PHI);
ValueVector Res;
Res.resize(NumElems);
@@ -600,7 +600,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
- IRBuilder<> Builder(LI.getParent(), &LI);
+ IRBuilder<> Builder(&LI);
Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
ValueVector Res;
Res.resize(NumElems);
@@ -625,7 +625,7 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
- IRBuilder<> Builder(SI.getParent(), &SI);
+ IRBuilder<> Builder(&SI);
Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
Scatterer Val = scatter(&SI, FullValue);
@@ -642,7 +642,9 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool Scalarizer::finish() {
- if (Gathered.empty())
+ // The presence of data in Gathered or Scattered indicates changes
+ // made to the Function.
+ if (Gathered.empty() && Scattered.empty())
return false;
for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
GMI != GME; ++GMI) {
@@ -655,7 +657,7 @@ bool Scalarizer::finish() {
Value *Res = UndefValue::get(Ty);
BasicBlock *BB = Op->getParent();
unsigned Count = Ty->getVectorNumElements();
- IRBuilder<> Builder(BB, Op);
+ IRBuilder<> Builder(Op);
if (isa<PHINode>(Op))
Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
for (unsigned I = 0; I < Count; ++I)
diff --git a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 4a875311881a..86a10d2a1612 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -156,6 +156,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@@ -164,6 +168,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -174,6 +179,7 @@
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false),
@@ -319,8 +325,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool doInitialization(Module &M) override {
@@ -373,15 +382,42 @@ private:
///
/// Verified in @i32_add in split-gep.ll
bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+ /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
+ /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
+ /// the constant offset. After extraction, it becomes desirable to reunite the
+ /// distributed sexts. For example,
+ ///
+ /// &a[sext(i +nsw (j +nsw 5))]
+ /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
+ /// => constant extraction &a[sext(i) + sext(j)] + 5
+ /// => reunion &a[sext(i +nsw j)] + 5
+ bool reuniteExts(Function &F);
+ /// A helper that reunites sexts in an instruction.
+ bool reuniteExts(Instruction *I);
+ /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
+ Instruction *findClosestMatchingDominator(const SCEV *Key,
+ Instruction *Dominatee);
/// Verify F is free of dead code.
void verifyNoDeadCode(Function &F);
+ bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
+ // Swap the index operands of two GEPs.
+ void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
+ // Check if it is safe to swap the index operands of two GEPs.
+ bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
+ Loop *CurLoop);
+
const DataLayout *DL;
- const DominatorTree *DT;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
const TargetMachine *TM;
+
+ LoopInfo *LI;
+ TargetLibraryInfo *TLI;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
bool LowerGEP;
+ DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
};
} // anonymous namespace
@@ -391,7 +427,10 @@ INITIALIZE_PASS_BEGIN(
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
@@ -734,6 +773,13 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
Type *I8PtrTy =
Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
Value *ResultPtr = Variadic->getOperand(0);
+ Loop *L = LI->getLoopFor(Variadic->getParent());
+ // The base is a swap candidate only if it is loop invariant and used no
+ // more than once inside the loop.
+ bool isSwapCandidate =
+ L && L->isLoopInvariant(ResultPtr) &&
+ !hasMoreThanOneUseInLoop(ResultPtr, L);
+ Value *FirstResult = nullptr;
+
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
@@ -762,6 +808,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
// Create an ugly GEP with a single index for each index.
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
+ if (FirstResult == nullptr)
+ FirstResult = ResultPtr;
}
}
@@ -770,7 +818,17 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
- }
+ } else
+ isSwapCandidate = false;
+
+ // If we created a GEP with a constant index and the base is loop
+ // invariant, swap the index of the first GEP with that constant index so
+ // that LICM can hoist the constant GEP out of the loop later.
+ GetElementPtrInst *FirstGEP = dyn_cast<GetElementPtrInst>(FirstResult);
+ GetElementPtrInst *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
+ if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
+ swapGEPOperand(FirstGEP, SecondGEP);
+
if (ResultPtr->getType() != Variadic->getType())
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
@@ -891,13 +949,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// Clear the inbounds attribute because the new index may be off-bound.
// e.g.,
//
- // b = add i64 a, 5
- // addr = gep inbounds float* p, i64 b
+ // b = add i64 a, 5
+ // addr = gep inbounds float, float* p, i64 b
//
// is transformed to:
//
- // addr2 = gep float* p, i64 a
- // addr = gep float* addr2, i64 5
+ // addr2 = gep float, float* p, i64 a ; inbounds removed
+ // addr = gep inbounds float, float* addr2, i64 5
//
// If a is -4, although the old index b is in bounds, the new index a is
// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
@@ -907,6 +965,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
//
// TODO(jingyue): do some range analysis to keep as many inbounds as
// possible. GEPs with inbounds are more friendly to alias analysis.
+ bool GEPWasInBounds = GEP->isInBounds();
GEP->setIsInBounds(false);
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
@@ -968,6 +1027,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
ConstantInt::get(IntPtrTy, Index, true),
GEP->getName(), GEP);
+ // Inherit the inbounds attribute of the original GEP.
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
} else {
// Unlikely but possible. For example,
// #pragma pack(1)
@@ -990,6 +1051,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Type::getInt8Ty(GEP->getContext()), NewGEP,
ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
GEP);
+ // Inherit the inbounds attribute of the original GEP.
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
if (GEP->getType() != I8PtrTy)
NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
}
@@ -1008,24 +1071,96 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
return false;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;)
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
Changed |= splitGEP(GEP);
- }
- // No need to split GEP ConstantExprs because all its indices are constant
- // already.
- }
+ // No need to split GEP ConstantExprs because all its indices are constant
+ // already.
}
+ Changed |= reuniteExts(F);
+
if (VerifyNoDeadCode)
verifyNoDeadCode(F);
return Changed;
}
+Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
+ const SCEV *Key, Instruction *Dominatee) {
+ auto Pos = DominatingExprs.find(Key);
+ if (Pos == DominatingExprs.end())
+ return nullptr;
+
+ auto &Candidates = Pos->second;
+ // Because we process the basic blocks in pre-order of the dominator tree, a
+ // candidate that doesn't dominate the current instruction won't dominate any
+ // future instruction either. Therefore, we pop it out of the stack. This
+ // optimization makes the algorithm O(n).
+ while (!Candidates.empty()) {
+ Instruction *Candidate = Candidates.back();
+ if (DT->dominates(Candidate, Dominatee))
+ return Candidate;
+ Candidates.pop_back();
+ }
+ return nullptr;
+}
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Dom: LHS+RHS
+ // I: sext(LHS)+sext(RHS)
+ // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
+ // TODO: handle zext
+ Value *LHS = nullptr, *RHS = nullptr;
+ if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
+ match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+ if (LHS->getType() == RHS->getType()) {
+ const SCEV *Key =
+ SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+ if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+ Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+ NewSExt->takeName(I);
+ I->replaceAllUsesWith(NewSExt);
+ RecursivelyDeleteTriviallyDeadInstructions(I);
+ return true;
+ }
+ }
+ }
+
+ // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
+ if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
+ match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+ if (isKnownNotFullPoison(I)) {
+ const SCEV *Key =
+ SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+ DominatingExprs[Key].push_back(I);
+ }
+ }
+ return false;
+}
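In IR terms, the rewrite implemented above looks like this (hypothetical example; %d is the dominating no-signed-wrap expression matched as Dom):

  %d  = add nsw i32 %i, %j       ; Dom: known not to sign-overflow
  ...
  %si = sext i32 %i to i64
  %sj = sext i32 %j to i64
  %s  = add i64 %si, %sj         ; I: sext(LHS) + sext(RHS)
  ; reuniteExts replaces %s with:
  %s  = sext i32 %d to i64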
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
+ bool Changed = false;
+ DominatingExprs.clear();
+ for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ BasicBlock *BB = Node->getBlock();
+ for (auto I = BB->begin(); I != BB->end(); ) {
+ Instruction *Cur = &*I++;
+ Changed |= reuniteExts(Cur);
+ }
+ }
+ return Changed;
+}
+
void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
for (auto &B : F) {
for (auto &I : B) {
@@ -1038,3 +1173,93 @@ void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
}
}
}
+
+bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
+ GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
+ if (!FirstGEP || !FirstGEP->hasOneUse())
+ return false;
+
+ if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
+ return false;
+
+ if (FirstGEP == SecondGEP)
+ return false;
+
+ unsigned FirstNum = FirstGEP->getNumOperands();
+ unsigned SecondNum = SecondGEP->getNumOperands();
+ // Give up if the number of operands is not 2.
+ if (FirstNum != SecondNum || FirstNum != 2)
+ return false;
+
+ Value *FirstBase = FirstGEP->getOperand(0);
+ Value *SecondBase = SecondGEP->getOperand(0);
+ Value *FirstOffset = FirstGEP->getOperand(1);
+ // Give up if the index of the first GEP is loop invariant.
+ if (CurLoop->isLoopInvariant(FirstOffset))
+ return false;
+
+ // Give up if the bases don't have the same type.
+ if (FirstBase->getType() != SecondBase->getType())
+ return false;
+
+ Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
+
+ // Check if the second operand of the first GEP has a constant coefficient.
+ // For example, in the following code we won't gain anything by hoisting
+ // the second GEP out, because the second GEP can be folded away.
+ // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
+ // %67 = shl i64 %scevgep.sum.ur159, 2
+ // %uglygep160 = getelementptr i8* %65, i64 %67
+ // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
+
+ // Skip a constant shift instruction which may be generated by splitting
+ // GEPs.
+ if (FirstOffsetDef && FirstOffsetDef->isShift() &&
+ isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
+ FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
+
+ // Give up if FirstOffsetDef is an Add or Sub with a constant, because the
+ // swap may not be profitable at all due to constant folding.
+ if (FirstOffsetDef)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
+ unsigned opc = BO->getOpcode();
+ if ((opc == Instruction::Add || opc == Instruction::Sub) &&
+ (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1))))
+ return false;
+ }
+ return true;
+}
+
+bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
+ int UsesInLoop = 0;
+ for (User *U : V->users()) {
+ if (Instruction *User = dyn_cast<Instruction>(U))
+ if (L->contains(User))
+ if (++UsesInLoop > 1)
+ return true;
+ }
+ return false;
+}
+
+void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
+ GetElementPtrInst *Second) {
+ Value *Offset1 = First->getOperand(1);
+ Value *Offset2 = Second->getOperand(1);
+ First->setOperand(1, Offset2);
+ Second->setOperand(1, Offset1);
+
+ // We changed p+o+c to p+c+o; p+c may no longer be inbounds.
+ const DataLayout &DAL = First->getModule()->getDataLayout();
+ APInt Offset(DAL.getPointerSizeInBits(
+ cast<PointerType>(First->getType())->getAddressSpace()),
+ 0);
+ Value *NewBase =
+ First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
+ uint64_t ObjectSize;
+ if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
+ Offset.ugt(ObjectSize)) {
+ First->setIsInBounds(false);
+ Second->setIsInBounds(false);
+ } else
+ First->setIsInBounds(true);
+}
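The effect of the swap, sketched on hypothetical IR where %base is loop invariant and %var is not:

  ; before:
  %g1 = getelementptr i8, i8* %base, i64 %var   ; variant index comes first
  %g2 = getelementptr i8, i8* %g1, i64 1024     ; constant index comes last
  ; after swapGEPOperand(%g1, %g2):
  %g1 = getelementptr i8, i8* %base, i64 1024   ; loop invariant; LICM can hoist it
  %g2 = getelementptr i8, i8* %g1, i64 %var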
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 231411a16c05..63c8836bf381 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
@@ -67,15 +68,14 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// single PHI node that is the operand to the return.
if (Ret != &BB.front()) {
// Check for something else in the block.
- BasicBlock::iterator I = Ret;
+ BasicBlock::iterator I(Ret);
--I;
// Skip over debug info.
while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
--I;
if (!isa<DbgInfoIntrinsic>(I) &&
- (!isa<PHINode>(I) || I != BB.begin() ||
- Ret->getNumOperands() == 0 ||
- Ret->getOperand(0) != I))
+ (!isa<PHINode>(I) || I != BB.begin() || Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) != &*I))
continue;
}
@@ -136,7 +136,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
+ if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) {
LocalChange = true;
++NumSimpl;
}
@@ -217,6 +217,7 @@ struct CFGSimplifyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index f49f4eaaedcb..64109b2df117 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -48,7 +48,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -66,7 +66,7 @@ char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
@@ -99,7 +99,7 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
bool Sinking::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool MadeChange, EverMadeChange = false;
@@ -119,7 +119,7 @@ bool Sinking::runOnFunction(Function &F) {
bool Sinking::ProcessBlock(BasicBlock &BB) {
// Can't sink anything out of a block that has less than two successors.
- if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+ if (BB.getTerminator()->getNumSuccessors() <= 1) return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an
@@ -134,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
bool ProcessedBegin = false;
SmallPtrSet<Instruction *, 8> Stores;
do {
- Instruction *Inst = I; // The instruction to sink.
+ Instruction *Inst = &*I; // The instruction to sink.
// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
@@ -165,14 +165,16 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
MemoryLocation Loc = MemoryLocation::get(L);
for (Instruction *S : Stores)
- if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod)
+ if (AA->getModRefInfo(S, Loc) & MRI_Mod)
return false;
}
- if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+ if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst) || Inst->isEHPad() ||
+ Inst->mayThrow())
return false;
- // Convergent operations can only be moved to control equivalent blocks.
+ // Convergent operations cannot be made control-dependent on additional
+ // values.
if (auto CS = CallSite(Inst)) {
if (CS.hasFnAttr(Attribute::Convergent))
return false;
@@ -193,6 +195,11 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (Inst->getParent() == SuccToSinkTo)
return false;
+ // It's never legal to sink an instruction into a block which terminates in an
+ // EH-pad.
+ if (SuccToSinkTo->getTerminator()->isExceptional())
+ return false;
+
// If the block has multiple predecessors, this would introduce computation
// on different code paths. We could split the critical edge, but for now we
// just punt.
@@ -278,6 +285,6 @@ bool Sinking::SinkInstruction(Instruction *Inst,
dbgs() << ")\n");
// Move the instruction.
- Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt());
+ Inst->moveBefore(&*SuccToSinkTo->getFirstInsertionPt());
return true;
}
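Taken together, the Sink.cpp hunks tighten the safety test to refuse EH pads, instructions that may throw, and convergent calls. A condensed sketch of the resulting predicate, assuming the usual LLVM headers; this is not the verbatim pass code, and the store/load aliasing check is omitted:

#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instructions.h"

static bool sketchIsSafeToSink(llvm::Instruction *Inst) {
  using namespace llvm;
  // Terminators, PHIs, EH pads, and anything that may throw must stay put.
  if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst) || Inst->isEHPad() ||
      Inst->mayThrow())
    return false;
  // Convergent operations cannot be made control-dependent on more values.
  if (auto CS = CallSite(Inst))
    if (CS.hasFnAttr(Attribute::Convergent))
      return false;
  return true;
}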
diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index ff3f00a2e2f8..147d615488ff 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -227,7 +227,7 @@ bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
// changes the list that I is iterating through.
auto Current = I;
++I;
- if (!NotHoisted.count(Current)) {
+ if (!NotHoisted.count(&*Current)) {
Current->moveBefore(ToBlock.getTerminator());
}
}
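The `&*Current` fix above sits inside the usual increment-before-mutate loop: the iterator advances past `Current` before the hoist, so moving `Current` cannot invalidate it. The same idiom as a standalone sketch (hypothetical `hoistAll` helper):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static void hoistAll(llvm::BasicBlock &From, llvm::BasicBlock &To) {
  for (llvm::BasicBlock::iterator I = From.begin(); I != From.end();) {
    llvm::Instruction *Current = &*I;
    ++I; // advance first; moveBefore() would otherwise strand the iterator
    if (!Current->isTerminator())
      Current->moveBefore(To.getTerminator());
  }
}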
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 6d9d417ef943..1faa65eb3417 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -131,7 +131,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
// We do not modify the shape of the CFG.
AU.setPreservesCFG();
@@ -212,7 +212,7 @@ char StraightLineStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
"Straight line strength reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
"Straight line strength reduction", false, false)
@@ -234,6 +234,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
Basis.CandidateKind == C.CandidateKind);
}
+// TODO: use TTI->getGEPCost.
static bool isGEPFoldable(GetElementPtrInst *GEP,
const TargetTransformInfo *TTI,
const DataLayout *DL) {
@@ -523,7 +524,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
continue;
const SCEV *OrigIndexExpr = IndexExprs[I - 1];
- IndexExprs[I - 1] = SE->getConstant(OrigIndexExpr->getType(), 0);
+ IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType());
// The base of this candidate is GEP's base plus the offsets of all
// indices except this current one.
@@ -689,7 +690,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Traverse the dominator tree in the depth-first order. This order makes sure
// all bases of a candidate are in Candidates when we process it.
for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
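Two independent changes meet in this file: ScalarEvolution is now reached through its wrapper pass, and `getZero(Ty)` abbreviates `getConstant(Ty, 0)`. A skeletal legacy pass showing the new plumbing (hypothetical `SketchPass`, not from the patch):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"

namespace {
struct SketchPass : llvm::FunctionPass {
  static char ID;
  SketchPass() : FunctionPass(ID) {}
  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::ScalarEvolutionWrapperPass>(); // was: ScalarEvolution
  }
  bool runOnFunction(llvm::Function &F) override {
    auto &SE = getAnalysis<llvm::ScalarEvolutionWrapperPass>().getSE();
    // getZero(Ty) is shorthand for getConstant(Ty, 0).
    const llvm::SCEV *Zero =
        SE.getZero(llvm::Type::getInt64Ty(F.getContext()));
    (void)Zero;
    return false;
  }
};
char SketchPass::ID = 0;
}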
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 4f23e20d251d..662513c7d8ae 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -358,13 +358,9 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
BasicBlock *BB = N->getNodeAs<BasicBlock>();
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = Term->getSuccessor(i);
-
- if (Visited.count(Succ)) {
+ for (BasicBlock *Succ : Term->successors())
+ if (Visited.count(Succ))
Loops[Succ] = BB;
- }
- }
}
}
@@ -903,14 +899,14 @@ void StructurizeCFG::rebuildSSA() {
continue;
}
- if (DT->dominates(II, User))
+ if (DT->dominates(&*II, User))
continue;
if (!Initialized) {
Value *Undef = UndefValue::get(II->getType());
Updater.Initialize(II->getType(), "");
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
- Updater.AddAvailableValue(BB, II);
+ Updater.AddAvailableValue(BB, &*II);
Initialized = true;
}
Updater.RewriteUseAfterInsertions(U);
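The rebuildSSA hunk drives llvm::SSAUpdater. A minimal standalone use of the same API, with hypothetical inputs (a definition, one of its uses, and the function entry block):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

static void rewriteUse(llvm::Instruction *Def, llvm::Use &U,
                       llvm::BasicBlock *Entry) {
  llvm::SSAUpdater Updater;
  Updater.Initialize(Def->getType(), Def->getName());
  // Seed the reaching definitions, then let the updater place whatever
  // PHIs are needed to rewrite the use.
  Updater.AddAvailableValue(Entry, llvm::UndefValue::get(Def->getType()));
  Updater.AddAvailableValue(Def->getParent(), Def);
  Updater.RewriteUseAfterInsertions(U);
}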
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index c7de2e2965c7..0e0b00df85bb 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -54,6 +54,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
@@ -136,6 +137,7 @@ FunctionPass *llvm::createTailCallEliminationPass() {
void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
/// \brief Scan the specified function for alloca instructions.
@@ -195,8 +197,8 @@ struct AllocaDerivedValueTracker {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(I);
- bool IsNocapture = !CS.isCallee(U) &&
- CS.doesNotCapture(CS.getArgumentNo(U));
+ bool IsNocapture =
+ CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
callUsesLocalStack(CS, IsNocapture);
if (IsNocapture) {
// If the alloca-derived argument is passed in as nocapture, then it
@@ -302,7 +304,9 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
if (!CI || CI->isTailCall())
continue;
- if (CI->doesNotAccessMemory()) {
+ bool IsNoTail = CI->isNoTailCall();
+
+ if (!IsNoTail && CI->doesNotAccessMemory()) {
// A call to a readnone function whose arguments are all things computed
// outside this function can be marked tail. Even if you stored the
// alloca address into a global, a readnone function can't load the
@@ -330,7 +334,7 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
}
}
- if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+ if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
DeferredTails.push_back(CI);
} else {
AllCallsAreTailCalls = false;
@@ -404,7 +408,7 @@ bool TailCallElim::runTRE(Function &F) {
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
- BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB.
+ BasicBlock *BB = &*BBI++; // FoldReturnAndProcessPred may delete BB.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall);
@@ -574,7 +578,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
CallInst *CI = nullptr;
- BasicBlock::iterator BBI = TI;
+ BasicBlock::iterator BBI(TI);
while (true) {
CI = dyn_cast<CallInst>(BBI);
if (CI && CI->getCalledFunction() == F)
@@ -595,9 +599,8 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
- FirstNonDbg(BB->front()) == CI &&
- FirstNonDbg(std::next(BB->begin())) == TI &&
- CI->getCalledFunction() &&
+ FirstNonDbg(BB->front().getIterator()) == CI &&
+ FirstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() &&
!TTI->isLoweredToCall(CI->getCalledFunction())) {
// A single-block function with just a call and a return. Check that
// the arguments match.
@@ -636,19 +639,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- BasicBlock::iterator BBI = CI;
+ BasicBlock::iterator BBI(CI);
for (++BBI; &*BBI != Ret; ++BBI) {
- if (CanMoveAboveCall(BBI, CI)) continue;
+ if (CanMoveAboveCall(&*BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
// case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
- CanTransformAccumulatorRecursion(BBI, CI))) {
+ CanTransformAccumulatorRecursion(&*BBI, CI))) {
// Yes, this is accumulator recursion. Remember which instruction
// accumulates.
- AccumulatorRecursionInstr = BBI;
+ AccumulatorRecursionInstr = &*BBI;
} else {
return false; // Otherwise, we cannot eliminate the tail recursion!
}
@@ -698,19 +701,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
NEBI = NewEntry->begin(); OEBI != E; )
if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
if (isa<ConstantInt>(AI->getArraySize()))
- AI->moveBefore(NEBI);
+ AI->moveBefore(&*NEBI);
// Now that we have created a new block, which jumps to the entry
// block, insert a PHI node for each argument of the function.
// For now, we initialize each PHI to only have the real arguments
// which are passed in.
- Instruction *InsertPos = OldEntry->begin();
+ Instruction *InsertPos = &OldEntry->front();
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I) {
PHINode *PN = PHINode::Create(I->getType(), 2,
I->getName() + ".tr", InsertPos);
I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
- PN->addIncoming(I, NewEntry);
+ PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN);
}
}
@@ -739,10 +742,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
Instruction *AccRecInstr = AccumulatorRecursionInstr;
// Start by inserting a new PHI node for the accumulator.
pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
- PHINode *AccPN =
- PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
- std::distance(PB, PE) + 1,
- "accumulator.tr", OldEntry->begin());
+ PHINode *AccPN = PHINode::Create(
+ AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front());
// Loop over all of the predecessors of the tail recursion block. For the
// real entry into the function we seed the PHI with the initial value,
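Two semantic changes ride along in these hunks: calls marked `notail` are excluded from tail marking, and the capture check now goes through data operands so operand-bundle uses are covered. A reduced sketch of the `notail` guard (hypothetical loop; the real pass additionally requires the call's operands to be computed outside the function):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

static bool markEligibleTails(llvm::BasicBlock &BB) {
  bool Changed = false;
  for (llvm::Instruction &I : BB) {
    auto *CI = llvm::dyn_cast<llvm::CallInst>(&I);
    if (!CI || CI->isTailCall() || CI->isNoTailCall())
      continue; // already tail, or explicitly forbidden by `notail`
    if (CI->doesNotAccessMemory()) { // readnone calls can't observe the stack
      CI->setTailCall();
      Changed = true;
    }
  }
  return Changed;
}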
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 03c3a80170a3..409326eba401 100644
--- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
namespace llvm {
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index e9f62391a44f..0262358fa3d5 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -52,32 +52,34 @@
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "add-discriminators"
namespace {
- struct AddDiscriminators : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- AddDiscriminators() : FunctionPass(ID) {
- initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry());
- }
+struct AddDiscriminators : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ AddDiscriminators() : FunctionPass(ID) {
+ initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override;
- };
+ bool runOnFunction(Function &F) override;
+};
}
char AddDiscriminators::ID = 0;
@@ -89,17 +91,17 @@ INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators",
// Command line option to disable discriminator generation even in the
// presence of debug information. This is only needed when debugging
// debug info generation issues.
-static cl::opt<bool>
-NoDiscriminators("no-discriminators", cl::init(false),
- cl::desc("Disable generation of discriminator information."));
+static cl::opt<bool> NoDiscriminators(
+ "no-discriminators", cl::init(false),
+ cl::desc("Disable generation of discriminator information."));
FunctionPass *llvm::createAddDiscriminatorsPass() {
return new AddDiscriminators();
}
static bool hasDebugInfo(const Function &F) {
- NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
- return CUNodes != nullptr;
+ DISubprogram *S = getDISubprogram(&F);
+ return S != nullptr;
}
/// \brief Assign DWARF discriminators.
@@ -159,8 +161,7 @@ bool AddDiscriminators::runOnFunction(Function &F) {
// Similarly, if the function has no debug info, do nothing.
// Finally, if this module is built with dwarf versions earlier than 4,
// do nothing (discriminator support is a DWARF 4 feature).
- if (NoDiscriminators ||
- !hasDebugInfo(F) ||
+ if (NoDiscriminators || !hasDebugInfo(F) ||
F.getParent()->getDwarfVersion() < 4)
return false;
@@ -169,59 +170,77 @@ bool AddDiscriminators::runOnFunction(Function &F) {
LLVMContext &Ctx = M->getContext();
DIBuilder Builder(*M, /*AllowUnresolved*/ false);
- // Traverse all the blocks looking for instructions in different
- // blocks that are at the same file:line location.
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *B = I;
- TerminatorInst *Last = B->getTerminator();
- const DILocation *LastDIL = Last->getDebugLoc();
- if (!LastDIL)
- continue;
-
- for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
- BasicBlock *Succ = Last->getSuccessor(I);
- Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
- const DILocation *FirstDIL = First->getDebugLoc();
- if (!FirstDIL)
+ typedef std::pair<StringRef, unsigned> Location;
+ typedef DenseMap<const BasicBlock *, Metadata *> BBScopeMap;
+ typedef DenseMap<Location, BBScopeMap> LocationBBMap;
+
+ LocationBBMap LBM;
+
+ // Traverse all instructions in the function. If the source line location
+ // of an instruction also appears in another basic block, assign a new
+ // discriminator to the instruction.
+ for (BasicBlock &B : F) {
+ for (auto &I : B.getInstList()) {
+ if (isa<DbgInfoIntrinsic>(&I))
+ continue;
+ const DILocation *DIL = I.getDebugLoc();
+ if (!DIL)
+ continue;
+ Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
+ auto &BBMap = LBM[L];
+ auto R = BBMap.insert(std::make_pair(&B, (Metadata *)nullptr));
+ if (BBMap.size() == 1)
+ continue;
+ bool InsertSuccess = R.second;
+ Metadata *&NewScope = R.first->second;
+ // If we just inserted a new block for an already-occupied location, a
+ // discriminator is needed to tell the two blocks' instructions apart.
+ if (InsertSuccess) {
+ auto *Scope = DIL->getScope();
+ auto *File =
+ Builder.createFile(DIL->getFilename(), Scope->getDirectory());
+ NewScope = Builder.createLexicalBlockFile(
+ Scope, File, DIL->computeNewDiscriminator());
+ }
+ I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(),
+ NewScope, DIL->getInlinedAt()));
+ DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":"
+ << dyn_cast<DILexicalBlockFile>(NewScope)->getDiscriminator()
+ << I << "\n");
+ Changed = true;
+ }
+ }
+
+ // Traverse all instructions and assign new discriminators to call
+ // instructions that share a line number within the same basic block.
+ // Sample-based profiling needs to distinguish different function calls on
+ // the same source line for correct profile annotation.
+ for (BasicBlock &B : F) {
+ const DILocation *FirstDIL = nullptr;
+ for (auto &I : B.getInstList()) {
+ CallInst *Current = dyn_cast<CallInst>(&I);
+ if (!Current || isa<DbgInfoIntrinsic>(&I))
continue;
- // If the first instruction (First) of Succ is at the same file
- // location as B's last instruction (Last), add a new
- // discriminator for First's location and all the instructions
- // in Succ that share the same location with First.
- if (!FirstDIL->canDiscriminate(*LastDIL)) {
- // Create a new lexical scope and compute a new discriminator
- // number for it.
- StringRef Filename = FirstDIL->getFilename();
- auto *Scope = FirstDIL->getScope();
- auto *File = Builder.createFile(Filename, Scope->getDirectory());
-
- // FIXME: Calculate the discriminator here, based on local information,
- // and delete DILocation::computeNewDiscriminator(). The current
- // solution gives different results depending on other modules in the
- // same context. All we really need is to discriminate between
- // FirstDIL and LastDIL -- a local map would suffice.
- unsigned Discriminator = FirstDIL->computeNewDiscriminator();
- auto *NewScope =
- Builder.createLexicalBlockFile(Scope, File, Discriminator);
- auto *NewDIL =
- DILocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
- NewScope, FirstDIL->getInlinedAt());
- DebugLoc newDebugLoc = NewDIL;
-
- // Attach this new debug location to First and every
- // instruction following First that shares the same location.
- for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
- ++I1) {
- if (I1->getDebugLoc().get() != FirstDIL)
- break;
- I1->setDebugLoc(newDebugLoc);
- DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
- << ":" << NewDIL->getColumn() << ":"
- << NewDIL->getDiscriminator() << *I1 << "\n");
+ DILocation *CurrentDIL = Current->getDebugLoc();
+ if (FirstDIL) {
+ if (CurrentDIL && CurrentDIL->getLine() == FirstDIL->getLine() &&
+ CurrentDIL->getFilename() == FirstDIL->getFilename()) {
+ auto *Scope = FirstDIL->getScope();
+ auto *File = Builder.createFile(FirstDIL->getFilename(),
+ Scope->getDirectory());
+ auto *NewScope = Builder.createLexicalBlockFile(
+ Scope, File, FirstDIL->computeNewDiscriminator());
+ Current->setDebugLoc(DILocation::get(
+ Ctx, CurrentDIL->getLine(), CurrentDIL->getColumn(), NewScope,
+ CurrentDIL->getInlinedAt()));
+ Changed = true;
+ } else {
+ FirstDIL = CurrentDIL;
}
- DEBUG(dbgs() << "\n");
- Changed = true;
+ } else {
+ FirstDIL = CurrentDIL;
}
}
}
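The rewritten first pass reduces to a (file, line) to block map: the first block seen at a location keeps its scope, and every further block gets a fresh lexical-block-file scope carrying a discriminator. The detection step in isolation, collapsing the patch's per-location block map down to the first claimant (a minimal sketch; scope creation and DILocation rewriting omitted):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include <utility>

typedef std::pair<llvm::StringRef, unsigned> Location;

// True exactly when Block differs from the block that first claimed this
// source location: the point at which a discriminator becomes necessary.
static bool needsDiscriminator(
    llvm::DenseMap<Location, const llvm::BasicBlock *> &FirstClaimant,
    Location L, const llvm::BasicBlock *Block) {
  auto R = FirstClaimant.insert(std::make_pair(L, Block));
  return !R.second && R.first->second != Block;
}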
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ef7dacac79cb..a5137e933e83 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -41,8 +41,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
// Loop through all of our successors and make sure they know that one
// of their predecessors is going away.
- for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
- BBTerm->getSuccessor(i)->removePredecessor(BB);
+ for (BasicBlock *Succ : BBTerm->successors())
+ Succ->removePredecessor(BB);
// Zap all the instructions in the block.
while (!BB->empty()) {
@@ -65,7 +65,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// any single-entry PHI nodes in it, fold them away. This handles the case
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA,
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
MemoryDependenceAnalysis *MemDep) {
if (!isa<PHINode>(BB->begin())) return;
@@ -77,8 +77,6 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA,
if (MemDep)
MemDep->removeInstruction(PN); // Memdep updates AA itself.
- else if (AA && isa<PointerType>(PN->getType()))
- AA->deleteValue(PN);
PN->eraseFromParent();
}
@@ -108,7 +106,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
- LoopInfo *LI, AliasAnalysis *AA,
+ LoopInfo *LI,
MemoryDependenceAnalysis *MemDep) {
// Don't merge away blocks who have their address taken.
if (BB->hasAddressTaken()) return false;
@@ -119,8 +117,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Don't break self-loops.
if (PredBB == BB) return false;
- // Don't break invokes.
- if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+ // Don't break unwinding instructions.
+ if (PredBB->getTerminator()->isExceptional())
+ return false;
succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
BasicBlock *OnlySucc = BB;
@@ -145,7 +144,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Begin by getting rid of unneeded PHIs.
if (isa<PHINode>(BB->front()))
- FoldSingleEntryPHINodes(BB, AA, MemDep);
+ FoldSingleEntryPHINodes(BB, MemDep);
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -253,7 +252,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, Succ->begin(), DT, LI);
+ return SplitBlock(Succ, &Succ->front(), DT, LI);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
@@ -284,8 +283,8 @@ llvm::SplitAllCriticalEdges(Function &F,
///
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DominatorTree *DT, LoopInfo *LI) {
- BasicBlock::iterator SplitIt = SplitPt;
- while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
+ BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
@@ -393,7 +392,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
/// from NewBB.
static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds, BranchInst *BI,
- AliasAnalysis *AA, bool HasLoopExit) {
+ bool HasLoopExit) {
// Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
@@ -474,17 +473,20 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
- const char *Suffix, AliasAnalysis *AA,
- DominatorTree *DT, LoopInfo *LI,
- bool PreserveLCSSA) {
+ const char *Suffix, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ // Do not attempt to split that which cannot be split.
+ if (!BB->canSplitPredecessors())
+ return nullptr;
+
// For the landingpads we need to act a bit differently.
// Delegate this work to the SplitLandingPadPredecessors.
if (BB->isLandingPad()) {
SmallVector<BasicBlock*, 2> NewBBs;
std::string NewName = std::string(Suffix) + ".split-lp";
- SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(),
- NewBBs, AA, DT, LI, PreserveLCSSA);
+ SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
+ LI, PreserveLCSSA);
return NewBBs[0];
}
@@ -523,7 +525,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
return NewBB;
}
@@ -544,8 +546,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix1, const char *Suffix2,
SmallVectorImpl<BasicBlock *> &NewBBs,
- AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
// Create a new basic block for OrigBB's predecessors listed in Preds. Insert
@@ -574,7 +576,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
- UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit);
// Move the remaining edges from OrigBB to point to NewBB2.
SmallVector<BasicBlock*, 8> NewBB2Preds;
@@ -611,7 +613,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
- UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit);
}
LandingPadInst *LPad = OrigBB->getLandingPadInst();
@@ -661,7 +663,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// return instruction.
V = BCI->getOperand(0);
NewBC = BCI->clone();
- Pred->getInstList().insert(NewRet, NewBC);
+ Pred->getInstList().insert(NewRet->getIterator(), NewBC);
*i = NewBC;
}
if (PHINode *PN = dyn_cast<PHINode>(V)) {
@@ -707,7 +709,7 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
MDNode *BranchWeights,
DominatorTree *DT) {
BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
TerminatorInst *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
@@ -757,7 +759,7 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
TerminatorInst **ElseTerm,
MDNode *BranchWeights) {
BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
TerminatorInst *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
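Besides shedding the AliasAnalysis parameters (the new AAResults no longer needs the `deleteValue` bookkeeping removed above), these utilities move to the iterator-taking `splitBasicBlock`. The call pattern in isolation (hypothetical `splitBefore` helper and `.tail` suffix):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static llvm::BasicBlock *splitBefore(llvm::Instruction *SplitPt) {
  llvm::BasicBlock *Head = SplitPt->getParent();
  // splitBasicBlock takes an iterator; getIterator() bridges from the
  // instruction pointer.
  return Head->splitBasicBlock(SplitPt->getIterator(),
                               Head->getName() + ".tail");
}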
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 7e83c9eeceb7..95825991cee9 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -101,10 +101,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
continue;
// Otherwise a new PHI is needed. Create one and populate it.
- PHINode *NewPN =
- PHINode::Create(PN->getType(), Preds.size(), "split",
- SplitBB->isLandingPad() ?
- SplitBB->begin() : SplitBB->getTerminator());
+ PHINode *NewPN = PHINode::Create(
+ PN->getType(), Preds.size(), "split",
+ SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
for (unsigned i = 0, e = Preds.size(); i != e; ++i)
NewPN->addIncoming(V, Preds[i]);
@@ -141,9 +140,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
BasicBlock *TIBB = TI->getParent();
BasicBlock *DestBB = TI->getSuccessor(SuccNum);
- // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // Splitting the critical edge to a pad block is non-trivial. Don't do
// it in this generic function.
- if (DestBB->isLandingPad()) return nullptr;
+ if (DestBB->isEHPad()) return nullptr;
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
@@ -157,7 +156,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// Insert the block into the function... right after the block TI lives in.
Function &F = *TIBB->getParent();
- Function::iterator FBBI = TIBB;
+ Function::iterator FBBI = TIBB->getIterator();
F.getBasicBlockList().insert(++FBBI, NewBB);
// If there are any PHI nodes in DestBB, we need to update them so that they
@@ -197,7 +196,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
// If we have nothing to update, just return.
- auto *AA = Options.AA;
auto *DT = Options.DT;
auto *LI = Options.LI;
if (!DT && !LI)
@@ -319,10 +317,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
LoopPreds.push_back(P);
}
if (!LoopPreds.empty()) {
- assert(!DestBB->isLandingPad() &&
- "We don't split edges to landing pads!");
+ assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
BasicBlock *NewExitBB = SplitBlockPredecessors(
- DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA);
+ DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA);
if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
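`isEHPad()` is strictly broader than the old `isLandingPad()` test: it also covers the Windows EH pads (catchpad, cleanuppad, catchswitch) added in this release, so critical-edge splitting now declines all of them. The guard in isolation:

#include "llvm/IR/BasicBlock.h"

static bool canSplitCriticalEdgeTo(const llvm::BasicBlock *DestBB) {
  // Splitting an edge into any EH pad is non-trivial; the generic helper
  // simply refuses rather than trying to repair pad semantics.
  return !DestBB->isEHPad();
}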
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 8aa7b2a65ba9..64b44a6b7919 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -21,7 +22,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -55,32 +55,6 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
return CI;
}
-/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the
-/// specified pointer. Ptr is required to be some pointer type, MaxLen must
-/// be of size_t type, and the return value has 'intptr_t' type.
-Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::strnlen))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[2];
- AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
- Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrNLen =
- M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS),
- DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), nullptr);
- CallInst *CI = B.CreateCall(StrNLen, {CastToCStr(Ptr, B), MaxLen}, "strnlen");
- if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
/// EmitStrChr - Emit a call to the strchr function to the builder, for the
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
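With `EmitStrNLen` gone, callers rely on the surviving emitters, which share one contract: they return null when the target library lacks the routine. A minimal use of `EmitStrLen` under that contract (hypothetical `emitLen` wrapper):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

static llvm::Value *emitLen(llvm::Value *Ptr, llvm::IRBuilder<> &B,
                            const llvm::DataLayout &DL,
                            const llvm::TargetLibraryInfo *TLI) {
  // Null means the target has no strlen; callers must handle that case.
  return llvm::EmitStrLen(Ptr, B, DL, TLI);
}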
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index f2d5e0745035..0914699a2e38 100644
--- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -82,7 +82,7 @@ static bool insertFastDiv(Function &F,
bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
// Get instruction operands
- Instruction *Instr = J;
+ Instruction *Instr = &*J;
Value *Dividend = Instr->getOperand(0);
Value *Divisor = Instr->getOperand(1);
@@ -94,7 +94,7 @@ static bool insertFastDiv(Function &F,
}
// Basic Block is split before divide
- BasicBlock *MainBB = I;
+ BasicBlock *MainBB = &*I;
BasicBlock *SuccessorBB = I->splitBasicBlock(J);
++I; //advance iterator I to successorBB
@@ -190,7 +190,7 @@ static bool reuseOrInsertFastDiv(Function &F,
bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
// Get instruction operands
- Instruction *Instr = J;
+ Instruction *Instr = &*J;
DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index cc4d6c6fb192..854a3b855f54 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -52,8 +52,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
- VMap[II] = NewInst; // Add instruction map to value.
-
+ VMap[&*II] = NewInst; // Add instruction map to value.
+
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
@@ -85,9 +85,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
- for (Function::const_arg_iterator I = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); I != E; ++I)
- assert(VMap.count(I) && "No mapping from source argument specified!");
+ for (const Argument &I : OldFunc->args())
+ assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif
// Copy all attributes other than those stored in the AttributeSet. We need
@@ -96,6 +95,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
NewFunc->copyAttributesFrom(OldFunc);
NewFunc->setAttributes(NewAttrs);
+ // Fix up the personality function that got copied over.
+ if (OldFunc->hasPersonalityFn())
+ NewFunc->setPersonalityFn(
+ MapValue(OldFunc->getPersonalityFn(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
+
AttributeSet OldAttrs = OldFunc->getAttributes();
// Clone any argument attributes that are present in the VMap.
for (const Argument &OldArg : OldFunc->args())
@@ -136,7 +142,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
if (BB.hasAddressTaken()) {
Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
const_cast<BasicBlock*>(&BB));
- VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
}
// Note return instructions for the caller.
@@ -146,11 +152,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
- for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
- BE = NewFunc->end(); BB != BE; ++BB)
+ for (Function::iterator BB =
+ cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
+ BE = NewFunc->end();
+ BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
- for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, VMap,
+ for (Instruction &II : *BB)
+ RemapInstruction(&II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
}
@@ -187,11 +195,9 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
if (!OldSubprogramMDNode) return;
- // Ensure that OldFunc appears in the map.
- // (if it's already there it must point to NewFunc anyway)
- VMap[OldFunc] = NewFunc;
auto *NewSubprogram =
cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap));
+ NewFunc->setSubprogram(NewSubprogram);
for (auto *CU : Finder.compile_units()) {
auto Subprograms = CU->getSubprograms();
@@ -222,10 +228,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
// The user might be deleting arguments to the function by specifying them in
// the VMap. If so, we need to not add the arguments to the arg ty vector
//
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet?
- ArgTypes.push_back(I->getType());
+ for (const Argument &I : F->args())
+ if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I.getType());
// Create a new function type...
FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
@@ -236,11 +241,10 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- if (VMap.count(I) == 0) { // Is this argument preserved?
- DestI->setName(I->getName()); // Copy the name over...
- VMap[I] = DestI++; // Add mapping to VMap
+ for (const Argument &I : F->args())
+ if (VMap.count(&I) == 0) { // Is this argument preserved?
+ DestI->setName(I.getName()); // Copy the name over...
+ VMap[&I] = &*DestI++; // Add mapping to VMap
}
if (ModuleLevelChanges)
@@ -330,8 +334,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
II != IE; ++II) {
// If the "Director" remaps the instruction, don't clone it.
if (Director) {
- CloningDirector::CloningAction Action
- = Director->handleInstruction(VMap, II, NewBB);
+ CloningDirector::CloningAction Action =
+ Director->handleInstruction(VMap, &*II, NewBB);
// If the cloning director says stop, we want to stop everything, not
// just break out of the loop (which would cause the terminator to be
// cloned). The cloning director is responsible for inserting a proper
@@ -365,7 +369,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (Value *MappedV = VMap.lookup(V))
V = MappedV;
- VMap[II] = V;
+ VMap[&*II] = V;
delete NewInst;
continue;
}
@@ -373,9 +377,15 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
- VMap[II] = NewInst; // Add instruction map to value.
+ VMap[&*II] = NewInst; // Add instruction map to value.
NewBB->getInstList().push_back(NewInst);
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+
+ if (CodeInfo)
+ if (auto CS = ImmutableCallSite(&*II))
+ if (CS.hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
hasStaticAllocas = true;
@@ -400,8 +410,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If the director says to skip with a terminate instruction, we still
// need to clone this block's successors.
const TerminatorInst *TI = NewBB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- ToClone.push_back(TI->getSuccessor(i));
+ for (const BasicBlock *Succ : TI->successors())
+ ToClone.push_back(Succ);
return;
}
assert(Action != CloningDirector::SkipInstruction &&
@@ -447,11 +457,16 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
NewInst->setName(OldTI->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
VMap[OldTI] = NewInst; // Add instruction map to value.
-
+
+ if (CodeInfo)
+ if (auto CS = ImmutableCallSite(OldTI))
+ if (CS.hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
// Recursively clone any reachable successor blocks.
const TerminatorInst *TI = BB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- ToClone.push_back(TI->getSuccessor(i));
+ for (const BasicBlock *Succ : TI->successors())
+ ToClone.push_back(Succ);
}
if (CodeInfo) {
@@ -484,12 +499,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
#ifndef NDEBUG
- // If the cloning starts at the begining of the function, verify that
+ // If the cloning starts at the beginning of the function, verify that
// the function arguments are mapped.
if (!StartingInst)
- for (Function::const_arg_iterator II = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); II != E; ++II)
- assert(VMap.count(II) && "No mapping from source argument specified!");
+ for (const Argument &II : OldFunc->args())
+ assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
@@ -499,12 +513,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
StartingBB = StartingInst->getParent();
else {
StartingBB = &OldFunc->getEntryBlock();
- StartingInst = StartingBB->begin();
+ StartingInst = &StartingBB->front();
}
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
- PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist);
+ PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
CloneWorklist.pop_back();
@@ -517,9 +531,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
//
// Defer PHI resolution until rest of function is resolved.
SmallVector<const PHINode*, 16> PHIToResolve;
- for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
- BI != BE; ++BI) {
- Value *V = VMap[BI];
+ for (const BasicBlock &BI : *OldFunc) {
+ Value *V = VMap[&BI];
BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
if (!NewBB) continue; // Dead block.
@@ -528,7 +541,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
// PHI nodes may have been remapped to non-PHI nodes by the caller or
// during the cloning process.
if (const PHINode *PN = dyn_cast<PHINode>(I)) {
@@ -621,8 +634,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
while ((PN = dyn_cast<PHINode>(I++))) {
Value *NV = UndefValue::get(PN->getType());
PN->replaceAllUsesWith(NV);
- assert(VMap[OldI] == PN && "VMap mismatch");
- VMap[OldI] = NV;
+ assert(VMap[&*OldI] == PN && "VMap mismatch");
+ VMap[&*OldI] = NV;
PN->eraseFromParent();
++OldI;
}
@@ -644,15 +657,15 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]);
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
// Check if this block has become dead during inlining or other
// simplifications. Note that the first block will appear dead, as it has
// not yet been wired up properly.
- if (I != Begin && (pred_begin(I) == pred_end(I) ||
- I->getSinglePredecessor() == I)) {
- BasicBlock *DeadBB = I++;
+ if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
+ I->getSinglePredecessor() == &*I)) {
+ BasicBlock *DeadBB = &*I++;
DeleteDeadBlock(DeadBB);
continue;
}
@@ -662,7 +675,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// simplification required looking through PHI nodes, those are only
// available after forming the full basic block. That may leave some here,
// and we still want to prune the dead code as early as possible.
- ConstantFoldTerminator(I);
+ ConstantFoldTerminator(&*I);
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
@@ -681,7 +694,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
BI->eraseFromParent();
// Make all PHI nodes that referred to Dest now refer to I as their source.
- Dest->replaceAllUsesWith(I);
+ Dest->replaceAllUsesWith(&*I);
// Move all the instructions in the succ to the pred.
I->getInstList().splice(I->end(), Dest->getInstList());
@@ -695,7 +708,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Make a final pass over the basic blocks from the old function to gather
// any return instructions which survived folding. We have to do this here
// because we can iteratively remove and merge returns above.
- for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]),
+ for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
E = NewFunc->end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
@@ -717,7 +730,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
- CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap,
+ CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
nullptr);
}
@@ -780,9 +793,10 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
}
// Move them physically from the end of the block list.
- F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH);
- F->getBasicBlockList().splice(Before, F->getBasicBlockList(),
- NewLoop->getHeader(), F->end());
+ F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
+ NewPH);
+ F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
+ NewLoop->getHeader()->getIterator(), F->end());
return NewLoop;
}
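The cloning utilities pair CloneBasicBlock with a VMap that RemapInstruction later consults. A minimal round trip (hypothetical `cloneOne` helper; RF_IgnoreMissingEntries tolerates operands defined outside the cloned block):

#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

static llvm::BasicBlock *cloneOne(llvm::BasicBlock *BB, llvm::Function *F) {
  llvm::ValueToValueMapTy VMap;
  llvm::BasicBlock *NewBB = llvm::CloneBasicBlock(BB, VMap, ".clone", F);
  // Rewrite operands to point at cloned values where the map has entries.
  for (llvm::Instruction &I : *NewBB)
    llvm::RemapInstruction(&I, VMap,
                           llvm::RF_NoModuleLevelChanges |
                               llvm::RF_IgnoreMissingEntries);
  return NewBB;
}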
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index 61f1811e7b4a..ab083353ece6 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -20,21 +20,28 @@
#include "llvm-c/Core.h"
using namespace llvm;
-/// CloneModule - Return an exact copy of the specified module. This is not as
-/// easy as it might seem because we have to worry about making copies of global
-/// variables and functions, and making their (initializers and references,
-/// respectively) refer to the right globals.
+/// This is not as easy as it might seem because we have to worry about making
+/// copies of global variables and functions, and making their (initializers and
+/// references, respectively) refer to the right globals.
///
-Module *llvm::CloneModule(const Module *M) {
+std::unique_ptr<Module> llvm::CloneModule(const Module *M) {
// Create the value map that maps things from the old module over to the new
// module.
ValueToValueMapTy VMap;
return CloneModule(M, VMap);
}
-Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+std::unique_ptr<Module> llvm::CloneModule(const Module *M,
+ ValueToValueMapTy &VMap) {
+ return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; });
+}
+
+std::unique_ptr<Module> llvm::CloneModule(
+ const Module *M, ValueToValueMapTy &VMap,
+ std::function<bool(const GlobalValue *)> ShouldCloneDefinition) {
// First off, we need to create the new module.
- Module *New = new Module(M->getModuleIdentifier(), M->getContext());
+ std::unique_ptr<Module> New =
+ llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext());
New->setDataLayout(M->getDataLayout());
New->setTargetTriple(M->getTargetTriple());
New->setModuleInlineAsm(M->getModuleInlineAsm());
@@ -52,26 +59,48 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
(GlobalVariable*) nullptr,
I->getThreadLocalMode(),
I->getType()->getAddressSpace());
- GV->copyAttributesFrom(I);
- VMap[I] = GV;
+ GV->copyAttributesFrom(&*I);
+ VMap[&*I] = GV;
}
// Loop over the functions in the module, making external functions as before
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
Function *NF =
- Function::Create(cast<FunctionType>(I->getType()->getElementType()),
- I->getLinkage(), I->getName(), New);
- NF->copyAttributesFrom(I);
- VMap[I] = NF;
+ Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+ I->getLinkage(), I->getName(), New.get());
+ NF->copyAttributesFrom(&*I);
+ VMap[&*I] = NF;
}
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
- auto *PTy = cast<PointerType>(I->getType());
- auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New);
- GA->copyAttributesFrom(I);
- VMap[I] = GA;
+ if (!ShouldCloneDefinition(&*I)) {
+ // An alias cannot act as an external reference, so we need to create
+ // either a function or a global variable depending on the value type.
+ // FIXME: Once pointee types are gone we can probably pick one or the
+ // other.
+ GlobalValue *GV;
+ if (I->getValueType()->isFunctionTy())
+ GV = Function::Create(cast<FunctionType>(I->getValueType()),
+ GlobalValue::ExternalLinkage, I->getName(),
+ New.get());
+ else
+ GV = new GlobalVariable(
+ *New, I->getValueType(), false, GlobalValue::ExternalLinkage,
+ (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr,
+ I->getThreadLocalMode(), I->getType()->getAddressSpace());
+ VMap[&*I] = GV;
+ // We do not copy attributes (mainly because copying between different
+ // kinds of globals is forbidden), but this is generally not required for
+ // correctness.
+ continue;
+ }
+ auto *GA = GlobalAlias::create(I->getValueType(),
+ I->getType()->getPointerAddressSpace(),
+ I->getLinkage(), I->getName(), New.get());
+ GA->copyAttributesFrom(&*I);
+ VMap[&*I] = GA;
}
// Now that all of the things that global variable initializer can refer to
@@ -80,7 +109,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
- GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
+ if (!ShouldCloneDefinition(&*I)) {
+ // Skip after setting the correct linkage for an external reference.
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ continue;
+ }
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
}
@@ -88,18 +122,22 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
// Similarly, copy over function bodies now...
//
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
- Function *F = cast<Function>(VMap[I]);
+ Function *F = cast<Function>(VMap[&*I]);
+ if (!ShouldCloneDefinition(&*I)) {
+ // Skip after setting the correct linkage for an external reference.
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ continue;
+ }
if (!I->isDeclaration()) {
Function::arg_iterator DestI = F->arg_begin();
for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
++J) {
DestI->setName(J->getName());
- VMap[J] = DestI++;
+ VMap[&*J] = &*DestI++;
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
-
+ CloneFunctionInto(F, &*I, VMap, /*ModuleLevelChanges=*/true, Returns);
}
if (I->hasPersonalityFn())
@@ -109,7 +147,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
// And aliases
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
- GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
+ // We already dealt with undefined aliases above.
+ if (!ShouldCloneDefinition(&*I))
+ continue;
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[&*I]);
if (const Constant *C = I->getAliasee())
GA->setAliasee(MapValue(C, VMap));
}
@@ -129,7 +170,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
extern "C" {
LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) {
- return wrap(CloneModule(unwrap(M)));
+ return wrap(CloneModule(unwrap(M)).release());
}
}
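A usage sketch for the new callback overload: keep a module's interface while turning every internal definition into a bare external declaration (hypothetical policy, not something the patch itself does):

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <memory>

static std::unique_ptr<llvm::Module> cloneInterface(const llvm::Module *M) {
  llvm::ValueToValueMapTy VMap;
  return llvm::CloneModule(M, VMap, [](const llvm::GlobalValue *GV) {
    // Skip bodies of internal globals; they become external declarations.
    return !GV->hasLocalLinkage();
  });
}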
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index ab89b41f6788..823696d88e65 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -51,7 +51,7 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
/// \brief Test whether a block is valid for extraction.
static bool isBlockValidForExtraction(const BasicBlock &BB) {
// Landing pads must be in the function where they were inserted for cleanup.
- if (BB.isLandingPad())
+ if (BB.isEHPad())
return false;
// Don't hoist code containing allocas, invokes, or vastarts.
@@ -175,7 +175,7 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
for (User *U : II->users())
if (!definedInRegion(Blocks, U)) {
- Outputs.insert(II);
+ Outputs.insert(&*II);
break;
}
}
@@ -211,7 +211,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// containing PHI nodes merging values from outside of the region, and a
// second that contains all of the code for the block and merges back any
// incoming values from inside of the region.
- BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator();
BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
Header->getName()+".ce");
@@ -246,7 +246,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// Create a new PHI node in the new region, which has an incoming value
// from OldPred of PN.
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
- PN->getName()+".ce", NewBB->begin());
+ PN->getName() + ".ce", &NewBB->front());
NewPN->addIncoming(PN, OldPred);
// Loop over all of the incoming value in PN, moving them to NewPN if they
@@ -266,7 +266,8 @@ void CodeExtractor::splitReturnBlocks() {
for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
- BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+ BasicBlock *New =
+ (*I)->splitBasicBlock(RI->getIterator(), (*I)->getName() + ".ret");
if (DT) {
// Old dominates New. New node dominates all other nodes dominated
// by Old.
@@ -365,10 +366,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
TerminatorInst *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI);
+ StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
} else
- RewriteVal = AI++;
+ RewriteVal = &*AI++;
std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
@@ -440,8 +441,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
StructValues.push_back(*i);
} else {
AllocaInst *alloca =
- new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc",
- codeReplacer->getParent()->begin()->begin());
+ new AllocaInst((*i)->getType(), nullptr, (*i)->getName() + ".loc",
+ &codeReplacer->getParent()->front().front());
ReloadOutputs.push_back(alloca);
params.push_back(alloca);
}
@@ -457,9 +458,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Allocate a struct at the beginning of this function
StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
- Struct =
- new AllocaInst(StructArgTy, nullptr, "structArg",
- codeReplacer->getParent()->begin()->begin());
+ Struct = new AllocaInst(StructArgTy, nullptr, "structArg",
+ &codeReplacer->getParent()->front().front());
params.push_back(Struct);
for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
@@ -566,8 +566,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
bool DominatesDef = true;
- if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
- DefBlock = Invoke->getNormalDest();
+ BasicBlock *NormalDest = nullptr;
+ if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out]))
+ NormalDest = Invoke->getNormalDest();
+
+ if (NormalDest) {
+ DefBlock = NormalDest;
// Make sure we are looking at the original successor block, not
// at a newly inserted exit block, which won't be in the dominator
@@ -606,11 +610,11 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
FirstOut+out);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(),
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(),
NTRet);
new StoreInst(outputs[out], GEP, NTRet);
} else {
- new StoreInst(outputs[out], OAI, NTRet);
+ new StoreInst(outputs[out], &*OAI, NTRet);
}
}
// Advance output iterator even if we don't emit a store
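The CodeExtractor hunks above are dominated by one mechanical idiom: with the ilist changes in this import, instruction pointers no longer convert implicitly to BasicBlock::iterator, so every conversion is spelled out as getIterator() (pointer to iterator) or &*It (iterator to pointer). A minimal sketch of both directions, not taken from this commit (the helper name is illustrative):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Return the first non-PHI instruction of BB, showing the two explicit
// conversions the patch introduces throughout.
static Instruction *firstAfterPHIs(BasicBlock &BB) {
  BasicBlock::iterator It = BB.getFirstNonPHI()->getIterator(); // Instruction* -> iterator
  return &*It;                                                  // iterator -> Instruction*
}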
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index dc95089cd2ca..b56ff684e8a8 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -50,7 +50,7 @@ void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
GlobalVariable *NGV =
new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
CA, "", GCL->getThreadLocalMode());
- GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV);
NGV->takeName(GCL);
// Nuke the old list, replacing any uses with the new one.
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 003da58ee798..75a1dde57c4c 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -35,8 +35,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
I.getName()+".reg2mem", AllocaPoint);
} else {
Function *F = I.getParent()->getParent();
- Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem",
- F->getEntryBlock().begin());
+ Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem",
+ &F->getEntryBlock().front());
}
// We cannot demote invoke instructions to the stack if their normal edge
@@ -89,16 +89,15 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
// AFTER the terminator instruction.
BasicBlock::iterator InsertPt;
if (!isa<TerminatorInst>(I)) {
- InsertPt = &I;
- ++InsertPt;
- for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ InsertPt = ++I.getIterator();
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
} else {
InvokeInst &II = cast<InvokeInst>(I);
InsertPt = II.getNormalDest()->getFirstInsertionPt();
}
- new StoreInst(&I, Slot, InsertPt);
+ new StoreInst(&I, Slot, &*InsertPt);
return Slot;
}
@@ -118,8 +117,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
P->getName()+".reg2mem", AllocaPoint);
} else {
Function *F = P->getParent()->getParent();
- Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem",
- F->getEntryBlock().begin());
+ Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem",
+ &F->getEntryBlock().front());
}
// Iterate over each operand inserting a store in each predecessor.
@@ -133,12 +132,12 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
}
// Insert a load in place of the PHI and replace all uses.
- BasicBlock::iterator InsertPt = P;
+ BasicBlock::iterator InsertPt = P->getIterator();
- for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
- Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt);
+ Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt);
P->replaceAllUsesWith(V);
// Delete PHI.
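The demotion routines above follow a fixed recipe: create an alloca at the front of the entry block (so it stays a static alloca), store the SSA value right after its definition, and reload it before each use. A compressed sketch of that recipe, assuming I is not a terminator and omitting the PHI/EH-pad skipping the real code performs (the helper name is illustrative):

#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static AllocaInst *demoteSketch(Instruction &I) {
  Function *F = I.getParent()->getParent();
  // Stack slot lives in the entry block so it is a static alloca.
  AllocaInst *Slot = new AllocaInst(I.getType(), nullptr,
                                    I.getName() + ".slot",
                                    &F->getEntryBlock().front());
  // Spill the value immediately after its definition.
  BasicBlock::iterator InsertPt = ++I.getIterator();
  new StoreInst(&I, Slot, &*InsertPt);
  return Slot;
}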
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 4eb3e3dd17d2..492ae9f69a65 100644
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -28,12 +28,11 @@ class FlattenCFGOpt {
AliasAnalysis *AA;
/// \brief Use parallel-and or parallel-or to generate conditions for
/// conditional branches.
- bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
- Pass *P = nullptr);
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
/// \brief If \param BB is the merge block of an if-region, attempt to merge
/// the if-region with an adjacent if-region upstream if two if-regions
/// contain identical instructions.
- bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr);
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
/// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
/// are from two if-regions whose entry blocks are \p Head1 and \p
/// Head2. \returns true if \p Block1 and \p Block2 contain identical
@@ -122,8 +121,7 @@ public:
/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
/// as its predecessors.
///
-bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
- Pass *P) {
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
PHINode *PHI = dyn_cast<PHINode>(BB->begin());
if (PHI)
return false; // For simplicity, avoid cases containing PHI nodes.
@@ -177,8 +175,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
// Instructions in the internal condition blocks should be safe
// to hoist up.
- for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
- Instruction *CI = BI++;
+ for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator();
+ BI != BE;) {
+ Instruction *CI = &*BI++;
if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
return false;
}
@@ -315,7 +314,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock *Block1,
BasicBlock *Block2) {
TerminatorInst *PTI2 = Head2->getTerminator();
- Instruction *PBI2 = Head2->begin();
+ Instruction *PBI2 = &Head2->front();
bool eq1 = (Block1 == Head1);
bool eq2 = (Block2 == Head2);
@@ -327,9 +326,9 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
// Check whether instructions in Block1 and Block2 are identical
// and do not alias with instructions in Head2.
BasicBlock::iterator iter1 = Block1->begin();
- BasicBlock::iterator end1 = Block1->getTerminator();
+ BasicBlock::iterator end1 = Block1->getTerminator()->getIterator();
BasicBlock::iterator iter2 = Block2->begin();
- BasicBlock::iterator end2 = Block2->getTerminator();
+ BasicBlock::iterator end2 = Block2->getTerminator()->getIterator();
while (1) {
if (iter1 == end1) {
@@ -338,7 +337,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
break;
}
- if (!iter1->isIdenticalTo(iter2))
+ if (!iter1->isIdenticalTo(&*iter2))
return false;
// Illegal to remove instructions with side effects except
@@ -356,10 +355,10 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
return false;
if (iter1->mayWriteToMemory()) {
- for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+ for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
// Check alias with Head2.
- if (!AA || AA->alias(iter1, BI))
+ if (!AA || AA->alias(&*iter1, &*BI))
return false;
}
}
@@ -386,8 +385,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
/// if (a || b)
/// statement;
///
-bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
- Pass *P) {
+bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
BasicBlock *IfTrue2, *IfFalse2;
Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
@@ -413,7 +411,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
return false;
TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
- Instruction *PBI2 = SecondEntryBlock->begin();
+ Instruction *PBI2 = &SecondEntryBlock->front();
if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
IfTrue2))
@@ -425,8 +423,8 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
// Check whether \param SecondEntryBlock has side-effect and is safe to
// speculate.
- for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
- Instruction *CI = BI;
+ for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
+ Instruction *CI = &*BI;
if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
!isSafeToSpeculativelyExecute(CI))
return false;
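The core of CompareIfRegionBlock, as reshaped above, is a lockstep walk of two blocks that demands instruction-for-instruction identity up to the terminators. A stripped-down sketch of just that walk, with the alias checks against Head2 left out (the helper name is illustrative):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool blocksIdenticalUpToTerminator(BasicBlock *B1, BasicBlock *B2) {
  BasicBlock::iterator I1 = B1->begin(), E1 = B1->getTerminator()->getIterator();
  BasicBlock::iterator I2 = B2->begin(), E2 = B2->getTerminator()->getIterator();
  for (; I1 != E1 && I2 != E2; ++I1, ++I2)
    if (!I1->isIdenticalTo(&*I2))
      return false;
  return I1 == E1 && I2 == E2; // same length and same contents
}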
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 44b7d25d519a..3893a752503b 100644
--- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -49,6 +49,10 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
SmallPtrSetImpl<const PHINode *> &PhiUsers) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (GV->isExternallyInitialized())
+ GS.StoredType = GlobalStatus::StoredOnce;
+
for (const Use &U : V->uses()) {
const User *UR = U.getUser();
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
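The GlobalStatus hunk encodes a single fact: an externally-initialized global can receive a value outside the compiler's view, so the analysis must behave as if one store had already been observed before any use is scanned. A sketch of just that guard, with an illustrative helper name:

#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Conservatively report a pre-existing store for externally-initialized
// globals, mirroring the seeding of StoredType above.
static bool hasUnseenInitializingStore(const Value *V) {
  if (const auto *GV = dyn_cast<GlobalVariable>(V))
    return GV->isExternallyInitialized();
  return false;
}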
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index d2d60d7cd9f6..14574119b9a8 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
@@ -41,6 +42,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
+
using namespace llvm;
static cl::opt<bool>
@@ -54,17 +56,17 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::desc("Convert align attributes to assumptions during inlining."));
bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
- bool InsertLifetime) {
- return InlineFunction(CallSite(CI), IFI, InsertLifetime);
+ AAResults *CalleeAAR, bool InsertLifetime) {
+ return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
}
bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
- bool InsertLifetime) {
- return InlineFunction(CallSite(II), IFI, InsertLifetime);
+ AAResults *CalleeAAR, bool InsertLifetime) {
+ return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
}
namespace {
- /// A class for recording information about inlining through an invoke.
- class InvokeInliningInfo {
+ /// A class for recording information about inlining a landing pad.
+ class LandingPadInliningInfo {
BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke.
@@ -72,7 +74,7 @@ namespace {
SmallVector<Value*, 8> UnwindDestPHIValues;
public:
- InvokeInliningInfo(InvokeInst *II)
+ LandingPadInliningInfo(InvokeInst *II)
: OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
// If there are PHI nodes in the unwind destination block, we need to keep
@@ -121,14 +123,14 @@ namespace {
}
}
};
-}
+} // anonymous namespace
/// Get or create a target for the branch from ResumeInsts.
-BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
+BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
if (InnerResumeDest) return InnerResumeDest;
// Split the landing pad.
- BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
+ BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
InnerResumeDest =
OuterResumeDest->splitBasicBlock(SplitPoint,
OuterResumeDest->getName() + ".body");
@@ -137,7 +139,7 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
const unsigned PHICapacity = 2;
// Create corresponding new PHIs for all the PHIs in the outer landing pad.
- BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
+ Instruction *InsertPoint = &InnerResumeDest->front();
BasicBlock::iterator I = OuterResumeDest->begin();
for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
PHINode *OuterPHI = cast<PHINode>(I);
@@ -162,8 +164,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI,
- SmallPtrSetImpl<LandingPadInst*> &InlinedLPads) {
+void LandingPadInliningInfo::forwardResume(
+ ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
BasicBlock *Src = RI->getParent();
@@ -182,33 +184,39 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
-static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
- InvokeInliningInfo &Invoke) {
+static BasicBlock *
+HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *I = BBI++;
+ Instruction *I = &*BBI++;
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
- // If this call cannot unwind, don't convert it to an invoke.
- // Inline asm calls cannot throw.
if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
continue;
// Convert this function call into an invoke instruction. First, split the
// basic block.
- BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+ BasicBlock *Split =
+ BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc");
// Delete the unconditional branch inserted by splitBasicBlock
BB->getInstList().pop_back();
// Create the new invoke instruction.
- ImmutableCallSite CS(CI);
- SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
- InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
- Invoke.getOuterResumeDest(),
- InvokeArgs, CI->getName(), BB);
+ SmallVector<Value*, 8> InvokeArgs(CI->arg_begin(), CI->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+
+ CI->getOperandBundlesAsDefs(OpBundles);
+
+ // Note: we're round tripping operand bundles through memory here, and that
+ // can potentially be avoided with a cleverer API design that we do not have
+ // as of this time.
+
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs,
+ OpBundles, CI->getName(), BB);
II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -219,12 +227,9 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
// Delete the original call
Split->getInstList().pop_front();
-
- // Update any PHI nodes in the exceptional block to indicate that there is
- // now a new entry in them.
- Invoke.addIncomingPHIValuesFor(BB);
- return;
+ return BB;
}
+ return nullptr;
}
/// If we inlined an invoke site, we need to convert calls
@@ -233,8 +238,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
-static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
- ClonedCodeInfo &InlinedCodeInfo) {
+static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
BasicBlock *InvokeDest = II->getUnwindDest();
Function *Caller = FirstNewBlock->getParent();
@@ -242,11 +247,12 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// The inlined code is currently at the end of the function, scan from the
// start of the inlined code to its end, checking for stuff we need to
// rewrite.
- InvokeInliningInfo Invoke(II);
+ LandingPadInliningInfo Invoke(II);
// Get all of the inlined landing pad instructions.
SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
- for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
+ I != E; ++I)
if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
InlinedLPads.insert(II->getLandingPadInst());
@@ -262,9 +268,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
InlinedLPad->setCleanup(true);
}
- for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
+ BB != E; ++BB) {
if (InlinedCodeInfo.ContainsCalls)
- HandleCallsInBlockInlinedThroughInvoke(BB, Invoke);
+ if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
+ &*BB, Invoke.getOuterResumeDest()))
+ // Update any PHI nodes in the exceptional block to indicate that there
+ // is now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(NewBB);
// Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
@@ -278,6 +289,99 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
InvokeDest->removePredecessor(II->getParent());
}
+/// If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *UnwindDest = II->getUnwindDest();
+ Function *Caller = FirstNewBlock->getParent();
+
+ assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");
+
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing the
+ // edge from this block.
+ SmallVector<Value *, 8> UnwindDestPHIValues;
+ llvm::BasicBlock *InvokeBB = II->getParent();
+ for (Instruction &I : *UnwindDest) {
+ // Save the value to use for this edge.
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ // Add incoming-PHI values to the unwind destination block for the given basic
+ // block, using the values for the original invoke's source block.
+ auto UpdatePHINodes = [&](BasicBlock *Src) {
+ BasicBlock::iterator I = UnwindDest->begin();
+ for (Value *V : UnwindDestPHIValues) {
+ PHINode *PHI = cast<PHINode>(I);
+ PHI->addIncoming(V, Src);
+ ++I;
+ }
+ };
+
+ // This connects all the instructions which 'unwind to caller' to the invoke
+ // destination.
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
+ BB != E; ++BB) {
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ if (CRI->unwindsToCaller()) {
+ CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI);
+ CRI->eraseFromParent();
+ UpdatePHINodes(&*BB);
+ }
+ }
+
+ Instruction *I = BB->getFirstNonPHI();
+ if (!I->isEHPad())
+ continue;
+
+ Instruction *Replacement = nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
+ if (CatchSwitch->unwindsToCaller()) {
+ auto *NewCatchSwitch = CatchSwitchInst::Create(
+ CatchSwitch->getParentPad(), UnwindDest,
+ CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
+ CatchSwitch);
+ for (BasicBlock *PadBB : CatchSwitch->handlers())
+ NewCatchSwitch->addHandler(PadBB);
+ Replacement = NewCatchSwitch;
+ }
+ } else if (!isa<FuncletPadInst>(I)) {
+ llvm_unreachable("unexpected EHPad!");
+ }
+
+ if (Replacement) {
+ Replacement->takeName(I);
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ UpdatePHINodes(&*BB);
+ }
+ }
+
+ if (InlinedCodeInfo.ContainsCalls)
+ for (Function::iterator BB = FirstNewBlock->getIterator(),
+ E = Caller->end();
+ BB != E; ++BB)
+ if (BasicBlock *NewBB =
+ HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest))
+ // Update any PHI nodes in the exceptional block to indicate that there
+ // is now a new entry in them.
+ UpdatePHINodes(NewBB);
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ UnwindDest->removePredecessor(InvokeBB);
+}
+
/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unique scopes" at every call site. Were this not done, then
@@ -395,17 +499,16 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
- const DataLayout &DL, AliasAnalysis *AA) {
+ const DataLayout &DL, AAResults *CalleeAAR) {
if (!EnableNoAliasConversion)
return;
const Function *CalledFunc = CS.getCalledFunction();
SmallVector<const Argument *, 4> NoAliasArgs;
- for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
- E = CalledFunc->arg_end(); I != E; ++I) {
- if (I->hasNoAliasAttr() && !I->hasNUses(0))
- NoAliasArgs.push_back(I);
+ for (const Argument &I : CalledFunc->args()) {
+ if (I.hasNoAliasAttr() && !I.hasNUses(0))
+ NoAliasArgs.push_back(&I);
}
if (NoAliasArgs.empty())
@@ -480,10 +583,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
continue;
IsFuncCall = true;
- if (AA) {
- AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(ICS);
- if (MRB == AliasAnalysis::OnlyAccessesArgumentPointees ||
- MRB == AliasAnalysis::OnlyReadsArgumentPointees)
+ if (CalleeAAR) {
+ FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS);
+ if (MRB == FMRB_OnlyAccessesArgumentPointees ||
+ MRB == FMRB_OnlyReadsArgumentPointees)
IsArgMemOnlyCall = true;
}
@@ -518,7 +621,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) {
SmallVector<Value *, 4> Objects;
GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]),
- Objects, DL, /* MaxLookup = */ 0);
+ Objects, DL, /* LI = */ nullptr);
for (Value *O : Objects)
ObjSet.insert(O);
@@ -646,7 +749,7 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
// caller, then don't bother inserting the assumption.
Value *Arg = CS.getArgument(I->getArgNo());
if (getKnownAlignment(Arg, DL, CS.getInstruction(),
- &IFI.ACT->getAssumptionCache(*CalledFunc),
+ &IFI.ACT->getAssumptionCache(*CS.getCaller()),
&DT) >= Align)
continue;
@@ -731,7 +834,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
BasicBlock *InsertBlock,
InlineFunctionInfo &IFI) {
Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
- IRBuilder<> Builder(InsertBlock->begin());
+ IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
@@ -851,9 +954,8 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
// Starting from the top, rebuild the nodes to point to the new inlined-at
// location (then rebuilding the rest of the chain behind it) and update the
// map of already-constructed inlined-at nodes.
- for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend();
- I != E; ++I) {
- const DILocation *MD = *I;
+ for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(),
+ InlinedAtLocations.rend())) {
Last = IANodes[MD] = DILocation::getDistinct(
Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
}
@@ -917,7 +1019,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- bool InsertLifetime) {
+ AAResults *CalleeAAR, bool InsertLifetime) {
Instruction *TheCall = CS.getInstruction();
assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
"Instruction not in function!");
@@ -930,6 +1032,22 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
+ // The inliner does not know how to inline through calls with operand bundles
+ // in general ...
+ if (CS.hasOperandBundles()) {
+ for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
+ uint32_t Tag = CS.getOperandBundleAt(i).getTagID();
+ // ... but it knows how to inline through "deopt" operand bundles ...
+ if (Tag == LLVMContext::OB_deopt)
+ continue;
+ // ... and "funclet" operand bundles.
+ if (Tag == LLVMContext::OB_funclet)
+ continue;
+
+ return false;
+ }
+ }
+
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
bool MarkNoUnwind = CS.doesNotThrow();
@@ -950,13 +1068,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Get the personality function from the callee if it contains a landing pad.
Constant *CalledPersonality =
- CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn() : nullptr;
+ CalledFunc->hasPersonalityFn()
+ ? CalledFunc->getPersonalityFn()->stripPointerCasts()
+ : nullptr;
// Find the personality function used by the landing pads of the caller. If it
// exists, then check to see that it matches the personality function used in
// the callee.
Constant *CallerPersonality =
- Caller->hasPersonalityFn() ? Caller->getPersonalityFn() : nullptr;
+ Caller->hasPersonalityFn()
+ ? Caller->getPersonalityFn()->stripPointerCasts()
+ : nullptr;
if (CalledPersonality) {
if (!CallerPersonality)
Caller->setPersonalityFn(CalledPersonality);
@@ -968,9 +1090,46 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
return false;
}
+ // We need to figure out which funclet the callsite was in so that we may
+ // properly nest the callee.
+ Instruction *CallSiteEHPad = nullptr;
+ if (CallerPersonality) {
+ EHPersonality Personality = classifyEHPersonality(CallerPersonality);
+ if (isFuncletEHPersonality(Personality)) {
+ Optional<OperandBundleUse> ParentFunclet =
+ CS.getOperandBundle(LLVMContext::OB_funclet);
+ if (ParentFunclet)
+ CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
+
+ // OK, the inlining site is legal. What about the target function?
+
+ if (CallSiteEHPad) {
+ if (Personality == EHPersonality::MSVC_CXX) {
+ // The MSVC personality cannot tolerate catches getting inlined into
+ // cleanup funclets.
+ if (isa<CleanupPadInst>(CallSiteEHPad)) {
+ // Ok, the call site is within a cleanuppad. Let's check the callee
+ // for catchpads.
+ for (const BasicBlock &CalledBB : *CalledFunc) {
+ if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
+ return false;
+ }
+ }
+ } else if (isAsynchronousEHPersonality(Personality)) {
+ // SEH is even less tolerant, there may not be any sort of exceptional
+ // funclet in the callee.
+ for (const BasicBlock &CalledBB : *CalledFunc) {
+ if (CalledBB.isEHPad())
+ return false;
+ }
+ }
+ }
+ }
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
- Function::iterator LastBlock = &Caller->back();
+ Function::iterator LastBlock = --Caller->end();
// Make sure to capture all of the return instructions from the cloned
// function.
@@ -1007,7 +1166,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
}
- VMap[I] = ActualArg;
+ VMap[&*I] = ActualArg;
}
// Add alignment assumptions if necessary. We do this before the inlined
@@ -1029,7 +1188,61 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
- FirstNewBlock, IFI);
+ &*FirstNewBlock, IFI);
+
+ Optional<OperandBundleUse> ParentDeopt =
+ CS.getOperandBundle(LLVMContext::OB_deopt);
+ if (ParentDeopt) {
+ SmallVector<OperandBundleDef, 2> OpDefs;
+
+ for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
+ Instruction *I = dyn_cast_or_null<Instruction>(VH);
+ if (!I) continue; // instruction was DCE'd or RAUW'ed to undef
+
+ OpDefs.clear();
+
+ CallSite ICS(I);
+ OpDefs.reserve(ICS.getNumOperandBundles());
+
+ for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
+ auto ChildOB = ICS.getOperandBundleAt(i);
+ if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
+ // If the inlined call has other operand bundles, let them be
+ OpDefs.emplace_back(ChildOB);
+ continue;
+ }
+
+ // It may be useful to separate this logic (of handling operand
+ // bundles) out to a separate "policy" component if this gets crowded.
+ // Prepend the parent's deoptimization continuation to the newly
+ // inlined call's deoptimization continuation.
+ std::vector<Value *> MergedDeoptArgs;
+ MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
+ ChildOB.Inputs.size());
+
+ MergedDeoptArgs.insert(MergedDeoptArgs.end(),
+ ParentDeopt->Inputs.begin(),
+ ParentDeopt->Inputs.end());
+ MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
+ ChildOB.Inputs.end());
+
+ OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
+ }
+
+ Instruction *NewI = nullptr;
+ if (isa<CallInst>(I))
+ NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
+ else
+ NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
+
+ // Note: the RAUW does the appropriate fixup in VMap, so we need to do
+ // this even if the call returns void.
+ I->replaceAllUsesWith(NewI);
+
+ VH = nullptr;
+ I->eraseFromParent();
+ }
+ }
// Update the callgraph if requested.
if (IFI.CG)
@@ -1042,7 +1255,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CloneAliasScopeMetadata(CS, VMap);
// Add noalias metadata if necessary.
- AddAliasScopeMetadata(CS, VMap, DL, IFI.AA);
+ AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
@@ -1085,9 +1298,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Transfer all of the allocas over in a block. Using splice means
// that the instructions aren't removed from the symbol table, then
// reinserted.
- Caller->getEntryBlock().getInstList().splice(InsertPoint,
- FirstNewBlock->getInstList(),
- AI, I);
+ Caller->getEntryBlock().getInstList().splice(
+ InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I);
}
// Move any dbg.declares describing the allocas into the entry basic block.
DIBuilder DIB(*Caller->getParent());
@@ -1137,7 +1349,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Leave lifetime markers for the static alloca's, scoping them to the
// function we just inlined.
if (InsertLifetime && !IFI.StaticAllocas.empty()) {
- IRBuilder<> builder(FirstNewBlock->begin());
+ IRBuilder<> builder(&FirstNewBlock->front());
for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
AllocaInst *AI = IFI.StaticAllocas[ai];
@@ -1189,7 +1401,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
// Insert the llvm.stacksave.
- CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
.CreateCall(StackSave, {}, "savedstack");
// Insert a call to llvm.stackrestore before any return instructions in the
@@ -1203,10 +1415,74 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ // Update the lexical scopes of the new funclets and callsites.
+ // Anything that had 'none' as its parent is now nested inside the callsite's
+ // EHPad.
+
+ if (CallSiteEHPad) {
+ for (Function::iterator BB = FirstNewBlock->getIterator(),
+ E = Caller->end();
+ BB != E; ++BB) {
+ // Add bundle operands to any top-level call sites.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
+ Instruction *I = &*BBI++;
+ CallSite CS(I);
+ if (!CS)
+ continue;
+
+ // Skip call sites which are nounwind intrinsics.
+ auto *CalledFn =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
+ continue;
+
+ // Skip call sites which already have a "funclet" bundle.
+ if (CS.getOperandBundle(LLVMContext::OB_funclet))
+ continue;
+
+ CS.getOperandBundlesAsDefs(OpBundles);
+ OpBundles.emplace_back("funclet", CallSiteEHPad);
+
+ Instruction *NewInst;
+ if (CS.isCall())
+ NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
+ else
+ NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
+ NewInst->setDebugLoc(I->getDebugLoc());
+ NewInst->takeName(I);
+ I->replaceAllUsesWith(NewInst);
+ I->eraseFromParent();
+
+ OpBundles.clear();
+ }
+
+ Instruction *I = BB->getFirstNonPHI();
+ if (!I->isEHPad())
+ continue;
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
+ if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
+ CatchSwitch->setParentPad(CallSiteEHPad);
+ } else {
+ auto *FPI = cast<FuncletPadInst>(I);
+ if (isa<ConstantTokenNone>(FPI->getParentPad()))
+ FPI->setParentPad(CallSiteEHPad);
+ }
+ }
+ }
+
// If we are inlining for an invoke instruction, we must make sure to rewrite
// any call instructions into invoke instructions.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
- HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+ if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
+ BasicBlock *UnwindDest = II->getUnwindDest();
+ Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
+ if (isa<LandingPadInst>(FirstNonPHI)) {
+ HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+ } else {
+ HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+ }
+ }
// Handle any inlined musttail call sites. In order for a new call site to be
// musttail, the source of the clone and the inlined call site must have been
@@ -1250,7 +1526,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the calling basic block.
if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
// Move all of the instructions right before the call.
- OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ OrigBB->getInstList().splice(TheCall->getIterator(),
+ FirstNewBlock->getInstList(),
FirstNewBlock->begin(), FirstNewBlock->end());
// Remove the cloned basic block.
Caller->getBasicBlockList().pop_back();
@@ -1297,15 +1574,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Split the basic block. This guarantees that no PHI nodes will have to be
// updated due to new incoming edges, and make the invoke case more
// symmetric to the call case.
- AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
- CalledFunc->getName()+".exit");
+ AfterCallBB =
+ OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
+ CalledFunc->getName() + ".exit");
} else { // It's a call
// If this is a call instruction, we need to split the basic block that
// the call lives in.
//
- AfterCallBB = OrigBB->splitBasicBlock(TheCall,
- CalledFunc->getName()+".exit");
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(),
+ CalledFunc->getName() + ".exit");
}
// Change the branch that used to go to AfterCallBB to branch to the first
@@ -1314,14 +1592,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
TerminatorInst *Br = OrigBB->getTerminator();
assert(Br && Br->getOpcode() == Instruction::Br &&
"splitBasicBlock broken!");
- Br->setOperand(0, FirstNewBlock);
-
+ Br->setOperand(0, &*FirstNewBlock);
// Now that the function is correct, make it a little bit nicer. In
// particular, move the basic blocks inserted from the end of the function
// into the space made by splitting the source basic block.
- Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
- FirstNewBlock, Caller->end());
+ Caller->getBasicBlockList().splice(AfterCallBB->getIterator(),
+ Caller->getBasicBlockList(), FirstNewBlock,
+ Caller->end());
// Handle all of the return instructions that we just cloned in, and eliminate
// any users of the original call/invoke instruction.
@@ -1333,7 +1611,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// possible incoming values.
if (!TheCall->use_empty()) {
PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
- AfterCallBB->begin());
+ &AfterCallBB->front());
// Anything that used the result of the function call should now use the
// PHI node as their operand.
TheCall->replaceAllUsesWith(PHI);
@@ -1350,7 +1628,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
-
// Add a branch to the merge points and remove return instructions.
DebugLoc Loc;
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
@@ -1413,7 +1690,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Splice the code entry block into calling block, right before the
// unconditional branch.
CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
- OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+ OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList());
// Remove the unconditional branch.
OrigBB->getInstList().erase(Br);
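Among the InlineFunction changes, the "deopt" handling is the most algorithmic: for each call site cloned into the caller, the parent's deoptimization continuation is prepended to the inlined call's own continuation, caller state first. A sketch of only the merge step, assuming both bundles are already in hand (the function name is illustrative, not part of this commit):

#include "llvm/IR/InstrTypes.h"
#include <vector>
using namespace llvm;

static OperandBundleDef mergeDeoptBundles(const OperandBundleUse &Parent,
                                          const OperandBundleUse &Child) {
  std::vector<Value *> Merged;
  Merged.reserve(Parent.Inputs.size() + Child.Inputs.size());
  // Parent (caller) deopt state first, then the inlined call's own state.
  Merged.insert(Merged.end(), Parent.Inputs.begin(), Parent.Inputs.end());
  Merged.insert(Merged.end(), Child.Inputs.begin(), Child.Inputs.end());
  return OperandBundleDef("deopt", std::move(Merged));
}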
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index 30edf3b7aae4..5687afa61e2a 100644
--- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -380,14 +380,10 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
IRBuilder<> Builder(Rem);
- Type *RemTy = Rem->getType();
- if (RemTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
-
- unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
-
- if (RemTyBitWidth != 32 && RemTyBitWidth != 64)
- llvm_unreachable("Div of bitwidth other than 32 or 64 not supported");
+ assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported");
+ assert((Rem->getType()->getIntegerBitWidth() == 32 ||
+ Rem->getType()->getIntegerBitWidth() == 64) &&
+ "Div of bitwidth other than 32 or 64 not supported");
// First prepare the sign if it's a signed remainder
if (Rem->getOpcode() == Instruction::SRem) {
@@ -401,7 +397,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
// If we didn't actually generate an urem instruction, we're done
// This happens, for example, if the input was constant. In this case the
// Builder insertion point was unchanged
- if (Rem == Builder.GetInsertPoint())
+ if (Rem == Builder.GetInsertPoint().getNodePtrUnchecked())
return true;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
@@ -440,14 +436,10 @@ bool llvm::expandDivision(BinaryOperator *Div) {
IRBuilder<> Builder(Div);
- Type *DivTy = Div->getType();
- if (DivTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
-
- unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
-
- if (DivTyBitWidth != 32 && DivTyBitWidth != 64)
- llvm_unreachable("Div of bitwidth other than 32 or 64 not supported");
+ assert(!Div->getType()->isVectorTy() && "Div over vectors not supported");
+ assert((Div->getType()->getIntegerBitWidth() == 32 ||
+ Div->getType()->getIntegerBitWidth() == 64) &&
+ "Div of bitwidth other than 32 or 64 not supported");
// First prepare the sign if it's a signed division
if (Div->getOpcode() == Instruction::SDiv) {
@@ -461,7 +453,7 @@ bool llvm::expandDivision(BinaryOperator *Div) {
// If we didn't actually generate an udiv instruction, we're done
// This happens, for example, if the input was constant. In this case the
// Builder insertion point was unchanged
- if (Div == Builder.GetInsertPoint())
+ if (Div == Builder.GetInsertPoint().getNodePtrUnchecked())
return true;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
@@ -492,15 +484,14 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
"Trying to expand remainder from a non-remainder function");
Type *RemTy = Rem->getType();
- if (RemTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
+ assert(!RemTy->isVectorTy() && "Div over vectors not supported");
unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
- if (RemTyBitWidth > 32)
- llvm_unreachable("Div of bitwidth greater than 32 not supported");
+ assert(RemTyBitWidth <= 32 &&
+ "Div of bitwidth greater than 32 not supported");
- if (RemTyBitWidth == 32)
+ if (RemTyBitWidth == 32)
return expandRemainder(Rem);
// If bitwidth smaller than 32 extend inputs, extend output and proceed
@@ -542,15 +533,13 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
"Trying to expand remainder from a non-remainder function");
Type *RemTy = Rem->getType();
- if (RemTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
+ assert(!RemTy->isVectorTy() && "Div over vectors not supported");
unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
- if (RemTyBitWidth > 64)
- llvm_unreachable("Div of bitwidth greater than 64 not supported");
+ assert(RemTyBitWidth <= 64 &&
+        "Div of bitwidth greater than 64 not supported");
- if (RemTyBitWidth == 64)
+ if (RemTyBitWidth == 64)
return expandRemainder(Rem);
// If bitwidth smaller than 64 extend inputs, extend output and proceed
@@ -593,13 +582,11 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
"Trying to expand division from a non-division function");
Type *DivTy = Div->getType();
- if (DivTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
+ assert(!DivTy->isVectorTy() && "Div over vectors not supported");
unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
- if (DivTyBitWidth > 32)
- llvm_unreachable("Div of bitwidth greater than 32 not supported");
+ assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported");
if (DivTyBitWidth == 32)
return expandDivision(Div);
@@ -643,13 +630,12 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
"Trying to expand division from a non-division function");
Type *DivTy = Div->getType();
- if (DivTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
+ assert(!DivTy->isVectorTy() && "Div over vectors not supported");
unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
- if (DivTyBitWidth > 64)
- llvm_unreachable("Div of bitwidth greater than 64 not supported");
+ assert(DivTyBitWidth <= 64 &&
+ "Div of bitwidth greater than 64 not supported");
if (DivTyBitWidth == 64)
return expandDivision(Div);
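The IntegerDivision hunks all make the same trade: vector-type and bitwidth restrictions are caller obligations, so they become asserts that compile away in release builds rather than llvm_unreachable paths that survive into shipped code. A minimal sketch of the resulting precondition style (illustrative helper, not part of this commit):

#include "llvm/IR/Instructions.h"
#include <cassert>
using namespace llvm;

static void checkScalarDivWidth(BinaryOperator *Div) {
  assert(!Div->getType()->isVectorTy() && "Div over vectors not supported");
  unsigned BitWidth = Div->getType()->getIntegerBitWidth();
  (void)BitWidth; // referenced only by the assert in release builds
  assert((BitWidth == 32 || BitWidth == 64) &&
         "Div of bitwidth other than 32 or 64 not supported");
}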
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 9d40b6989d6e..b4b2e148dfbb 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -31,8 +31,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -64,6 +66,13 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
PredIteratorCache &PredCache, LoopInfo *LI) {
SmallVector<Use *, 16> UsesToRewrite;
+ // Tokens cannot be used in PHI nodes, so we skip over them.
+ // We can run into tokens which are live out of a loop with catchswitch
+ // instructions in Windows EH if the catchswitch has one catchpad which
+ // is inside the loop and another which is not.
+ if (Inst.getType()->isTokenTy())
+ return false;
+
BasicBlock *InstBB = Inst.getParent();
for (Use &U : Inst.uses()) {
@@ -84,9 +93,8 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
// Invoke instructions are special in that their result value is not available
// along their unwind edge. The code below tests to see whether DomBB
- // dominates
- // the value, so adjust DomBB to the normal destination block, which is
- // effectively where the value is first usable.
+ // dominates the value, so adjust DomBB to the normal destination block,
+ // which is effectively where the value is first usable.
BasicBlock *DomBB = Inst.getParent();
if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst))
DomBB = Inv->getNormalDest();
@@ -101,10 +109,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
- for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(),
- BBE = ExitBlocks.end();
- BBI != BBE; ++BBI) {
- BasicBlock *ExitBB = *BBI;
+ for (BasicBlock *ExitBB : ExitBlocks) {
if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
continue;
@@ -113,7 +118,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
continue;
PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
- Inst.getName() + ".lcssa", ExitBB->begin());
+ Inst.getName() + ".lcssa", &ExitBB->front());
// Add inputs from inside the loop for this PHI.
for (BasicBlock *Pred : PredCache.get(ExitBB)) {
@@ -148,26 +153,26 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
// Rewrite all uses outside the loop in terms of the new PHIs we just
// inserted.
- for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+ for (Use *UseToRewrite : UsesToRewrite) {
// If this use is in an exit block, rewrite to use the newly inserted PHI.
// This is required for correctness because SSAUpdate doesn't handle uses in
// the same block. It assumes the PHI we inserted is at the end of the
// block.
- Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+ Instruction *User = cast<Instruction>(UseToRewrite->getUser());
BasicBlock *UserBB = User->getParent();
if (PHINode *PN = dyn_cast<PHINode>(User))
- UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
+ UserBB = PN->getIncomingBlock(*UseToRewrite);
if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
// Tell the VHs that the uses changed. This updates SCEV's caches.
- if (UsesToRewrite[i]->get()->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
- UsesToRewrite[i]->set(UserBB->begin());
+ if (UseToRewrite->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
+ UseToRewrite->set(&UserBB->front());
continue;
}
// Otherwise, do full PHI insertion.
- SSAUpdate.RewriteUse(*UsesToRewrite[i]);
+ SSAUpdate.RewriteUse(*UseToRewrite);
}
// Post process PHI instructions that were inserted into another disjoint loop
@@ -190,10 +195,9 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
}
// Remove PHI nodes that did not have any uses rewritten.
- for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
- if (AddedPHIs[i]->use_empty())
- AddedPHIs[i]->eraseFromParent();
- }
+ for (PHINode *PN : AddedPHIs)
+ if (PN->use_empty())
+ PN->eraseFromParent();
return true;
}
@@ -205,8 +209,8 @@ blockDominatesAnExit(BasicBlock *BB,
DominatorTree &DT,
const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
DomTreeNode *DomNode = DT.getNode(BB);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i])))
+ for (BasicBlock *ExitBB : ExitBlocks)
+ if (DT.dominates(DomNode, DT.getNode(ExitBB)))
return true;
return false;
@@ -227,25 +231,22 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, rewrite those uses.
- for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end();
- BBI != BBE; ++BBI) {
- BasicBlock *BB = *BBI;
-
+ for (BasicBlock *BB : L.blocks()) {
// For large loops, avoid use-scanning by using dominance information: In
// particular, if a block does not dominate any of the loop exits, then none
// of the values defined in the block could be used outside the loop.
if (!blockDominatesAnExit(BB, DT, ExitBlocks))
continue;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (Instruction &I : *BB) {
// Reject two common cases fast: instructions with no uses (like stores)
// and instructions with one use that is in the same block as this.
- if (I->use_empty() ||
- (I->hasOneUse() && I->user_back()->getParent() == BB &&
- !isa<PHINode>(I->user_back())))
+ if (I.use_empty() ||
+ (I.hasOneUse() && I.user_back()->getParent() == BB &&
+ !isa<PHINode>(I.user_back())))
continue;
- Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache, LI);
+ Changed |= processInstruction(L, I, DT, ExitBlocks, PredCache, LI);
}
}
@@ -266,8 +267,8 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
bool Changed = false;
// Recurse depth-first through inner loops.
- for (Loop::iterator I = L.begin(), E = L.end(); I != E; ++I)
- Changed |= formLCSSARecursively(**I, DT, LI, SE);
+ for (Loop *SubLoop : L.getSubLoops())
+ Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
Changed |= formLCSSA(L, DT, LI, SE);
return Changed;
@@ -296,8 +297,10 @@ struct LCSSA : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
}
};
}
@@ -306,6 +309,8 @@ char LCSSA::ID = 0;
INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
@@ -317,7 +322,8 @@ bool LCSSA::runOnFunction(Function &F) {
bool Changed = false;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = getAnalysisIfAvailable<ScalarEvolution>();
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ SE = SEWP ? &SEWP->getSE() : nullptr;
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
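The invariant formLCSSA maintains is simple to state: any value live out of a loop must reach its outside users through a PHI placed in a loop exit block, carrying the same incoming value on every edge. A sketch of the PHI-insertion step in isolation, assuming ExitBB's predecessors are all inside the loop (the real pass uses a PredIteratorCache; the helper name here is illustrative):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static PHINode *insertLCSSAPhi(Instruction &Inst, BasicBlock *ExitBB,
                               unsigned NumPreds) {
  PHINode *PN = PHINode::Create(Inst.getType(), NumPreds,
                                Inst.getName() + ".lcssa", &ExitBB->front());
  for (BasicBlock *Pred : predecessors(ExitBB))
    PN->addIncoming(&Inst, Pred); // identical value on every in-loop edge
  return PN;
}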
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index ba8af47b54e1..e75163f323df 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -17,10 +17,11 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -188,9 +189,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BasicBlock *BB = SI->getParent();
// Remove entries from PHI nodes which we no longer branch to...
- for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ for (BasicBlock *Succ : SI->successors()) {
// Found case matching a constant operand?
- BasicBlock *Succ = SI->getSuccessor(i);
if (Succ == TheOnlyDest)
TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
else
@@ -230,6 +230,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SIDef->getValue().getZExtValue()));
}
+ // Update make.implicit metadata to the newly-created conditional branch.
+ MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit);
+ if (MakeImplicitMD)
+ NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD);
+
// Delete the old switch.
SI->eraseFromParent();
return true;
@@ -283,8 +288,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI) {
if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
- // We don't want the landingpad instruction removed by anything this general.
- if (isa<LandingPadInst>(I))
+ // We don't want the landingpad-like instructions removed by anything this
+ // general.
+ if (I->isEHPad())
return false;
// We don't want debug info removed by anything this general, unless
@@ -414,6 +420,49 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
return false;
}
+static bool
+simplifyAndDCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ WorkList.insert(OpI);
+ }
+
+ I->eraseFromParent();
+
+ return true;
+ }
+
+ if (Value *SimpleV = SimplifyInstruction(I, DL)) {
+ // Add the users to the worklist. CAREFUL: an instruction can use itself,
+ // in the case of a phi node.
+ for (User *U : I->users())
+ if (U != I)
+ WorkList.insert(cast<Instruction>(U));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+ I->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
/// simplify any instructions in it and recursively delete dead instructions.
///
@@ -422,30 +471,34 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
const TargetLibraryInfo *TLI) {
bool MadeChange = false;
+ const DataLayout &DL = BB->getModule()->getDataLayout();
#ifndef NDEBUG
// In debug builds, ensure that the terminator of the block is never replaced
// or deleted by these simplifications. The idea of simplification is that it
// cannot introduce new instructions, and there is no way to replace the
// terminator of a block without introducing a new instruction.
- AssertingVH<Instruction> TerminatorVH(--BB->end());
+ AssertingVH<Instruction> TerminatorVH(&BB->back());
#endif
- for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) {
+ SmallSetVector<Instruction *, 16> WorkList;
+ // Iterate over the original function, only adding insts to the worklist
+ // if they actually need to be revisited. This avoids having to pre-init
+ // the worklist with the entire function's worth of instructions.
+ for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end()); BI != E;) {
assert(!BI->isTerminator());
- Instruction *Inst = BI++;
+ Instruction *I = &*BI;
+ ++BI;
- WeakVH BIHandle(BI);
- if (recursivelySimplifyInstruction(Inst, TLI)) {
- MadeChange = true;
- if (BIHandle != BI)
- BI = BB->begin();
- continue;
- }
+ // We're visiting this instruction now, so make sure it's not in the
+ // worklist from an earlier visit.
+ if (!WorkList.count(I))
+ MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
+ }
- MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
- if (BIHandle != BI)
- BI = BB->begin();
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.pop_back_val();
+ MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
}
return MadeChange;
}
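
The rewritten SimplifyInstructionsInBlock above makes a single pass over the block and queues follow-up work in a SmallSetVector instead of restarting from begin() whenever an iterator is invalidated. A minimal sketch of a caller, assuming only standard LLVM 3.8 headers (the wrapper function is hypothetical):

    // Simplify and DCE every block of a function with the reworked helper.
    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static bool simplifyFunctionBody(Function &F, const TargetLibraryInfo *TLI) {
      bool Changed = false;
      // Dead operands found while erasing an instruction are queued on the
      // internal worklist rather than forcing a rescan of the block.
      for (BasicBlock &BB : F)
        Changed |= SimplifyInstructionsInBlock(&BB, TLI);
      return Changed;
    }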
@@ -808,7 +861,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
// Copy over any phi, debug or lifetime instruction.
BB->getTerminator()->eraseFromParent();
- Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+ Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(),
+ BB->getInstList());
} else {
while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
// We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
@@ -1017,8 +1071,13 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (LdStHasDebugValue(DIVar, LI))
return true;
- Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
- DDI->getDebugLoc(), LI);
+ // We are now tracking the loaded value instead of the address. In the
+ // future if multi-location support is added to the IR, it might be
+ // preferable to keep tracking both the loaded value and the original
+ // address in case the alloca cannot be elided.
+ Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
+ LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr);
+ DbgValue->insertAfter(LI);
return true;
}
@@ -1034,8 +1093,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
SmallVector<DbgDeclareInst *, 4> Dbgs;
for (auto &FI : F)
- for (BasicBlock::iterator BI : FI)
- if (auto DDI = dyn_cast<DbgDeclareInst>(BI))
+ for (Instruction &BI : FI)
+ if (auto DDI = dyn_cast<DbgDeclareInst>(&BI))
Dbgs.push_back(DDI);
if (Dbgs.empty())
@@ -1060,9 +1119,13 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
+ SmallVector<uint64_t, 1> NewDIExpr;
+ auto *DIExpr = DDI->getExpression();
+ NewDIExpr.push_back(dwarf::DW_OP_deref);
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
- DDI->getExpression(), DDI->getDebugLoc(),
- CI);
+ DIB.createExpression(NewDIExpr),
+ DDI->getDebugLoc(), CI);
}
DDI->eraseFromParent();
}
@@ -1082,9 +1145,10 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return nullptr;
}
-bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, bool Deref) {
- DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
+bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
+ Instruction *InsertBefore, DIBuilder &Builder,
+ bool Deref, int Offset) {
+ DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address);
if (!DDI)
return false;
DebugLoc Loc = DDI->getDebugLoc();
@@ -1092,29 +1156,40 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
- if (Deref) {
+ if (Deref || Offset) {
// Create a copy of the original DIDescriptor for user variable, prepending
// "deref" operation to a list of address elements, as new llvm.dbg.declare
// will take a value storing address of the memory for variable, not
// alloca itself.
SmallVector<uint64_t, 4> NewDIExpr;
- NewDIExpr.push_back(dwarf::DW_OP_deref);
+ if (Deref)
+ NewDIExpr.push_back(dwarf::DW_OP_deref);
+ if (Offset > 0) {
+ NewDIExpr.push_back(dwarf::DW_OP_plus);
+ NewDIExpr.push_back(Offset);
+ } else if (Offset < 0) {
+ NewDIExpr.push_back(dwarf::DW_OP_minus);
+ NewDIExpr.push_back(-Offset);
+ }
if (DIExpr)
NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIExpr = Builder.createExpression(NewDIExpr);
}
- // Insert llvm.dbg.declare in the same basic block as the original alloca,
- // and remove old llvm.dbg.declare.
- BasicBlock *BB = AI->getParent();
- Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
+ // Insert llvm.dbg.declare immediately after the original alloca, and remove
+ // old llvm.dbg.declare.
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
DDI->eraseFromParent();
return true;
}
-/// changeToUnreachable - Insert an unreachable instruction before the specified
-/// instruction, making it and the rest of the code in the block dead.
-static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder, bool Deref, int Offset) {
+ return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
+ Deref, Offset);
+}
+
+void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
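
The Deref/Offset handling in replaceDbgDeclare above turns into a DWARF expression prefix: DW_OP_deref when the variable now lives behind a pointer, DW_OP_plus or DW_OP_minus when it sits at a fixed offset inside a larger allocation. A sketch of the new replaceDbgDeclareForAlloca entry point, assuming a caller (such as a stack-rewriting sanitizer) that moved the variable 32 bytes into a combined frame object; the names and offset are hypothetical:

    // Retarget llvm.dbg.declare after moving a variable into a frame object.
    #include "llvm/IR/DIBuilder.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static void retargetDbgDeclare(AllocaInst *AI, Value *FrameBase, Module &M) {
      DIBuilder DIB(M, /*AllowUnresolved=*/false);
      // Deref=true: FrameBase holds the address of the memory, not the value.
      // Offset=32: prepends DW_OP_deref, DW_OP_plus, 32 to the expression.
      replaceDbgDeclareForAlloca(AI, FrameBase, DIB, /*Deref=*/true,
                                 /*Offset=*/32);
    }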
@@ -1132,7 +1207,7 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
new UnreachableInst(I->getContext(), I);
// All instructions after this are dead.
- BasicBlock::iterator BBI = I, BBE = BB->end();
+ BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end();
while (BBI != BBE) {
if (!BBI->use_empty())
BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
@@ -1142,8 +1217,11 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
/// changeToCall - Convert the specified invoke into a normal call.
static void changeToCall(InvokeInst *II) {
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+ SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+ "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -1162,7 +1240,7 @@ static bool markAliveBlocks(Function &F,
SmallPtrSetImpl<BasicBlock*> &Reachable) {
SmallVector<BasicBlock*, 128> Worklist;
- BasicBlock *BB = F.begin();
+ BasicBlock *BB = &F.front();
Worklist.push_back(BB);
Reachable.insert(BB);
bool Changed = false;
@@ -1187,7 +1265,7 @@ static bool markAliveBlocks(Function &F,
if (MakeUnreachable) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(BBI, false);
+ changeToUnreachable(&*BBI, false);
Changed = true;
break;
}
@@ -1201,7 +1279,7 @@ static bool markAliveBlocks(Function &F,
++BBI;
if (!isa<UnreachableInst>(BBI)) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(BBI, false);
+ changeToUnreachable(&*BBI, false);
Changed = true;
}
break;
@@ -1253,6 +1331,40 @@ static bool markAliveBlocks(Function &F,
return Changed;
}
+void llvm::removeUnwindEdge(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ changeToCall(II);
+ return;
+ }
+
+ TerminatorInst *NewTI;
+ BasicBlock *UnwindDest;
+
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI);
+ UnwindDest = CRI->getUnwindDest();
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+ auto *NewCatchSwitch = CatchSwitchInst::Create(
+ CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(),
+ CatchSwitch->getName(), CatchSwitch);
+ for (BasicBlock *PadBB : CatchSwitch->handlers())
+ NewCatchSwitch->addHandler(PadBB);
+
+ NewTI = NewCatchSwitch;
+ UnwindDest = CatchSwitch->getUnwindDest();
+ } else {
+ llvm_unreachable("Could not find unwind successor");
+ }
+
+ NewTI->takeName(TI);
+ NewTI->setDebugLoc(TI->getDebugLoc());
+ UnwindDest->removePredecessor(BB);
+ TI->replaceAllUsesWith(NewTI);
+ TI->eraseFromParent();
+}
+
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
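
removeUnwindEdge, added above, generalizes the old invoke-to-call rewrite to the WinEH terminators: an invoke becomes a call, while cleanupret and catchswitch are recreated with a null unwind destination. A hedged sketch of a caller that detaches every unwind predecessor of an EH pad it is about to delete; the predecessor list is copied first because the terminators are replaced as we go:

    // Strip all unwind edges into an EH pad that is being removed.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static void detachUnwindPreds(BasicBlock *EHPad) {
      SmallVector<BasicBlock *, 8> Preds(pred_begin(EHPad), pred_end(EHPad));
      for (BasicBlock *Pred : Preds)
        removeUnwindEdge(Pred); // invoke -> call; cleanupret/catchswitch are
                                // rebuilt without an unwind successor
    }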
@@ -1270,17 +1382,18 @@ bool llvm::removeUnreachableBlocks(Function &F) {
// Loop over all of the basic blocks that are not reachable, dropping all of
// their internal references...
for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
- if (Reachable.count(BB))
+ if (Reachable.count(&*BB))
continue;
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ for (succ_iterator SI = succ_begin(&*BB), SE = succ_end(&*BB); SI != SE;
+ ++SI)
if (Reachable.count(*SI))
- (*SI)->removePredecessor(BB);
+ (*SI)->removePredecessor(&*BB);
BB->dropAllReferences();
}
for (Function::iterator I = ++F.begin(); I != F.end();)
- if (!Reachable.count(I))
+ if (!Reachable.count(&*I))
I = F.getBasicBlockList().erase(I);
else
++I;
@@ -1288,9 +1401,10 @@ bool llvm::removeUnreachableBlocks(Function &F) {
return true;
}
-void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs) {
+void llvm::combineMetadata(Instruction *K, const Instruction *J,
+ ArrayRef<unsigned> KnownIDs) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
- K->dropUnknownMetadata(KnownIDs);
+ K->dropUnknownNonDebugMetadata(KnownIDs);
K->getAllMetadataOtherThanDebugLoc(Metadata);
for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
unsigned Kind = Metadata[i].first;
@@ -1326,8 +1440,29 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign
// Only set the !nonnull if it is present in both instructions.
K->setMetadata(Kind, JMD);
break;
+ case LLVMContext::MD_invariant_group:
+ // Preserve !invariant.group in K.
+ break;
+ case LLVMContext::MD_align:
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
}
}
+ // Set !invariant.group from J if J has it. If both instructions have it
+ // then we will just pick it from J - even when they are different.
+ // Also make sure that K is a load or store - e.g. combining a bitcast with a
+ // load could produce a bitcast with invariant.group metadata, which is invalid.
+ // FIXME: we should try to preserve both invariant.group metadata if they are
+ // different, but right now an instruction can only have one invariant.group.
+ if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group))
+ if (isa<LoadInst>(K) || isa<StoreInst>(K))
+ K->setMetadata(LLVMContext::MD_invariant_group, JMD);
}
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
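
The combineMetadata changes above matter most to GVN-style transforms that fold a load J into an equivalent dominating load K: align and dereferenceable info may be merged to the most generic form, while !invariant.group survives only on loads and stores. A sketch of the usual call, with the KnownIDs list spelled out; only IDs on the list are kept on K, and debug metadata is retained by dropUnknownNonDebugMetadata regardless:

    // Merge metadata when replacing load J with the dominating load K.
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static void mergeLoadMetadata(LoadInst *K, const LoadInst *J) {
      unsigned KnownIDs[] = {
          LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
          LLVMContext::MD_noalias,         LLVMContext::MD_range,
          LLVMContext::MD_invariant_load,  LLVMContext::MD_nonnull,
          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
          LLVMContext::MD_dereferenceable,
          LLVMContext::MD_dereferenceable_or_null};
      combineMetadata(K, J, KnownIDs);
    }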
@@ -1349,3 +1484,40 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
}
return Count;
}
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlock *BB) {
+ assert(From->getType() == To->getType());
+
+ unsigned Count = 0;
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ auto *I = cast<Instruction>(U.getUser());
+ if (DT.dominates(BB, I->getParent())) {
+ U.set(To);
+ DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
+ << *To << " in " << *U << "\n");
+ ++Count;
+ }
+ }
+ return Count;
+}
+
+bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
+ if (isa<IntrinsicInst>(CS.getInstruction()))
+ // Most LLVM intrinsics are things which can never take a safepoint.
+ // As a result, we don't need to have the stack parsable at the
+ // callsite. This is a highly useful optimization since intrinsic
+ // calls are fairly prevalent, particularly in debug builds.
+ return true;
+
+ // Check if the function is specifically marked as a gc leaf function.
+ //
+ // TODO: we should be checking the attributes on the call site as well.
+ if (const Function *F = CS.getCalledFunction())
+ return F->hasFnAttribute("gc-leaf-function");
+
+ return false;
+}
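
callsGCLeafFunction is queried by the statepoint insertion code: a call that can never reach a safepoint does not need the stack to be parseable across it, and a frontend opts a function out by attaching the "gc-leaf-function" string attribute the source checks. A small hedged sketch (the wrapper name is hypothetical):

    // Decide whether a call site needs a statepoint / safepoint.
    #include "llvm/IR/CallSite.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static bool needsSafepoint(ImmutableCallSite CS) {
      // Intrinsics and functions marked "gc-leaf-function" are leaf calls.
      return !callsGCLeafFunction(CS);
    }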
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 5c98043e4632..1fa469595d16 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -44,11 +44,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -78,7 +81,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock *> &SplitPreds,
Loop *L) {
// Check to see if NewBB is already well placed.
- Function::iterator BBI = NewBB; --BBI;
+ Function::iterator BBI = --NewBB->getIterator();
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
if (&*BBI == SplitPreds[i])
return;
@@ -92,9 +95,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
// block that neighbors a BB actually in the loop.
BasicBlock *FoundBB = nullptr;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- Function::iterator BBI = SplitPreds[i];
- if (++BBI != NewBB->getParent()->end() &&
- L->contains(BBI)) {
+ Function::iterator BBI = SplitPreds[i]->getIterator();
+ if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) {
FoundBB = SplitPreds[i];
break;
}
@@ -112,17 +114,10 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
/// preheader, this method is called to insert one. This method has two phases:
/// preheader insertion and analysis updating.
///
-BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Header = L->getHeader();
- // Get analyses that we try to update.
- auto *AA = PP->getAnalysisIfAvailable<AliasAnalysis>();
- auto *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *LIWP = PP->getAnalysisIfAvailable<LoopInfoWrapperPass>();
- auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
-
// Compute the set of predecessors of the loop that are not in the loop.
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
@@ -141,8 +136,10 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
- PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- AA, DT, LI, PreserveLCSSA);
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
+ LI, PreserveLCSSA);
+ if (!PreheaderBB)
+ return nullptr;
DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
<< PreheaderBB->getName() << "\n");
@@ -159,8 +156,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
/// This method is used to split exit blocks that have predecessors outside of
/// the loop.
static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
- AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, Pass *PP) {
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
BasicBlock *P = *I;
@@ -175,10 +172,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewExitBB = nullptr;
- bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
-
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT,
- LI, PreserveLCSSA);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI,
+ PreserveLCSSA);
+ if (!NewExitBB)
+ return nullptr;
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
<< NewExitBB->getName() << "\n");
@@ -206,8 +203,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
/// \brief The first part of loop-nestification is to find a PHI node that tells
/// us how to partition the loops.
-static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
- DominatorTree *DT,
+static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
AssumptionCache *AC) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
@@ -216,7 +212,6 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
- if (AA) AA->deleteValue(PN);
PN->eraseFromParent();
continue;
}
@@ -251,18 +246,18 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
/// created.
///
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
- AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE, Pass *PP,
+ DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, bool PreserveLCSSA,
AssumptionCache *AC) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return nullptr;
// The header is not a landing pad; preheader insertion should ensure this.
- assert(!L->getHeader()->isLandingPad() &&
- "Can't insert backedge to landing pad");
+ BasicBlock *Header = L->getHeader();
+ assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
- PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AC);
+ PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
if (!PN) return nullptr; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
@@ -286,11 +281,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (SE)
SE->forgetLoop(L);
- bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
-
- BasicBlock *Header = L->getHeader();
BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
- AA, DT, LI, PreserveLCSSA);
+ DT, LI, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -357,7 +349,6 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
/// and have that block branch to the loop header. This ensures that loops
/// have exactly one backedge.
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
- AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
@@ -369,8 +360,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
if (!Preheader)
return nullptr;
- // The header is not a landing pad; preheader insertion should ensure this.
- assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+ // The header is not an EH pad; preheader insertion should ensure this.
+ assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
@@ -394,7 +385,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
<< BEBlock->getName() << "\n");
// Move the new backedge block to right after the last backedge block.
- Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+ Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
// Now that the block has been inserted into the function, create PHI nodes in
@@ -443,7 +434,6 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// eliminate the PHI Node.
if (HasUniqueIncomingValue) {
NewPN->replaceAllUsesWith(UniqueValue);
- if (AA) AA->deleteValue(NewPN);
BEBlock->getInstList().erase(NewPN);
}
}
@@ -470,15 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
}
/// \brief Simplify one loop and queue further loops for simplification.
-///
-/// FIXME: Currently this accepts both lots of analyses that it uses and a raw
-/// Pass pointer. The Pass pointer is used by numerous utilities to update
-/// specific analyses. Rather than a pass it would be much cleaner and more
-/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
- AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, Pass *PP,
- AssumptionCache *AC) {
+ DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, AssumptionCache *AC,
+ bool PreserveLCSSA) {
bool Changed = false;
ReprocessLoop:
@@ -544,7 +529,7 @@ ReprocessLoop:
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L, PP);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (Preheader) {
++NumInserted;
Changed = true;
@@ -568,7 +553,7 @@ ReprocessLoop:
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
- if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) {
+ if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) {
++NumInserted;
Changed = true;
}
@@ -585,7 +570,7 @@ ReprocessLoop:
// common backedge instead.
if (L->getNumBackEdges() < 8) {
if (Loop *OuterL =
- separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) {
+ separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) {
++NumNested;
// Enqueue the outer loop as it should be processed next in our
// depth-first nest walk.
@@ -602,7 +587,7 @@ ReprocessLoop:
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
- LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI);
+ LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
if (LoopLatch) {
++NumInserted;
Changed = true;
@@ -618,7 +603,6 @@ ReprocessLoop:
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
- if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
@@ -654,7 +638,7 @@ ReprocessLoop:
bool AllInvariant = true;
bool AnyInvariant = false;
for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// Skip debug info intrinsics.
if (isa<DbgInfoIntrinsic>(Inst))
continue;
@@ -716,9 +700,9 @@ ReprocessLoop:
return Changed;
}
-bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
- AliasAnalysis *AA, ScalarEvolution *SE,
- AssumptionCache *AC) {
+bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, AssumptionCache *AC,
+ bool PreserveLCSSA) {
bool Changed = false;
// Worklist maintains our depth-first queue of loops in this nest to process.
@@ -734,8 +718,8 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
}
while (!Worklist.empty())
- Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
- SE, PP, AC);
+ Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
+ AC, PreserveLCSSA);
return Changed;
}
@@ -747,9 +731,6 @@ namespace {
initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
}
- // AA - If we have an alias analysis object to update, this is it, otherwise
- // this is null.
- AliasAnalysis *AA;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
@@ -767,8 +748,11 @@ namespace {
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.addPreserved<DependenceAnalysis>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
}
@@ -784,6 +768,9 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
@@ -796,15 +783,16 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
///
bool LoopSimplify::runOnFunction(Function &F) {
bool Changed = false;
- AA = getAnalysisIfAvailable<AliasAnalysis>();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = getAnalysisIfAvailable<ScalarEvolution>();
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ SE = SEWP ? &SEWP->getSE() : nullptr;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC);
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
return Changed;
}
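
With the Pass pointer gone, LoopSimplify's utility entry points take the analyses they update directly, and a null ScalarEvolution simply skips the SCEV invalidation. A sketch of a caller holding only the required analyses (the wrapper is hypothetical):

    // Canonicalize every top-level loop nest using the new simplifyLoop API.
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"
    using namespace llvm;

    static bool canonicalizeLoops(LoopInfo &LI, DominatorTree &DT,
                                  AssumptionCache &AC) {
      bool Changed = false;
      for (Loop *L : LI) // top-level loops; subloops are worklisted internally
        Changed |= simplifyLoop(L, &DT, &LI, /*SE=*/nullptr, &AC,
                                /*PreserveLCSSA=*/false);
      return Changed;
    }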
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 1dbce4746835..2499b88741fe 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -73,7 +73,7 @@ static inline void RemapInstruction(Instruction *I,
/// of loops that have already been forgotten to prevent redundant, expensive
/// calls to ScalarEvolution::forgetLoop. Returns the new combined block.
static BasicBlock *
-FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
+FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE,
SmallPtrSetImpl<Loop *> &ForgottenLoops) {
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
@@ -109,12 +109,10 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
// Erase basic block from the function...
// ScalarEvolution holds references to loop exit blocks.
- if (LPM) {
- if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) {
- if (Loop *L = LI->getLoopFor(BB)) {
- if (ForgottenLoops.insert(L).second)
- SE->forgetLoop(L);
- }
+ if (SE) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (ForgottenLoops.insert(L).second)
+ SE->forgetLoop(L);
}
}
LI->removeBlock(BB);
@@ -155,15 +153,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
-/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
-/// removed from the LoopPassManager as well. LPM can also be NULL.
-///
-/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
-/// available from the Pass it must also preserve those analyses.
+/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
+/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
bool AllowRuntime, bool AllowExpensiveTripCount,
- unsigned TripMultiple, LoopInfo *LI, Pass *PP,
- LPPassManager *LPM, AssumptionCache *AC) {
+ unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -220,6 +216,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ Loop *ParentL = L->getParentLoop();
+ bool AllExitsAreInsideParentLoop = !ParentL ||
+ std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
+ [&](BasicBlock *BB) { return ParentL->contains(BB); });
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
@@ -227,13 +229,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
if (RuntimeTripCount &&
- !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
+ !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
+ PreserveLCSSA))
return false;
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
- ScalarEvolution *SE =
- PP ? PP->getAnalysisIfAvailable<ScalarEvolution>() : nullptr;
if (SE)
SE->forgetLoop(L);
@@ -392,7 +393,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
for (unsigned i = 0; i < NewBlocks.size(); ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- ::RemapInstruction(I, LastValueMap);
+ ::RemapInstruction(&*I, LastValueMap);
}
// Loop over the PHI nodes in the original block, setting incoming values.
@@ -432,8 +433,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
- if (CompletelyUnroll && j == 0) {
- Dest = LoopExit;
+ if (CompletelyUnroll) {
+ if (j == 0)
+ Dest = LoopExit;
NeedConditional = false;
}
@@ -473,7 +475,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
if (Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM,
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE,
ForgottenLoops))
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
}
@@ -483,29 +485,24 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// whole function's cache.
AC->clear();
- DominatorTree *DT = nullptr;
- if (PP) {
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- // Incrementally updating domtree after loop unrolling would be easy.
- if (DominatorTreeWrapperPass *DTWP =
- PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DT = &DTWP->getDomTree();
- DT->recalculate(*L->getHeader()->getParent());
- }
-
- // Simplify any new induction variables in the partially unrolled loop.
- if (SE && !CompletelyUnroll) {
- SmallVector<WeakVH, 16> DeadInsts;
- simplifyLoopIVs(L, SE, LPM, DeadInsts);
-
- // Aggressively clean up dead instructions that simplifyLoopIVs already
- // identified. Any remaining should be cleaned up below.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
- }
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ // Incrementally updating domtree after loop unrolling would be easy.
+ if (DT)
+ DT->recalculate(*L->getHeader()->getParent());
+
+ // Simplify any new induction variables in the partially unrolled loop.
+ if (SE && !CompletelyUnroll) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
+
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
@@ -514,7 +511,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
@@ -529,29 +526,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
++NumUnrolled;
Loop *OuterL = L->getParentLoop();
- // Remove the loop from the LoopPassManager if it's completely removed.
- if (CompletelyUnroll && LPM != nullptr)
- LPM->deleteLoopFromQueue(L);
+ // Update LoopInfo if the loop is completely removed.
+ if (CompletelyUnroll)
+ LI->updateUnloop(L);
// If we have a pass and a DominatorTree we should re-simplify impacted loops
// to ensure subsequent analyses can rely on this form. We want to simplify
// at least one layer outside of the loop that was unrolled so that any
// changes to the parent loop exposed by the unrolling are considered.
- if (PP && DT) {
+ if (DT) {
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC);
+ bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
- // deleteLoopFromQueue updates LoopInfo.
+ // LoopInfo's been updated by updateUnloop.
Loop *LatchLoop = LI->getLoopFor(Latches.back());
if (!OuterL->contains(LatchLoop))
while (OuterL->getParentLoop() != LatchLoop)
OuterL = OuterL->getParentLoop();
- formLCSSARecursively(*OuterL, *DT, LI, SE);
+ if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified))
+ formLCSSARecursively(*OuterL, *DT, LI, SE);
+ else
+ assert(OuterL->isLCSSAForm(*DT) &&
+ "Loops should be in LCSSA form after loop-unroll.");
}
}
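
UnrollLoop now takes ScalarEvolution, DominatorTree, and AssumptionCache explicitly; the DomTree is recalculated afterwards rather than fetched through a Pass. A hedged sketch of a complete unroll for a loop whose trip count is known to be 4:

    // Fully unroll a loop with trip count 4 (Count == TripCount).
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/UnrollLoop.h"
    using namespace llvm;

    static bool unrollByFour(Loop *L, LoopInfo *LI, ScalarEvolution *SE,
                             DominatorTree *DT, AssumptionCache *AC) {
      // TripMultiple = 1 is the conservative "nothing known" value; the
      // runtime-prolog path is disabled since the trip count is exact.
      return UnrollLoop(L, /*Count=*/4, /*TripCount=*/4, /*AllowRuntime=*/false,
                        /*AllowExpensiveTripCount=*/false, /*TripMultiple=*/1,
                        LI, SE, DT, AC, /*PreserveLCSSA=*/true);
    }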
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index add5432aa276..0d68f18ad0e5 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -62,8 +62,8 @@ STATISTIC(NumRuntimeUnrolled,
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &VMap, AliasAnalysis *AA,
- DominatorTree *DT, LoopInfo *LI, Pass *P) {
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
@@ -127,8 +127,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
- SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI,
- P->mustPreserveAnalysisID(LCSSAID));
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
+ PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, Exit, NewPH);
InsertPt->eraseFromParent();
@@ -150,7 +150,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
Function *F = Header->getParent();
LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
- Loop *NewLoop = 0;
+ Loop *NewLoop = nullptr;
Loop *ParentLoop = L->getParentLoop();
if (!UnrollProlog) {
NewLoop = new Loop();
@@ -206,9 +206,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// Change the incoming values to the ones defined in the preheader or
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
if (UnrollProlog) {
- VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
} else {
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
@@ -279,7 +279,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
bool AllowExpensiveTripCount, LoopInfo *LI,
- LPPassManager *LPM) {
+ ScalarEvolution *SE, DominatorTree *DT,
+ bool PreserveLCSSA) {
// for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
return false;
@@ -291,9 +292,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
// Use Scalar Evolution to compute the trip count. This allows more
// loops to be unrolled than relying on induction var simplification
- if (!LPM)
- return false;
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
if (!SE)
return false;
@@ -308,7 +306,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
// Add 1 since the backedge count doesn't include the first loop iteration
const SCEV *TripCountSC =
- SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
+ SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
@@ -333,10 +331,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
- // Grab analyses that we preserve.
- auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
BasicBlock *PH = L->getLoopPreheader();
BasicBlock *Latch = L->getLoopLatch();
// It helps to splits the original preheader twice, one for the end of the
@@ -397,8 +391,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
VMap, LI);
// Insert the cloned blocks into function just before the original loop
- F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0],
- F->end());
+ F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F->end());
// Rewrite the cloned instruction operands to use the values
// created when the clone is created.
@@ -406,7 +400,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end();
I != E; ++I) {
- RemapInstruction(I, VMap,
+ RemapInstruction(&*I, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
}
}
@@ -414,8 +408,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
// Connect the prolog code to the original loop and update the
// PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
- ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
- /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass());
+ ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
+ PreserveLCSSA);
NumRuntimeUnrolled++;
return true;
}
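
The prolog scheme above peels TripCount mod Count iterations up front so the remaining count is an exact multiple of the unroll factor. The resulting control flow reduces to the following arithmetic, shown here as a plain C++ sketch rather than IR:

    // Shape of a runtime-unrolled (by 4) loop after prolog insertion.
    #include <cstdint>

    void runtimeUnrolledByFour(int64_t TripCount, void (*Body)(int64_t)) {
      int64_t i = 0;
      for (int64_t Prolog = TripCount % 4; i < Prolog; ++i)
        Body(i);                        // prolog: 0..3 leftover iterations
      for (; i < TripCount; i += 4) {   // main loop: a multiple of 4 remains
        Body(i); Body(i + 1); Body(i + 2); Body(i + 3);
      }
    }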
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 5cbde94a98ed..e03880526bfa 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -34,6 +34,124 @@ bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
return true;
}
+bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_IntegerOr:
+ case RK_IntegerAnd:
+ case RK_IntegerXor:
+ case RK_IntegerMinMax:
+ return true;
+ }
+ return false;
+}
+
+bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
+ return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
+}
+
+bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_FloatAdd:
+ case RK_FloatMult:
+ return true;
+ }
+ return false;
+}
+
+Instruction *
+RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+ if (!Phi->hasOneUse())
+ return Phi;
+
+ const APInt *M = nullptr;
+ Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
+
+ // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
+ // with a new integer type of the corresponding bit width.
+ if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)),
+ m_And(m_APInt(M), m_Instruction(I))))) {
+ int32_t Bits = (*M + 1).exactLogBase2();
+ if (Bits > 0) {
+ RT = IntegerType::get(Phi->getContext(), Bits);
+ Visited.insert(Phi);
+ CI.insert(J);
+ return J;
+ }
+ }
+ return Phi;
+}
+
+bool RecurrenceDescriptor::getSourceExtensionKind(
+ Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+
+ SmallVector<Instruction *, 8> Worklist;
+ bool FoundOneOperand = false;
+ unsigned DstSize = RT->getPrimitiveSizeInBits();
+ Worklist.push_back(Exit);
+
+ // Traverse the instructions in the reduction expression, beginning with the
+ // exit value.
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ for (Use &U : I->operands()) {
+
+ // Terminate the traversal if the operand is not an instruction, or we
+ // reach the starting value.
+ Instruction *J = dyn_cast<Instruction>(U.get());
+ if (!J || J == Start)
+ continue;
+
+ // Otherwise, investigate the operation if it is also in the expression.
+ if (Visited.count(J)) {
+ Worklist.push_back(J);
+ continue;
+ }
+
+ // If the operand is not in Visited, it is not a reduction operation, but
+ // it does feed into one. Make sure it is either a single-use sign- or
+ // zero-extend instruction.
+ CastInst *Cast = dyn_cast<CastInst>(J);
+ bool IsSExtInst = isa<SExtInst>(J);
+ if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
+ return false;
+
+ // Ensure the source type of the extend is no larger than the reduction
+ // type. It is not necessary for the types to be identical.
+ unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ if (SrcSize > DstSize)
+ return false;
+
+ // Furthermore, ensure that all such extends are of the same kind.
+ if (FoundOneOperand) {
+ if (IsSigned != IsSExtInst)
+ return false;
+ } else {
+ FoundOneOperand = true;
+ IsSigned = IsSExtInst;
+ }
+
+ // Lastly, if the source type of the extend matches the reduction type,
+ // add the extend to CI so that we can avoid accounting for it in the
+ // cost model.
+ if (SrcSize == DstSize)
+ CI.insert(Cast);
+ }
+ }
+ return true;
+}
+
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
Loop *TheLoop, bool HasFunNoNaNAttr,
RecurrenceDescriptor &RedDes) {
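
lookThroughAnd exists because InstCombine widens narrow reductions to a legal width and reintroduces the narrowing as a mask: an i8 add reduction carried in an i32 PHI appears as "and i32 %phi, 255". The recovered width is just the exact log2 of mask+1, as in this small sketch:

    // How the mask encodes the original reduction width (APInt arithmetic).
    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    static int32_t recoveredWidth(const APInt &Mask) {
      // For Mask == 255, Mask + 1 == 256 and exactLogBase2() == 8, i.e. the
      // reduction can be evaluated in i8; returns -1 for non 2^x-1 masks.
      return (Mask + 1).exactLogBase2();
    }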
@@ -68,10 +186,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
unsigned NumCmpSelectPatternInst = 0;
InstDesc ReduxDesc(false, nullptr);
+ // Data used for determining if the recurrence has been type-promoted.
+ Type *RecurrenceType = Phi->getType();
+ SmallPtrSet<Instruction *, 4> CastInsts;
+ Instruction *Start = Phi;
+ bool IsSigned = false;
+
SmallPtrSet<Instruction *, 8> VisitedInsts;
SmallVector<Instruction *, 8> Worklist;
- Worklist.push_back(Phi);
- VisitedInsts.insert(Phi);
+
+ // Return early if the recurrence kind does not match the type of Phi. If the
+ // recurrence kind is arithmetic, we attempt to look through AND operations
+ // resulting from the type promotion performed by InstCombine. Vector
+ // operations are not limited to the legal integer widths, so we may be able
+ // to evaluate the reduction in the narrower width.
+ if (RecurrenceType->isFloatingPointTy()) {
+ if (!isFloatingPointRecurrenceKind(Kind))
+ return false;
+ } else {
+ if (!isIntegerRecurrenceKind(Kind))
+ return false;
+ if (isArithmeticRecurrenceKind(Kind))
+ Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+ }
+
+ Worklist.push_back(Start);
+ VisitedInsts.insert(Start);
// A value in the reduction can be used:
// - By the reduction:
@@ -110,10 +250,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
!VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
return false;
- // Any reduction instruction must be of one of the allowed kinds.
- ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
- if (!ReduxDesc.isRecurrence())
- return false;
+ // Any reduction instruction must be of one of the allowed kinds. We ignore
+ // the starting value (the Phi or an AND instruction if the Phi has been
+ // type-promoted).
+ if (Cur != Start) {
+ ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isRecurrence())
+ return false;
+ }
// A reduction operation must only have one use of the reduction value.
if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
@@ -131,7 +275,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
++NumCmpSelectPatternInst;
// Check whether we found a reduction operator.
- FoundReduxOp |= !IsAPhi;
+ FoundReduxOp |= !IsAPhi && Cur != Start;
// Process users of current instruction. Push non-PHI nodes after PHI nodes
// onto the stack. This way we are going to have seen all inputs to PHI
@@ -193,6 +337,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
return false;
+ // If we think Phi may have been type-promoted, we also need to ensure that
+ // all source operands of the reduction are either SExtInsts or ZExtInsts. If
+ // so, we will be able to evaluate the reduction in the narrower bit width.
+ if (Start != Phi)
+ if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
+ IsSigned, VisitedInsts, CastInsts))
+ return false;
+
// We found a reduction var if we have reached the original phi node and we
// only have a single instruction with out-of-loop users.
@@ -200,9 +352,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// is saved as part of the RecurrenceDescriptor.
// Save the description of this reduction variable.
- RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind,
- ReduxDesc.getMinMaxKind());
-
+ RecurrenceDescriptor RD(
+ RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+ ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
RedDes = RD;
return true;
@@ -263,14 +415,14 @@ RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr) {
bool FP = I->getType()->isFloatingPointTy();
- bool FastMath = FP && I->hasUnsafeAlgebra();
+ Instruction *UAI = Prev.getUnsafeAlgebraInst();
+ if (!UAI && FP && !I->hasUnsafeAlgebra())
+ UAI = I; // FP instruction without unsafe algebra: blocks vectorization.
+
switch (I->getOpcode()) {
default:
return InstDesc(false, I);
case Instruction::PHI:
- if (FP &&
- (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax))
- return InstDesc(false, I);
return InstDesc(I, Prev.getMinMaxKind());
case Instruction::Sub:
case Instruction::Add:
@@ -284,10 +436,10 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
case Instruction::Xor:
return InstDesc(Kind == RK_IntegerXor, I);
case Instruction::FMul:
- return InstDesc(Kind == RK_FloatMult && FastMath, I);
+ return InstDesc(Kind == RK_FloatMult, I, UAI);
case Instruction::FSub:
case Instruction::FAdd:
- return InstDesc(Kind == RK_FloatAdd && FastMath, I);
+ return InstDesc(Kind == RK_FloatAdd, I, UAI);
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
@@ -442,6 +594,13 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
break;
}
+ // We only match FP sequences with unsafe algebra, so we can unconditionally
+ // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ Builder.SetFastMathFlags(FMF);
+
Value *Cmp;
if (RK == MRK_FloatMin || RK == MRK_FloatMax)
Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
@@ -452,8 +611,54 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
return Select;
}
-bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
- ConstantInt *&StepValue) {
+InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
+ ConstantInt *Step)
+ : StartValue(Start), IK(K), StepValue(Step) {
+ assert(IK != IK_NoInduction && "Not an induction");
+ assert(StartValue && "StartValue is null");
+ assert(StepValue && !StepValue->isZero() && "StepValue is zero");
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
+ "StartValue is not a pointer for pointer induction");
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
+ "StartValue is not an integer for integer induction");
+ assert(StepValue->getType()->isIntegerTy() &&
+ "StepValue is not an integer");
+}
+
+int InductionDescriptor::getConsecutiveDirection() const {
+ if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
+ return StepValue->getSExtValue();
+ return 0;
+}
+
+Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const {
+ switch (IK) {
+ case IK_IntInduction:
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (StepValue->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ if (!StepValue->isOne())
+ Index = B.CreateMul(Index, StepValue);
+ return B.CreateAdd(StartValue, Index);
+
+ case IK_PtrInduction:
+ assert(Index->getType() == StepValue->getType() &&
+ "Index type does not match StepValue type");
+ if (StepValue->isMinusOne())
+ Index = B.CreateNeg(Index);
+ else if (!StepValue->isOne())
+ Index = B.CreateMul(Index, StepValue);
+ return B.CreateGEP(nullptr, StartValue, Index);
+
+ case IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+ InductionDescriptor &D) {
Type *PhiTy = Phi->getType();
// We only handle integer and pointer induction variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
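
isInductionPHI now fills in a complete InductionDescriptor (start value, kind, step) instead of just the step constant, and transform re-materializes the induction at an arbitrary index. A hedged usage sketch:

    // Compute the value an induction PHI would have at iteration Index.
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"
    using namespace llvm;

    static Value *valueAtIteration(PHINode *Phi, ScalarEvolution *SE,
                                   IRBuilder<> &B, Value *Index) {
      InductionDescriptor ID;
      if (!InductionDescriptor::isInductionPHI(Phi, SE, ID))
        return nullptr;
      // IK_IntInduction emits Start + Index * Step; IK_PtrInduction emits a
      // GEP of the start pointer scaled by the element-count step.
      return ID.transform(B, Index);
    }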
@@ -467,6 +672,10 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
return false;
}
+ assert(AR->getLoop()->getHeader() == Phi->getParent() &&
+ "PHI is an AddRec for a different loop?!");
+ Value *StartValue =
+ Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
@@ -475,7 +684,7 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
ConstantInt *CV = C->getValue();
if (PhiTy->isIntegerTy()) {
- StepValue = CV;
+ D = InductionDescriptor(StartValue, IK_IntInduction, CV);
return true;
}
@@ -494,6 +703,27 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return false;
- StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+ auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
return true;
}
+
+/// \brief Returns the instructions in \p L that have users outside of \p L.
+SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
+ SmallVector<Instruction *, 8> UsedOutside;
+
+ for (auto *Block : L->getBlocks())
+ // FIXME: I believe that this could use copy_if if the Inst reference could
+ // be adapted into a pointer.
+ for (auto &Inst : *Block) {
+ auto Users = Inst.users();
+ if (std::any_of(Users.begin(), Users.end(), [&](User *U) {
+ auto *Use = cast<Instruction>(U);
+ return !L->contains(Use->getParent());
+ }))
+ UsedOutside.push_back(&Inst);
+ }
+
+ return UsedOutside;
+}
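
findDefsUsedOutsideOfLoop computes exactly the set that loop-duplicating transforms must patch up with PHI nodes at the merge point. A small sketch:

    // Count the loop-defined values that escape L.
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"
    using namespace llvm;

    static unsigned numEscapingDefs(Loop *L) {
      // Each element is an instruction inside L with a user outside of L.
      return findDefsUsedOutsideOfLoop(L).size();
    }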
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 832079d2cf63..9a2a06cf6891 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -13,43 +13,81 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopVersioning.h"
using namespace llvm;
LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT,
- const SmallVector<int, 8> *PtrToPartition)
- : VersionedLoop(L), NonVersionedLoop(nullptr),
- PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {
+ DominatorTree *DT, ScalarEvolution *SE,
+ bool UseLAIChecks)
+ : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT),
+ SE(SE) {
assert(L->getExitBlock() && "No single exit block");
assert(L->getLoopPreheader() && "No preheader");
+ if (UseLAIChecks) {
+ setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
+ setSCEVChecks(LAI.PSE.getUnionPredicate());
+ }
}
-bool LoopVersioning::needsRuntimeChecks() const {
- return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition);
+void LoopVersioning::setAliasChecks(
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
+ AliasChecks = std::move(Checks);
}
-void LoopVersioning::versionLoop(Pass *P) {
+void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) {
+ Preds = std::move(Check);
+}
+
+void LoopVersioning::versionLoop(
+ const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
Instruction *FirstCheckInst;
Instruction *MemRuntimeCheck;
+ Value *SCEVRuntimeCheck;
+ Value *RuntimeCheck = nullptr;
+
// Add the memcheck in the original preheader (this is empty initially).
- BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader();
+ BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
std::tie(FirstCheckInst, MemRuntimeCheck) =
- LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition);
+ LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks);
assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
+ const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
+ SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
+ "scev.check");
+ SCEVRuntimeCheck =
+ Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator());
+ auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck);
+
+ // Discard the SCEV runtime check if it is always true.
+ if (CI && CI->isZero())
+ SCEVRuntimeCheck = nullptr;
+
+ if (MemRuntimeCheck && SCEVRuntimeCheck) {
+ RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
+ SCEVRuntimeCheck, "ldist.safe");
+ if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
+ I->insertBefore(RuntimeCheckBB->getTerminator());
+ } else
+ RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
+
+ assert(RuntimeCheck && "called even though we don't need "
+ "any runtime checks");
+
// Rename the block to make the IR more readable.
- MemCheckBB->setName(VersionedLoop->getHeader()->getName() + ".lver.memcheck");
+ RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() +
+ ".lver.check");
// Create empty preheader for the loop (and after cloning for the
// non-versioned loop).
- BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
+ BasicBlock *PH =
+ SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI);
PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
// Clone the loop including the preheader.
@@ -58,20 +96,23 @@ void LoopVersioning::versionLoop(Pass *P) {
// block is a join between the two loops.
SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
NonVersionedLoop =
- cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, ".lver.orig",
- LI, DT, NonVersionedLoopBlocks);
+ cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap,
+ ".lver.orig", LI, DT, NonVersionedLoopBlocks);
remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
// Insert the conditional branch based on the result of the memchecks.
- Instruction *OrigTerm = MemCheckBB->getTerminator();
+ Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader(), MemRuntimeCheck,
- OrigTerm);
+ VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
// memchecking block.
- DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB);
+ DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB);
+
+ // Adds the necessary PHI nodes for the versioned loops based on the
+ // loop-defined values used outside of the loop.
+ addPHINodes(DefsUsedOutside);
}
void LoopVersioning::addPHINodes(
@@ -94,7 +135,7 @@ void LoopVersioning::addPHINodes(
// If not create it.
if (!PN) {
PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
- PHIBlock->begin());
+ &PHIBlock->front());
for (auto *User : Inst->users())
if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
User->replaceUsesOfWith(Inst, PN);
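
For readers tracking the interface change: versionLoop() no longer takes a Pass, and the caller now supplies the values that escape the loop. A minimal, hypothetical driver over the API shown in these hunks (the function name is invented, and findDefsUsedOutsideOfLoop is assumed to be exported via LoopUtils.h):

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"

using namespace llvm;

// Sketch: version L with the checks LAA already computed (UseLAIChecks=true).
// versionLoop clones the loop, emits the combined memory + SCEV runtime check
// in the old preheader, branches to the unversioned copy when it fails, and
// wires up the .lver PHIs for every escaping definition via addPHINodes.
static void versionLoopSketch(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
                              DominatorTree *DT, ScalarEvolution *SE) {
  SmallVector<Instruction *, 8> DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
  LoopVersioning LVer(LAI, L, LI, DT, SE, /*UseLAIChecks=*/true);
  LVer.versionLoop(DefsUsedOutside);
}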
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index 66d57b069fe7..b0ad4d5e84a1 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -69,7 +69,7 @@ bool LowerInvoke::runOnFunction(Function &F) {
BranchInst::Create(II->getNormalDest(), II);
// Remove any PHI node entries from the exception destination.
- II->getUnwindDest()->removePredecessor(BB);
+ II->getUnwindDest()->removePredecessor(&*BB);
// Remove the invoke instruction now.
BB->getInstList().erase(II);
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 4acd988691d2..52beb1542497 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -49,8 +49,7 @@ namespace {
return I != Ranges.end() && I->Low <= R.Low;
}
- /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
- /// instructions.
+ /// Replace all SwitchInst instructions with chained branch instructions.
class LowerSwitch : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
@@ -78,7 +77,7 @@ namespace {
typedef std::vector<CaseRange> CaseVector;
typedef std::vector<CaseRange>::iterator CaseItr;
private:
- void processSwitchInst(SwitchInst *SI);
+ void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList);
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
@@ -116,21 +115,30 @@ FunctionPass *llvm::createLowerSwitchPass() {
bool LowerSwitch::runOnFunction(Function &F) {
bool Changed = false;
+ SmallPtrSet<BasicBlock*, 8> DeleteList;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+ BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
+
+ // If the block is a dead Default block that will be deleted later, don't
+ // waste time processing it.
+ if (DeleteList.count(Cur))
+ continue;
if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
Changed = true;
- processSwitchInst(SI);
+ processSwitchInst(SI, DeleteList);
}
}
+ for (BasicBlock* BB: DeleteList) {
+ DeleteDeadBlock(BB);
+ }
+
return Changed;
}
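
The DeleteList plumbing is the usual two-phase pattern: erasing a block while iterating the function would invalidate the iterator (and could free a block a later iteration still visits), so dead default blocks are only recorded during the scan and deleted afterwards. A generic sketch of the pattern, assuming only the BasicBlockUtils API used above:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Sketch: collect-then-delete so the scan never revisits a condemned block.
static bool eraseMarkedBlocks(Function &F,
                              function_ref<bool(BasicBlock &)> ShouldDelete) {
  SmallPtrSet<BasicBlock *, 8> DeleteList;
  for (BasicBlock &BB : F)
    if (ShouldDelete(BB))
      DeleteList.insert(&BB);
  for (BasicBlock *BB : DeleteList)
    DeleteDeadBlock(BB); // Also drops the block's PHI entries in successors.
  return !DeleteList.empty();
}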
-// operator<< - Used for debugging purposes.
-//
+/// Used for debugging purposes.
static raw_ostream& operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C)
LLVM_ATTRIBUTE_USED;
@@ -147,23 +155,24 @@ static raw_ostream& operator<<(raw_ostream &O,
return O << "]";
}
-// \brief Update the first occurrence of the "switch statement" BB in the PHI
-// node with the "new" BB. The other occurrences will:
-//
-// 1) Be updated by subsequent calls to this function. Switch statements may
-// have more than one outcoming edge into the same BB if they all have the same
-// value. When the switch statement is converted these incoming edges are now
-// coming from multiple BBs.
-// 2) Removed if subsequent incoming values now share the same case, i.e.,
-// multiple outcome edges are condensed into one. This is necessary to keep the
-// number of phi values equal to the number of branches to SuccBB.
+/// \brief Update the first occurrence of the "switch statement" BB in the PHI
+/// node with the "new" BB. The other occurrences will:
+///
+/// 1) Be updated by subsequent calls to this function. Switch statements may
+/// have more than one outcoming edge into the same BB if they all have the same
+/// value. When the switch statement is converted these incoming edges are now
+/// coming from multiple BBs.
+/// 2) Removed if subsequent incoming values now share the same case, i.e.,
+/// multiple outcome edges are condensed into one. This is necessary to keep the
+/// number of phi values equal to the number of branches to SuccBB.
static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
unsigned NumMergedCases) {
- for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI();
+ for (BasicBlock::iterator I = SuccBB->begin(),
+ IE = SuccBB->getFirstNonPHI()->getIterator();
I != IE; ++I) {
PHINode *PN = cast<PHINode>(I);
- // Only update the first occurence.
+ // Only update the first occurrence.
unsigned Idx = 0, E = PN->getNumIncomingValues();
unsigned LocalNumMergedCases = NumMergedCases;
for (; Idx != E; ++Idx) {
@@ -173,7 +182,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
- // Remove additional occurences coming from condensed cases and keep the
+ // Remove additional occurrences coming from condensed cases and keep the
// number of incoming values equal to the number of branches to SuccBB.
SmallVector<unsigned, 8> Indices;
for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
@@ -188,11 +197,11 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
-// switchConvert - Convert the switch statement into a binary lookup of
-// the case values. The function recursively builds this tree.
-// LowerBound and UpperBound are used to keep track of the bounds for Val
-// that have already been checked by a block emitted by one of the previous
-// calls to switchConvert in the call stack.
+/// Convert the switch statement into a binary lookup of the case values.
+/// The function recursively builds this tree. LowerBound and UpperBound are
+/// used to keep track of the bounds for Val that have already been checked by
+/// a block emitted by one of the previous calls to switchConvert in the call
+/// stack.
BasicBlock *
LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
ConstantInt *UpperBound, Value *Val,
@@ -278,28 +287,24 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
UpperBound, Val, NewNode, OrigBlock,
Default, UnreachableRanges);
- Function::iterator FI = OrigBlock;
- F->getBasicBlockList().insert(++FI, NewNode);
+ F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
NewNode->getInstList().push_back(Comp);
BranchInst::Create(LBranch, RBranch, Comp, NewNode);
return NewNode;
}
-// newLeafBlock - Create a new leaf block for the binary lookup tree. It
-// checks if the switch's value == the case's value. If not, then it
-// jumps to the default branch. At this point in the tree, the value
-// can't be another valid case value, so the jump to the "default" branch
-// is warranted.
-//
+/// Create a new leaf block for the binary lookup tree. It checks if the
+/// switch's value == the case's value. If not, then it jumps to the default
+/// branch. At this point in the tree, the value can't be another valid case
+/// value, so the jump to the "default" branch is warranted.
BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
BasicBlock* OrigBlock,
BasicBlock* Default)
{
Function* F = OrigBlock->getParent();
BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
- Function::iterator FI = OrigBlock;
- F->getBasicBlockList().insert(++FI, NewLeaf);
+ F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
// Emit comparison
ICmpInst* Comp = nullptr;
@@ -352,7 +357,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
return NewLeaf;
}
-// Clusterify - Transform simple list of Cases into list of CaseRange's
+/// Transform simple list of Cases into list of CaseRange's.
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
unsigned numCmps = 0;
@@ -394,10 +399,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
return numCmps;
}
-// processSwitchInst - Replace the specified switch instruction with a sequence
-// of chained if-then insts in a balanced binary search.
-//
-void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+/// Replace the specified switch instruction with a sequence of chained if-then
+/// insts in a balanced binary search.
+void LowerSwitch::processSwitchInst(SwitchInst *SI,
+ SmallPtrSetImpl<BasicBlock*> &DeleteList) {
BasicBlock *CurBlock = SI->getParent();
BasicBlock *OrigBlock = CurBlock;
Function *F = CurBlock->getParent();
@@ -424,7 +429,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
std::vector<IntRange> UnreachableRanges;
if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
- // Make the bounds tightly fitted around the case value range, becase we
+ // Make the bounds tightly fitted around the case value range, because we
// know that the value passed to the switch must be exactly one of the case
// values.
assert(!Cases.empty());
@@ -495,7 +500,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default, NewDefault);
+ F->getBasicBlockList().insert(Default->getIterator(), NewDefault);
BranchInst::Create(Default, NewDefault);
// If there is an entry in any PHI nodes for the default edge, make sure
@@ -518,7 +523,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
BasicBlock *OldDefault = SI->getDefaultDest();
CurBlock->getInstList().erase(SI);
- // If the Default block has no more predecessors just remove it.
+ // If the Default block has no more predecessors just add it to DeleteList.
if (pred_begin(OldDefault) == pred_end(OldDefault))
- DeleteDeadBlock(OldDefault);
+ DeleteList.insert(OldDefault);
}
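
For context on the shape of the search tree: Clusterify (above) first collapses the sorted case values into contiguous ranges, and switchConvert then bisects that range list. A plain-integer sketch of the clustering step; the real code additionally requires merged cases to branch to the same successor:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

static std::vector<std::pair<int64_t, int64_t>>
clusterifySketch(std::vector<int64_t> Cases) {
  std::vector<std::pair<int64_t, int64_t>> Ranges;
  std::sort(Cases.begin(), Cases.end());
  for (int64_t V : Cases) {
    if (!Ranges.empty() && V == Ranges.back().second + 1)
      Ranges.back().second = V; // Extend the current contiguous range.
    else
      Ranges.emplace_back(V, V); // Start a new [Low, High] range.
  }
  return Ranges;
}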
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index 00cf4e6c01c8..aa1e35ddba02 100644
--- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -63,6 +63,9 @@ bool PromotePass::runOnFunction(Function &F) {
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
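
The added lines are a plain attribute gate. In isolation, the check this hunk introduces looks like the following sketch:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// A pass that must leave 'optnone' functions untouched bails out before
// doing any analysis or mutation.
static bool shouldPromote(const llvm::Function &F) {
  return !F.hasFnAttribute(llvm::Attribute::OptimizeNone);
}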
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 395a46bad97b..c999bd008fef 100644
--- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -42,6 +42,24 @@ namespace {
}
};
+ static const char *const metaNames[] = {
+ // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+ "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+ "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+ };
+
+ struct Renamer {
+ Renamer(unsigned int seed) {
+ prng.srand(seed);
+ }
+
+ const char *newName() {
+ return metaNames[prng.rand() % array_lengthof(metaNames)];
+ }
+
+ PRNG prng;
+ };
+
struct MetaRenamer : public ModulePass {
static char ID; // Pass identification, replacement for typeid
MetaRenamer() : ModulePass(ID) {
@@ -53,36 +71,26 @@ namespace {
}
bool runOnModule(Module &M) override {
- static const char *const metaNames[] = {
- // See http://en.wikipedia.org/wiki/Metasyntactic_variable
- "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
- "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
- };
-
// Seed our PRNG with simple additive sum of ModuleID. We're looking to
// simply avoid always having the same function names, and we need to
// remain deterministic.
unsigned int randSeed = 0;
- for (std::string::const_iterator I = M.getModuleIdentifier().begin(),
- E = M.getModuleIdentifier().end(); I != E; ++I)
- randSeed += *I;
+ for (auto C : M.getModuleIdentifier())
+ randSeed += C;
- PRNG prng;
- prng.srand(randSeed);
+ Renamer renamer(randSeed);
// Rename all aliases
- for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
- AI != AE; ++AI) {
+ for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) {
StringRef Name = AI->getName();
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
continue;
AI->setName("alias");
}
-
+
// Rename all global variables
- for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
- GI != GE; ++GI) {
+ for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
StringRef Name = GI->getName();
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
continue;
@@ -93,40 +101,37 @@ namespace {
// Rename all struct types
TypeFinder StructTypes;
StructTypes.run(M, true);
- for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
- StructType *STy = StructTypes[i];
+ for (StructType *STy : StructTypes) {
if (STy->isLiteral() || STy->getName().empty()) continue;
SmallString<128> NameStorage;
- STy->setName((Twine("struct.") + metaNames[prng.rand() %
- array_lengthof(metaNames)]).toStringRef(NameStorage));
+ STy->setName((Twine("struct.") +
+ renamer.newName()).toStringRef(NameStorage));
}
// Rename all functions
- for (Module::iterator FI = M.begin(), FE = M.end();
- FI != FE; ++FI) {
- StringRef Name = FI->getName();
+ for (auto &F : M) {
+ StringRef Name = F.getName();
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
continue;
- FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]);
- runOnFunction(*FI);
+ F.setName(renamer.newName());
+ runOnFunction(F);
}
return true;
}
bool runOnFunction(Function &F) {
- for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
- AI != AE; ++AI)
+ for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
if (!AI->getType()->isVoidTy())
AI->setName("arg");
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- BB->setName("bb");
+ for (auto &BB : F) {
+ BB.setName("bb");
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (!I->getType()->isVoidTy())
- I->setName("tmp");
+ for (auto &I : BB)
+ if (!I.getType()->isVoidTy())
+ I.setName("tmp");
}
return true;
}
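
The seed above is just the byte sum of the module identifier: renaming stays deterministic for a given module while still differing between modules. A self-contained sketch of that scheme:

#include <string>

static unsigned seedFromModuleId(const std::string &ModuleId) {
  unsigned Seed = 0;
  for (char C : ModuleId)
    Seed += static_cast<unsigned char>(C); // Order-insensitive byte sum.
  return Seed;
}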
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index d69a81ec4741..9ec28a3f3d47 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -43,9 +43,9 @@ static void appendToGlobalArray(const char *Array,
}
GVCtor->eraseFromParent();
} else {
- // Use a simple two-field struct if there isn't one already.
+ // Use the new three-field struct if there isn't one already.
EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
- nullptr);
+ IRB.getInt8PtrTy(), nullptr);
}
// Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
@@ -107,7 +107,8 @@ Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
- ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs) {
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ StringRef VersionCheckName) {
assert(!InitName.empty() && "Expected init function name");
assert(InitArgs.size() == InitArgTypes.size() &&
"Sanitizer's init function expects different number of arguments");
@@ -122,6 +123,13 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
AttributeSet()));
InitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(InitFunction, InitArgs);
+ if (!VersionCheckName.empty()) {
+ Function *VersionCheckFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
+ AttributeSet()));
+ IRB.CreateCall(VersionCheckFunction, {});
+ }
return std::make_pair(Ctor, InitFunction);
}
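
A hypothetical caller of the extended interface; the TSan-flavored names are illustrative only and not taken from this diff:

#include <tuple>
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

// Sketch: build the module ctor, have it call the runtime init, and (new in
// this change) call a version-check entry point via VersionCheckName.
static void buildSanitizerCtorSketch(Module &M) {
  Function *Ctor, *Init;
  std::tie(Ctor, Init) = createSanitizerCtorAndInitFunctions(
      M, "tsan.module_ctor", "__tsan_init",
      /*InitArgTypes=*/{}, /*InitArgs=*/{},
      /*VersionCheckName=*/"__tsan_version_mismatch_check_v1");
  appendToGlobalCtors(M, Ctor, /*Priority=*/0);
}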
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a87f8504bfb5..c4f9b9f61407 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -205,10 +205,9 @@ public:
// avoid gratuitous rescans.
const BasicBlock *BB = I->getParent();
unsigned InstNo = 0;
- for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
- ++BBI)
- if (isInterestingInstruction(BBI))
- InstNumbers[BBI] = InstNo++;
+ for (const Instruction &BBI : *BB)
+ if (isInterestingInstruction(&BBI))
+ InstNumbers[&BBI] = InstNo++;
It = InstNumbers.find(I);
assert(It != InstNumbers.end() && "Didn't insert instruction?");
@@ -402,8 +401,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
- /*AllowUnresolved*/ false);
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
DDI->eraseFromParent();
LBI.deleteValue(DDI);
@@ -425,14 +423,17 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
-/// return true. This is necessary in cases where, due to control flow, the
-/// alloca is potentially undefined on some control flow paths. e.g. code like
-/// this is potentially correct:
-///
-/// for (...) { if (c) { A = undef; undef = B; } }
-///
-/// ... so long as A is not used before undef is set.
-static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+/// return false. This is necessary in cases where, due to control flow, the
+/// alloca is undefined only on some control flow paths. e.g. code like
+/// this is correct in LLVM IR:
+/// // A is an alloca with no stores so far
+/// for (...) {
+/// int t = *A;
+/// if (!first_iteration)
+/// use(t);
+/// *A = 42;
+/// }
+static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LargeBlockInfo &LBI,
AliasSetTracker *AST) {
// The trickiest case to handle is when we have large blocks. Because of this,
@@ -467,10 +468,15 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
std::make_pair(LoadIdx,
static_cast<StoreInst *>(nullptr)),
less_first());
-
- if (I == StoresByIndex.begin())
- // If there is no store before this load, the load takes the undef value.
- LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ if (I == StoresByIndex.begin()) {
+ if (StoresByIndex.empty())
+ // If there are no stores, the load takes the undef value.
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ else
+ // There is no store before this load, bail out (load may be affected
+ // by the following stores - see main comment).
+ return false;
+ }
else
// Otherwise, there was a store before this load, the load takes its value.
LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
@@ -486,8 +492,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
- /*AllowUnresolved*/ false);
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
}
SI->eraseFromParent();
@@ -506,6 +511,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
}
++NumLocalPromoted;
+ return true;
}
void PromoteMem2Reg::run() {
@@ -557,9 +563,8 @@ void PromoteMem2Reg::run() {
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
- if (Info.OnlyUsedInOneBlock) {
- promoteSingleBlockAlloca(AI, Info, LBI, AST);
-
+ if (Info.OnlyUsedInOneBlock &&
+ promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
continue;
@@ -636,7 +641,7 @@ void PromoteMem2Reg::run() {
// and inserting the phi nodes we marked as necessary
//
std::vector<RenamePassData> RenamePassWorkList;
- RenamePassWorkList.emplace_back(F.begin(), nullptr, std::move(Values));
+ RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values));
do {
RenamePassData RPD;
RPD.swap(RenamePassWorkList.back());
@@ -854,7 +859,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
Allocas[AllocaNo]->getName() + "." + Twine(Version++),
- BB->begin());
+ &BB->front());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
@@ -919,7 +924,7 @@ NextIteration:
return;
for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
- Instruction *I = II++; // get the instruction, increment iterator
+ Instruction *I = &*II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
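
The single-block promotion path above matches each load to the nearest earlier store by binary search over (instruction number, store) pairs. A plain-data sketch of that lookup, with llvm::less_first written out as an explicit comparator:

#include <algorithm>
#include <utility>
#include <vector>

using IdxStore = std::pair<unsigned, const void *>;

static const void *
findNearestEarlierStore(const std::vector<IdxStore> &StoresByIndex,
                        unsigned LoadIdx) {
  auto LessFirst = [](const IdxStore &A, const IdxStore &B) {
    return A.first < B.first;
  };
  auto I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                            IdxStore(LoadIdx, nullptr), LessFirst);
  if (I == StoresByIndex.begin())
    return nullptr; // No store precedes the load; the caller now bails out.
  return std::prev(I)->second; // Value of the nearest earlier store.
}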
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 36781c1189cd..d0932f834cf5 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -43,7 +44,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <map>
@@ -73,6 +73,22 @@ static cl::opt<bool> HoistCondStores(
"simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
cl::desc("Hoist conditional stores if an unconditional store precedes"));
+static cl::opt<bool> MergeCondStores(
+ "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores even if an unconditional store does not "
+ "precede - hoist multiple conditional stores into a single "
+ "predicated store"));
+
+static cl::opt<bool> MergeCondStoresAggressively(
+ "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
+ cl::desc("When merging conditional stores, do so even if the resultant "
+ "basic blocks are unlikely to be if-converted as a result"));
+
+static cl::opt<bool> SpeculateOneExpensiveInst(
+ "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
+ cl::desc("Allow exactly one expensive instruction to be speculatively "
+ "executed"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
@@ -83,13 +99,13 @@ STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
// The first field contains the value that the switch produces when a certain
- // case group is selected, and the second field is a vector containing the cases
- // composing the case group.
+ // case group is selected, and the second field is a vector containing the
+ // cases composing the case group.
typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
SwitchCaseResultVectorTy;
// The first field contains the phi node that generates a result of the switch
- // and the second field contains the value generated for a certain case in the switch
- // for that PHI.
+ // and the second field contains the value generated for a certain case in the
+ // switch for that PHI.
typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
/// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -124,6 +140,7 @@ class SimplifyCFGOpt {
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool SimplifyCleanupReturn(CleanupReturnInst *RI);
bool SimplifyUnreachable(UnreachableInst *UI);
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
bool SimplifyIndirectBr(IndirectBrInst *IBI);
@@ -226,6 +243,7 @@ static unsigned ComputeSpeculationCost(const User *I,
"Instruction is not safe to speculatively execute!");
return TTI.getUserCost(I);
}
+
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -246,7 +264,8 @@ static unsigned ComputeSpeculationCost(const User *I,
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction*> *AggressiveInsts,
unsigned &CostRemaining,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ unsigned Depth = 0) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -284,15 +303,24 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
unsigned Cost = ComputeSpeculationCost(I, TTI);
- if (Cost > CostRemaining)
+ // Allow exactly one instruction to be speculated regardless of its cost
+ // (as long as it is safe to do so).
+ // This is intended to flatten the CFG even if the instruction is a division
+ // or other expensive operation. The speculation of an expensive instruction
+ // is expected to be undone in CodeGenPrepare if the speculation has not
+ // enabled further IR optimizations.
+ if (Cost > CostRemaining &&
+ (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0))
return false;
- CostRemaining -= Cost;
+ // Avoid unsigned wrap.
+ CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI))
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
+ Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts->insert(I);
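
The budget rules this hunk introduces: exactly one over-budget instruction may be speculated, but only at the root of the recursion (Depth == 0) and only if nothing has been charged yet; the remaining budget then saturates at zero instead of wrapping. A sketch of just that bookkeeping:

static bool chargeSpeculationCost(unsigned Cost, unsigned &CostRemaining,
                                  unsigned Depth, bool AllowOneExpensive,
                                  bool AlreadyChargedSomething) {
  // Reject unless this is the single allowed expensive root instruction.
  if (Cost > CostRemaining &&
      (!AllowOneExpensive || AlreadyChargedSomething || Depth > 0))
    return false;
  // Saturating subtract, mirroring the "Avoid unsigned wrap" line above.
  CostRemaining = Cost > CostRemaining ? 0 : CostRemaining - Cost;
  return true;
}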
@@ -970,8 +998,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Okay, at this point, we know which new successor Pred will get. Make
// sure we update the number of entries in the PHI nodes for these
// successors.
- for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
- AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+ for (BasicBlock *NewSuccessor : NewSuccessors)
+ AddPredecessorToBlock(NewSuccessor, Pred, BB);
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
@@ -984,8 +1012,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
PredCases.size());
NewSI->setDebugLoc(PTI->getDebugLoc());
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- NewSI->addCase(PredCases[i].Value, PredCases[i].Dest);
+ for (ValueEqualityComparisonCase &V : PredCases)
+ NewSI->addCase(V.Value, V.Dest);
if (PredHasWeights || SuccHasWeights) {
// Halve the weights if any of them cannot fit in an uint32_t
@@ -1059,15 +1087,15 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
BasicBlock::iterator BB1_Itr = BB1->begin();
BasicBlock::iterator BB2_Itr = BB2->begin();
- Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
+ Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
// Skip debug info if it is not identical.
DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
while (isa<DbgInfoIntrinsic>(I1))
- I1 = BB1_Itr++;
+ I1 = &*BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
- I2 = BB2_Itr++;
+ I2 = &*BB2_Itr++;
}
if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
(isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
@@ -1088,31 +1116,30 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
// For a normal instruction, we just move one to right before the branch,
// then replace all uses of the other with the first. Finally, we remove
// the now redundant second instruction.
- BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+ BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1);
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_nonnull
- };
+ LLVMContext::MD_tbaa, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group,
+ LLVMContext::MD_align, LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null};
combineMetadata(I1, I2, KnownIDs);
I2->eraseFromParent();
Changed = true;
- I1 = BB1_Itr++;
- I2 = BB2_Itr++;
+ I1 = &*BB1_Itr++;
+ I2 = &*BB2_Itr++;
// Skip debug info if it is not identical.
DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
while (isa<DbgInfoIntrinsic>(I1))
- I1 = BB1_Itr++;
+ I1 = &*BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
- I2 = BB2_Itr++;
+ I2 = &*BB2_Itr++;
}
} while (I1->isIdenticalToWhenDefined(I2));
@@ -1147,7 +1174,7 @@ HoistTerminator:
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
- BIParent->getInstList().insert(BI, NT);
+ BIParent->getInstList().insert(BI->getIterator(), NT);
if (!NT->getType()->isVoidTy()) {
I1->replaceAllUsesWith(NT);
I2->replaceAllUsesWith(NT);
@@ -1265,7 +1292,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
- isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+ I1->isEHPad() || I2->isEHPad() ||
isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
@@ -1324,7 +1351,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
if (!NewPN) {
NewPN =
PHINode::Create(DifferentOp1->getType(), 2,
- DifferentOp1->getName() + ".sink", BBEnd->begin());
+ DifferentOp1->getName() + ".sink", &BBEnd->front());
NewPN->addIncoming(DifferentOp1, BB1);
NewPN->addIncoming(DifferentOp2, BB2);
DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
@@ -1339,7 +1366,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// instruction in the basic block down.
bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
// Sink the instruction.
- BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+ BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(),
+ BB1->getInstList(), I1);
if (!OldPN->use_empty())
OldPN->replaceAllUsesWith(I1);
OldPN->eraseFromParent();
@@ -1355,7 +1383,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
RE1 = BB1->getInstList().rend();
if (UpdateRE2)
RE2 = BB2->getInstList().rend();
- FirstNonPhiInBBEnd = I1;
+ FirstNonPhiInBBEnd = &*I1;
NumSinkCommons++;
Changed = true;
}
@@ -1491,7 +1519,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = std::prev(ThenBB->end());
BBI != BBE; ++BBI) {
- Instruction *I = BBI;
+ Instruction *I = &*BBI;
// Skip debug info.
if (isa<DbgInfoIntrinsic>(I))
continue;
@@ -1604,9 +1632,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
SpeculatedStore->setOperand(0, S);
}
+ // Metadata can be dependent on the condition we are hoisting above.
+ // Conservatively strip all metadata on the instruction.
+  for (auto &I : *ThenBB)
+ I.dropUnknownNonDebugMetadata();
+
// Hoist the instructions.
- BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
- std::prev(ThenBB->end()));
+ BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
+ ThenBB->begin(), std::prev(ThenBB->end()));
// Insert selects and rewrite the PHI operands.
IRBuilder<true, NoFolder> Builder(BI);
@@ -1747,13 +1780,13 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
// Check for trivial simplification.
if (Value *V = SimplifyInstruction(N, DL)) {
- TranslateMap[BBI] = V;
+ TranslateMap[&*BBI] = V;
delete N; // Instruction folded away, don't need actual inst
} else {
// Insert the new instruction into its new home.
EdgeBB->getInstList().insert(InsertPt, N);
if (!BBI->use_empty())
- TranslateMap[BBI] = N;
+ TranslateMap[&*BBI] = N;
}
}
@@ -1850,7 +1883,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
} else {
DomBlock = *pred_begin(IfBlock1);
for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
- if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control
// flow, so the xform is not worth it.
@@ -1863,7 +1896,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
} else {
DomBlock = *pred_begin(IfBlock2);
for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
- if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control
// flow, so the xform is not worth it.
@@ -1882,13 +1915,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
if (IfBlock1)
- DomBlock->getInstList().splice(InsertPt,
+ DomBlock->getInstList().splice(InsertPt->getIterator(),
IfBlock1->getInstList(), IfBlock1->begin(),
- IfBlock1->getTerminator());
+ IfBlock1->getTerminator()->getIterator());
if (IfBlock2)
- DomBlock->getInstList().splice(InsertPt,
+ DomBlock->getInstList().splice(InsertPt->getIterator(),
IfBlock2->getInstList(), IfBlock2->begin(),
- IfBlock2->getTerminator());
+ IfBlock2->getTerminator()->getIterator());
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
@@ -2057,7 +2090,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
BI->getSuccessor(0) == PBI->getSuccessor(1))) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end();
I != E; ) {
- Instruction *Curr = I++;
+ Instruction *Curr = &*I++;
if (isa<CmpInst>(Curr)) {
Cond = Curr;
break;
@@ -2077,7 +2110,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
return false;
// Make sure the instruction after the condition is the cond branch.
- BasicBlock::iterator CondIt = Cond; ++CondIt;
+ BasicBlock::iterator CondIt = ++Cond->getIterator();
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
@@ -2095,7 +2128,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(I))
continue;
- if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I))
+ if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
return false;
// I has only one use and can be executed unconditionally.
Instruction *User = dyn_cast<Instruction>(I->user_back());
@@ -2192,17 +2225,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
Instruction *NewBonusInst = BonusInst->clone();
RemapInstruction(NewBonusInst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
- VMap[BonusInst] = NewBonusInst;
+ VMap[&*BonusInst] = NewBonusInst;
// If we moved a load, we cannot any longer claim any knowledge about
// its potential value. The previous information might have been valid
// only given the branch precondition.
// For an analogous reason, we must also drop all the metadata whose
// semantics we don't understand.
- NewBonusInst->dropUnknownMetadata(LLVMContext::MD_dbg);
+ NewBonusInst->dropUnknownNonDebugMetadata();
- PredBlock->getInstList().insert(PBI, NewBonusInst);
- NewBonusInst->takeName(BonusInst);
+ PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
+ NewBonusInst->takeName(&*BonusInst);
BonusInst->setName(BonusInst->getName() + ".old");
}
@@ -2211,7 +2244,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
Instruction *New = Cond->clone();
RemapInstruction(New, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
- PredBlock->getInstList().insert(PBI, New);
+ PredBlock->getInstList().insert(PBI->getIterator(), New);
New->takeName(Cond);
Cond->setName(New->getName() + ".old");
@@ -2332,11 +2365,297 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
return false;
}
+// If there is only one store in BB1 and BB2, return it, otherwise return
+// nullptr.
+static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
+ StoreInst *S = nullptr;
+ for (auto *BB : {BB1, BB2}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ if (S)
+ // Multiple stores seen.
+ return nullptr;
+ else
+ S = SI;
+ }
+ }
+ return S;
+}
+
+static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
+ Value *AlternativeV = nullptr) {
+ // PHI is going to be a PHI node that allows the value V that is defined in
+ // BB to be referenced in BB's only successor.
+ //
+ // If AlternativeV is nullptr, the only value we care about in PHI is V. It
+ // doesn't matter to us what the other operand is (it'll never get used). We
+ // could just create a new PHI with an undef incoming value, but that could
+ // increase register pressure if EarlyCSE/InstCombine can't fold it with some
+ // other PHI. So here we directly look for some PHI in BB's successor with V
+ // as an incoming operand. If we find one, we use it, else we create a new
+ // one.
+ //
+ // If AlternativeV is not nullptr, we care about both incoming values in PHI.
+  // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
+ // where OtherBB is the single other predecessor of BB's only successor.
+ PHINode *PHI = nullptr;
+ BasicBlock *Succ = BB->getSingleSuccessor();
+
+ for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
+ if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
+ PHI = cast<PHINode>(I);
+ if (!AlternativeV)
+ break;
+
+ assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2);
+ auto PredI = pred_begin(Succ);
+ BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
+ if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
+ break;
+ PHI = nullptr;
+ }
+ if (PHI)
+ return PHI;
+
+ // If V is not an instruction defined in BB, just return it.
+ if (!AlternativeV &&
+ (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
+ return V;
+
+ PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+ PHI->addIncoming(V, BB);
+ for (BasicBlock *PredBB : predecessors(Succ))
+ if (PredBB != BB)
+ PHI->addIncoming(AlternativeV ? AlternativeV : UndefValue::get(V->getType()),
+ PredBB);
+ return PHI;
+}
+
+static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
+ BasicBlock *QTB, BasicBlock *QFB,
+ BasicBlock *PostBB, Value *Address,
+ bool InvertPCond, bool InvertQCond) {
+ auto IsaBitcastOfPointerType = [](const Instruction &I) {
+ return Operator::getOpcode(&I) == Instruction::BitCast &&
+ I.getType()->isPointerTy();
+ };
+
+ // If we're not in aggressive mode, we only optimize if we have some
+ // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+ auto IsWorthwhile = [&](BasicBlock *BB) {
+ if (!BB)
+ return true;
+ // Heuristic: if the block can be if-converted/phi-folded and the
+ // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
+ // thread this store.
+ unsigned N = 0;
+ for (auto &I : *BB) {
+ // Cheap instructions viable for folding.
+ if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I))
+ ++N;
+ // Free instructions.
+ else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ IsaBitcastOfPointerType(I))
+ continue;
+ else
+ return false;
+ }
+ return N <= PHINodeFoldingThreshold;
+ };
+
+ if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB) ||
+ !IsWorthwhile(PFB) ||
+ !IsWorthwhile(QTB) ||
+ !IsWorthwhile(QFB)))
+ return false;
+
+ // For every pointer, there must be exactly two stores, one coming from
+ // PTB or PFB, and the other from QTB or QFB. We don't support more than one
+ // store (to any address) in PTB,PFB or QTB,QFB.
+ // FIXME: We could relax this restriction with a bit more work and performance
+ // testing.
+ StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
+ StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
+ if (!PStore || !QStore)
+ return false;
+
+ // Now check the stores are compatible.
+ if (!QStore->isUnordered() || !PStore->isUnordered())
+ return false;
+
+ // Check that sinking the store won't cause program behavior changes. Sinking
+ // the store out of the Q blocks won't change any behavior as we're sinking
+ // from a block to its unconditional successor. But we're moving a store from
+ // the P blocks down through the middle block (QBI) and past both QFB and QTB.
+ // So we need to check that there are no aliasing loads or stores in
+ // QBI, QTB and QFB. We also need to check there are no conflicting memory
+ // operations between PStore and the end of its parent block.
+ //
+ // The ideal way to do this is to query AliasAnalysis, but we don't
+ // preserve AA currently so that is dangerous. Be super safe and just
+ // check there are no other memory operations at all.
+ for (auto &I : *QFB->getSinglePredecessor())
+ if (I.mayReadOrWriteMemory())
+ return false;
+ for (auto &I : *QFB)
+ if (&I != QStore && I.mayReadOrWriteMemory())
+ return false;
+ if (QTB)
+ for (auto &I : *QTB)
+ if (&I != QStore && I.mayReadOrWriteMemory())
+ return false;
+ for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
+ I != E; ++I)
+ if (&*I != PStore && I->mayReadOrWriteMemory())
+ return false;
+
+ // OK, we're going to sink the stores to PostBB. The store has to be
+ // conditional though, so first create the predicate.
+ Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
+ ->getCondition();
+ Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
+ ->getCondition();
+
+ Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
+ PStore->getParent());
+ Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
+ QStore->getParent(), PPHI);
+
+ IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
+
+ Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
+ Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
+
+ if (InvertPCond)
+ PPred = QB.CreateNot(PPred);
+ if (InvertQCond)
+ QPred = QB.CreateNot(QPred);
+ Value *CombinedPred = QB.CreateOr(PPred, QPred);
+
+ auto *T =
+ SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
+ QB.SetInsertPoint(T);
+ StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
+ AAMDNodes AAMD;
+ PStore->getAAMetadata(AAMD, /*Merge=*/false);
+  QStore->getAAMetadata(AAMD, /*Merge=*/true);
+ SI->setAAMetadata(AAMD);
+
+ QStore->eraseFromParent();
+ PStore->eraseFromParent();
+
+ return true;
+}
+
+static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
+ // The intention here is to find diamonds or triangles (see below) where each
+ // conditional block contains a store to the same address. Both of these
+ // stores are conditional, so they can't be unconditionally sunk. But it may
+ // be profitable to speculatively sink the stores into one merged store at the
+ // end, and predicate the merged store on the union of the two conditions of
+ // PBI and QBI.
+ //
+ // This can reduce the number of stores executed if both of the conditions are
+ // true, and can allow the blocks to become small enough to be if-converted.
+ // This optimization will also chain, so that ladders of test-and-set
+ // sequences can be if-converted away.
+ //
+ // We only deal with simple diamonds or triangles:
+ //
+ // PBI or PBI or a combination of the two
+ // / \ | \
+ // PTB PFB | PFB
+ // \ / | /
+ // QBI QBI
+ // / \ | \
+ // QTB QFB | QFB
+ // \ / | /
+ // PostBB PostBB
+ //
+ // We model triangles as a type of diamond with a nullptr "true" block.
+ // Triangles are canonicalized so that the fallthrough edge is represented by
+ // a true condition, as in the diagram above.
+ //
+ BasicBlock *PTB = PBI->getSuccessor(0);
+ BasicBlock *PFB = PBI->getSuccessor(1);
+ BasicBlock *QTB = QBI->getSuccessor(0);
+ BasicBlock *QFB = QBI->getSuccessor(1);
+ BasicBlock *PostBB = QFB->getSingleSuccessor();
+
+ bool InvertPCond = false, InvertQCond = false;
+ // Canonicalize fallthroughs to the true branches.
+ if (PFB == QBI->getParent()) {
+ std::swap(PFB, PTB);
+ InvertPCond = true;
+ }
+ if (QFB == PostBB) {
+ std::swap(QFB, QTB);
+ InvertQCond = true;
+ }
+
+ // From this point on we can assume PTB or QTB may be fallthroughs but PFB
+ // and QFB may not. Model fallthroughs as a nullptr block.
+ if (PTB == QBI->getParent())
+ PTB = nullptr;
+ if (QTB == PostBB)
+ QTB = nullptr;
+
+ // Legality bailouts. We must have at least the non-fallthrough blocks and
+ // the post-dominating block, and the non-fallthroughs must only have one
+ // predecessor.
+ auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
+ return BB->getSinglePredecessor() == P &&
+ BB->getSingleSuccessor() == S;
+ };
+ if (!PostBB ||
+ !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
+ !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
+ return false;
+ if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
+ (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
+ return false;
+ if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2)
+ return false;
+
+ // OK, this is a sequence of two diamonds or triangles.
+ // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
+ SmallPtrSet<Value *,4> PStoreAddresses, QStoreAddresses;
+ for (auto *BB : {PTB, PFB}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ PStoreAddresses.insert(SI->getPointerOperand());
+ }
+ for (auto *BB : {QTB, QFB}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ QStoreAddresses.insert(SI->getPointerOperand());
+ }
+
+ set_intersect(PStoreAddresses, QStoreAddresses);
+ // set_intersect mutates PStoreAddresses in place. Rename it here to make it
+ // clear what it contains.
+ auto &CommonAddresses = PStoreAddresses;
+
+ bool Changed = false;
+ for (auto *Address : CommonAddresses)
+ Changed |= mergeConditionalStoreToAddress(
+ PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond);
+ return Changed;
+}
+
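
At the source level the new transformation corresponds roughly to the following; this is illustrative C++ only, and the select stands in for the PHI that ensureValueAvailableInSuccessor builds on IR:

// Before: two conditional stores to the same address; neither dominates the
// merge point, so neither can be sunk on its own.
void before(bool a, bool b, int x, int y, int *p) {
  if (a)
    *p = x;
  if (b)
    *p = y;
}

// After: a single store at the merge point, predicated on the union of the
// two conditions. The stored value picks y whenever the second store ran.
void after(bool a, bool b, int x, int y, int *p) {
  int v = b ? y : x;
  if (a | b)
    *p = v;
}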
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
-static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ const DataLayout &DL) {
assert(PBI->isConditional() && BI->isConditional());
BasicBlock *BB = BI->getParent();
@@ -2360,10 +2679,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// simplifycfg will thread the block.
if (BlockIsSimpleEnoughToThreadThrough(BB)) {
pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
- PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
- std::distance(PB, PE),
- BI->getCondition()->getName() + ".pr",
- BB->begin());
+ PHINode *NewPN = PHINode::Create(
+ Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
+ BI->getCondition()->getName() + ".pr", &BB->front());
// Okay, we're going to insert the PHI node. Since PBI is not the only
// predecessor, compute the PHI'd conditional value for all of the preds.
// Any predecessor where the condition is not computable we keep symbolic.
@@ -2386,6 +2704,29 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
}
}
+ if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ // If BI is reached from the true path of PBI and PBI's condition implies
+ // BI's condition, we know the direction of the BI branch.
+ if (PBI->getSuccessor(0) == BI->getParent() &&
+ isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
+ BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ auto *OldCond = BI->getCondition();
+ BI->setCondition(ConstantInt::getTrue(BB->getContext()));
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ return true; // Nuke the branch on constant.
+ }
+
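
A source-level illustration of the implied-condition fold added above; the numbers are hypothetical, and isImpliedCondition performs the reasoning on the IR conditions:

// On the true edge of the outer branch, "x < 5" implies "x < 10", so the
// inner branch condition is rewritten to the constant true and the dead
// compare is cleaned up by RecursivelyDeleteTriviallyDeadInstructions.
bool impliedExample(int x) {
  if (x < 5) {
    if (x < 10) // Always true here; simplifycfg folds this branch.
      return true;
  }
  return false;
}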
+ // If both branches are conditional and both contain stores to the same
+ // address, remove the stores from the conditionals and create a conditional
+ // merged store at the end.
+ if (MergeCondStores && mergeConditionalStores(PBI, BI))
+ return true;
+
// If this is a conditional branch in an empty block, and if any
// predecessors are a conditional branch to one of our destinations,
// fold the conditions into logical ops and one cond br.
@@ -2396,11 +2737,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (&*BBI != BI)
return false;
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
- if (CE->canTrap())
- return false;
-
int PBIOp, BIOp;
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
PBIOp = BIOp = 0;
@@ -2565,15 +2901,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
// Then remove the rest.
- for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
- BasicBlock *Succ = OldTerm->getSuccessor(I);
+ for (BasicBlock *Succ : OldTerm->successors()) {
// Make sure only to keep exactly one copy of each edge.
if (Succ == KeepEdge1)
KeepEdge1 = nullptr;
else if (Succ == KeepEdge2)
KeepEdge2 = nullptr;
else
- Succ->removePredecessor(OldTerm->getParent());
+ Succ->removePredecessor(OldTerm->getParent(),
+ /*DontDeleteUselessPHIs=*/true);
}
IRBuilder<> Builder(OldTerm);
@@ -2827,7 +3163,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
// If Extra was used, we require at least two switch values to do the
- // transformation. A switch with one value is just an cond branch.
+ // transformation. A switch with one value is just a conditional branch.
if (ExtraCase && Values.size() < 2) return false;
// TODO: Preserve branch weight metadata, similarly to how
@@ -2847,7 +3183,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
// then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
if (ExtraCase) {
- BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ BasicBlock *NewBB =
+ BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
// Remove the uncond branch added to the old block.
TerminatorInst *OldTI = BB->getTerminator();
Builder.SetInsertPoint(OldTI);
@@ -2911,34 +3248,15 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Check that there are no other instructions except for debug intrinsics.
- BasicBlock::iterator I = LPInst, E = RI;
+ BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
while (++I != E)
if (!isa<DbgInfoIntrinsic>(I))
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
- InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- // Insert a call instruction before the invoke.
- CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
- Call->takeName(II);
- Call->setCallingConv(II->getCallingConv());
- Call->setAttributes(II->getAttributes());
- Call->setDebugLoc(II->getDebugLoc());
-
- // Anything that used the value produced by the invoke instruction now uses
- // the value produced by the call instruction. Note that we do this even
- // for void functions and calls with no uses so that the callgraph edge is
- // updated.
- II->replaceAllUsesWith(Call);
- BB->removePredecessor(II->getParent());
-
- // Insert a branch to the normal destination right before the invoke.
- BranchInst::Create(II->getNormalDest(), II);
-
- // Finally, delete the invoke instruction!
- II->eraseFromParent();
+ BasicBlock *Pred = *PI++;
+ removeUnwindEdge(Pred);
}
// The landingpad is now unreachable. Zap it.
@@ -2946,6 +3264,124 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return true;
}
+bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
+ // If this is a trivial cleanup pad that executes no instructions, it can be
+ // eliminated. If the cleanup pad continues to the caller, any predecessor
+ // that is an EH pad will be updated to continue to the caller and any
+ // predecessor that terminates with an invoke instruction will have its invoke
+ // instruction converted to a call instruction. If the cleanup pad being
+ // simplified does not continue to the caller, each predecessor will be
+ // updated to continue to the unwind destination of the cleanup pad being
+ // simplified.
+ BasicBlock *BB = RI->getParent();
+ CleanupPadInst *CPInst = RI->getCleanupPad();
+ if (CPInst->getParent() != BB)
+ // This isn't an empty cleanup.
+ return false;
+
+ // Check that there are no other instructions except for debug intrinsics.
+ BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // If the cleanup return we are simplifying unwinds to the caller, this will
+ // set UnwindDest to nullptr.
+ BasicBlock *UnwindDest = RI->getUnwindDest();
+ Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
+
+ // We're about to remove BB from the control flow. Before we do, sink any
+ // PHINodes into the unwind destination. Doing this before changing the
+ // control flow avoids some potentially slow checks, since we can currently
+ // be certain that UnwindDest and BB have no common predecessors (since they
+ // are both EH pads).
+ if (UnwindDest) {
+ // First, go through the PHI nodes in UnwindDest and update any nodes that
+ // reference the block we are removing
+ for (BasicBlock::iterator I = UnwindDest->begin(),
+ IE = DestEHPad->getIterator();
+ I != IE; ++I) {
+ PHINode *DestPN = cast<PHINode>(I);
+
+ int Idx = DestPN->getBasicBlockIndex(BB);
+ // Since BB unwinds to UnwindDest, it has to be in the PHI node.
+ assert(Idx != -1);
+ // This PHI node has an incoming value that corresponds to a control
+ // path through the cleanup pad we are removing. If the incoming
+ // value is in the cleanup pad, it must be a PHINode (because we
+ // verified above that the block is otherwise empty). Otherwise, the
+ // value is either a constant or a value that dominates the cleanup
+ // pad being removed.
+ //
+ // Because BB and UnwindDest are both EH pads, all of their
+ // predecessors must unwind to these blocks, and since no instruction
+ // can have multiple unwind destinations, there will be no overlap in
+ // incoming blocks between SrcPN and DestPN.
+ Value *SrcVal = DestPN->getIncomingValue(Idx);
+ PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
+
+ // Remove the entry for the block we are deleting.
+ DestPN->removeIncomingValue(Idx, false);
+
+ if (SrcPN && SrcPN->getParent() == BB) {
+ // If the incoming value was a PHI node in the cleanup pad we are
+ // removing, we need to merge that PHI node's incoming values into
+ // DestPN.
+ for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
+ SrcIdx != SrcE; ++SrcIdx) {
+ DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
+ SrcPN->getIncomingBlock(SrcIdx));
+ }
+ } else {
+ // Otherwise, the incoming value came from above BB and
+ // so we can just reuse it. We must associate all of BB's
+ // predecessors with this value.
+ for (auto *pred : predecessors(BB)) {
+ DestPN->addIncoming(SrcVal, pred);
+ }
+ }
+ }
+
+ // Sink any remaining PHI nodes directly into UnwindDest.
+ Instruction *InsertPt = DestEHPad;
+ for (BasicBlock::iterator I = BB->begin(),
+ IE = BB->getFirstNonPHI()->getIterator();
+ I != IE;) {
+ // The iterator must be incremented here because the instructions are
+ // being moved to another block.
+ PHINode *PN = cast<PHINode>(I++);
+ if (PN->use_empty())
+ // If the PHI node has no uses, just leave it. It will be erased
+ // when we erase BB below.
+ continue;
+
+ // Otherwise, sink this PHI node into UnwindDest.
+ // Any predecessors to UnwindDest which are not already represented
+ // must be back edges which inherit the value from the path through
+ // BB. In this case, the PHI value must reference itself.
+ for (auto *pred : predecessors(UnwindDest))
+ if (pred != BB)
+ PN->addIncoming(PN, pred);
+ PN->moveBefore(InsertPt);
+ }
+ }
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+ // The iterator must be updated here because we are removing this pred.
+ BasicBlock *PredBB = *PI++;
+ if (UnwindDest == nullptr) {
+ removeUnwindEdge(PredBB);
+ } else {
+ TerminatorInst *TI = PredBB->getTerminator();
+ TI->replaceUsesOfWith(BB, UnwindDest);
+ }
+ }
+
+ // The cleanup pad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
+}
+
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
@@ -3003,8 +3439,8 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// If there are any instructions immediately before the unreachable that can
// be removed, do so.
- while (UI != BB->begin()) {
- BasicBlock::iterator BBI = UI;
+ while (UI->getIterator() != BB->begin()) {
+ BasicBlock::iterator BBI = UI->getIterator();
--BBI;
// Do not delete instructions that can have side effects which might cause
// the unreachable to not be reachable; specifically, calls and volatile
@@ -3075,26 +3511,18 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
--i; --e;
Changed = true;
}
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
- if (II->getUnwindDest() == BB) {
- // Convert the invoke to a call instruction. This would be a good
- // place to note that the call does not throw though.
- BranchInst *BI = Builder.CreateBr(II->getNormalDest());
- II->removeFromParent(); // Take out of symbol table
-
- // Insert the call now...
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
- Builder.SetInsertPoint(BI);
- CallInst *CI = Builder.CreateCall(II->getCalledValue(),
- Args, II->getName());
- CI->setCallingConv(II->getCallingConv());
- CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the call does now instead.
- II->replaceAllUsesWith(CI);
- delete II;
- Changed = true;
- }
+ } else if ((isa<InvokeInst>(TI) &&
+ cast<InvokeInst>(TI)->getUnwindDest() == BB) ||
+ isa<CatchSwitchInst>(TI)) {
+ removeUnwindEdge(TI->getParent());
+ Changed = true;
+ } else if (isa<CleanupReturnInst>(TI)) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
}
+ // TODO: We can remove a catchswitch if all its catchpads end in
+ // unreachable.
}
// If this block is now dead, remove it.
@@ -3249,6 +3677,29 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
}
}
+ // If we can prove that the cases must cover all possible values, the
+ // default destination becomes dead and we can remove it. If we know some
+ // of the bits in the value, we can use that to more precisely compute the
+ // number of possible unique case values.
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const unsigned NumUnknownBits = Bits -
+ (KnownZero.Or(KnownOne)).countPopulation();
+ assert(NumUnknownBits <= Bits);
+ if (HasDefault && DeadCases.empty() &&
+ NumUnknownBits < 64 /* avoid overflow */ &&
+ SI->getNumCases() == (1ULL << NumUnknownBits)) {
+ DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(),
+ SI->getParent(), "");
+ SI->setDefaultDest(&*NewDefault);
+ SplitBlock(&*NewDefault, &NewDefault->front());
+ auto *OldTI = NewDefault->getTerminator();
+ new UnreachableInst(SI->getContext(), OldTI);
+ EraseTerminatorInstAndDCECond(OldTI);
+ return true;
+ }
+
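
A small numeric instance of the test above, as a self-contained check (values are assumed for illustration; std::popcount stands in for APInt::countPopulation):

#include <bit>
#include <cstdint>

// For an i8 switch condition where computeKnownBits has fixed the top six
// bits, only 8 - 6 = 2 bits are unknown, so 1 << 2 = 4 cases suffice to
// cover every reachable value and the default becomes dead.
bool defaultIsDead(unsigned Bits, uint64_t KnownZero, uint64_t KnownOne,
                   uint64_t NumCases) {
  unsigned NumUnknownBits = Bits - std::popcount(KnownZero | KnownOne);
  return NumUnknownBits < 64 /* avoid overflow */ &&
         NumCases == (1ULL << NumUnknownBits);
}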
SmallVector<uint64_t, 8> Weights;
bool HasWeight = HasBranchWeights(SI);
if (HasWeight) {
@@ -3439,7 +3890,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
} else if (isa<DbgInfoIntrinsic>(I)) {
// Skip debug intrinsic.
continue;
- } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) {
+ } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
// If the instruction has uses outside this block or a phi node slot for
@@ -3456,7 +3907,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
return false;
}
- ConstantPool.insert(std::make_pair(I, C));
+ ConstantPool.insert(std::make_pair(&*I, C));
} else {
break;
}
@@ -3664,7 +4115,7 @@ namespace {
/// Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
- const Type *ElementType);
+ Type *ElementType);
private:
// Depending on the contents of the table, it can be represented in
@@ -3880,8 +4331,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
uint64_t TableSize,
- const Type *ElementType) {
- const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ Type *ElementType) {
+ auto *IT = dyn_cast<IntegerType>(ElementType);
if (!IT)
return false;
// FIXME: If the type is wider than it needs to be, e.g. i8 but all values
@@ -3992,7 +4443,7 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
"Expect true or false as compare result.");
}
-
+
// Check if the branch instruction dominates the phi node. It's a simple
// dominance check, but sufficient for our needs.
// Although this check is invariant in the calling loops, it's better to do it
@@ -4422,7 +4873,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
- BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
@@ -4457,6 +4908,16 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
return false;
}
+static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
+ BasicBlock *PredPred = nullptr;
+ for (auto *P : predecessors(BB)) {
+ BasicBlock *PPred = P->getSinglePredecessor();
+ if (!PPred || (PredPred && PredPred != PPred))
+ return nullptr;
+ PredPred = PPred;
+ }
+ return PredPred;
+}
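
The shape this helper recognizes is the apex of a diamond: every predecessor of BB must itself have a single predecessor, and all of those must coincide. The same check over a toy predecessor map, purely for illustration:

#include <map>
#include <string>
#include <vector>

// For the diamond A -> {B, C} -> D, commonSource(Preds, "D") returns "A";
// any extra edge into B or C makes it return "".
std::string commonSource(
    const std::map<std::string, std::vector<std::string>> &Preds,
    const std::string &BB) {
  std::string PredPred;
  for (const std::string &P : Preds.at(BB)) {
    const auto &PP = Preds.at(P);
    if (PP.size() != 1 || (!PredPred.empty() && PredPred != PP[0]))
      return "";
    PredPred = PP[0];
  }
  return PredPred;
}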
bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
@@ -4537,9 +4998,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI))
+ if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ // Look for diamond patterns.
+ if (MergeCondStores)
+ if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
+ if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (mergeConditionalStores(PBI, BI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
return false;
}
@@ -4663,6 +5132,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
if (SimplifyReturn(RI, Builder)) return true;
} else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
if (SimplifyResume(RI, Builder)) return true;
+ } else if (CleanupReturnInst *RI =
+ dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ if (SimplifyCleanupReturn(RI)) return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
if (SimplifySwitch(SI, Builder)) return true;
} else if (UnreachableInst *UI =
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index ab30aa17c76b..ddd8775a8431 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -47,15 +47,16 @@ namespace {
Loop *L;
LoopInfo *LI;
ScalarEvolution *SE;
+ DominatorTree *DT;
SmallVectorImpl<WeakVH> &DeadInsts;
bool Changed;
public:
- SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI,
- SmallVectorImpl<WeakVH> &Dead)
- : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) {
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead)
+ : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -63,11 +64,13 @@ namespace {
/// Iteratively perform simplification on a worklist of users of the
/// specified induction variable. This is the top-level driver that applies
- /// all simplicitions to users of an IV.
+ /// all simplifications to users of an IV.
void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+ bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
+
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
@@ -166,19 +169,65 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
S = SE->getSCEVAtScope(S, ICmpLoop);
X = SE->getSCEVAtScope(X, ICmpLoop);
+ ICmpInst::Predicate InvariantPredicate;
+ const SCEV *InvariantLHS, *InvariantRHS;
+
// If the condition is always true or always false, replace it with
// a constant value.
- if (SE->isKnownPredicate(Pred, S, X))
+ if (SE->isKnownPredicate(Pred, S, X)) {
ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
- else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ DeadInsts.emplace_back(ICmp);
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) {
ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
- else
+ DeadInsts.emplace_back(ICmp);
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ } else if (isa<PHINode>(IVOperand) &&
+ SE->isLoopInvariantPredicate(Pred, S, X, ICmpLoop,
+ InvariantPredicate, InvariantLHS,
+ InvariantRHS)) {
+
+ // Rewrite the comparison to a loop invariant comparison if it can be done
+ // cheaply, where cheaply means "we don't need to emit any new
+ // instructions".
+
+ Value *NewLHS = nullptr, *NewRHS = nullptr;
+
+ if (S == InvariantLHS || X == InvariantLHS)
+ NewLHS =
+ ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx));
+
+ if (S == InvariantRHS || X == InvariantRHS)
+ NewRHS =
+ ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx));
+
+ for (Value *Incoming : cast<PHINode>(IVOperand)->incoming_values()) {
+ if (NewLHS && NewRHS)
+ break;
+
+ const SCEV *IncomingS = SE->getSCEV(Incoming);
+
+ if (!NewLHS && IncomingS == InvariantLHS)
+ NewLHS = Incoming;
+ if (!NewRHS && IncomingS == InvariantRHS)
+ NewRHS = Incoming;
+ }
+
+ if (!NewLHS || !NewRHS)
+ // We could not find an existing value to replace either LHS or RHS.
+ // Generating new instructions has subtler tradeoffs, so avoid doing that
+ // for now.
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(InvariantPredicate);
+ ICmp->setOperand(0, NewLHS);
+ ICmp->setOperand(1, NewRHS);
+ } else
return;
- DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
++NumElimCmp;
Changed = true;
- DeadInsts.emplace_back(ICmp);
}
/// SimplifyIVUsers helper for eliminating useless
@@ -207,8 +256,7 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
Rem->replaceAllUsesWith(Rem->getOperand(0));
else {
// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
- const SCEV *LessOne =
- SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType()));
if (IsSigned && !SE->isKnownNonNegative(LessOne))
return;
@@ -232,9 +280,9 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
DeadInsts.emplace_back(Rem);
}
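
A source-level example of the simpler remainder case this routine handles: when SCEV proves the IV lies in [0, n), i % n is just i, so the urem disappears entirely (the quoted (i+1) % n form additionally needs the compare-and-select shown in the comment above):

unsigned sumWrapped(const unsigned *A, unsigned N) {
  unsigned S = 0;
  for (unsigned I = 0; I < N; ++I)
    S += A[I % N]; // I is provably in [0, N), so I % N folds to I
  return S;
}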
-/// Eliminate an operation that consumes a simple IV and has
-/// no observable side-effect given the range of IV values.
-/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+/// Eliminate an operation that consumes a simple IV and has no observable
+/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
+/// but UseInst may not be.
bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
Instruction *IVOperand) {
if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
@@ -249,12 +297,45 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
}
}
- // Eliminate any operation that SCEV can prove is an identity function.
+ if (eliminateIdentitySCEV(UseInst, IVOperand))
+ return true;
+
+ return false;
+}
+
+/// Eliminate any operation that SCEV can prove is an identity function.
+bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
+ Instruction *IVOperand) {
if (!SE->isSCEVable(UseInst->getType()) ||
(UseInst->getType() != IVOperand->getType()) ||
(SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
return false;
+ // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
+ // dominator tree, even if X is an operand to Y. For instance, in
+ //
+ // %iv = phi i32 {0,+,1}
+ // br %cond, label %left, label %merge
+ //
+ // left:
+ // %X = add i32 %iv, 0
+ // br label %merge
+ //
+ // merge:
+ // %M = phi (%X, %iv)
+ //
+ // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
+ // %M.replaceAllUsesWith(%X) would be incorrect.
+
+ if (isa<PHINode>(UseInst))
+ // If UseInst is not a PHI node then we know that IVOperand dominates
+ // UseInst directly from the legality of SSA.
+ if (!DT || !DT->dominates(IVOperand, UseInst))
+ return false;
+
+ if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
+ return false;
+
DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
UseInst->replaceAllUsesWith(IVOperand);
@@ -436,8 +517,8 @@ static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
/// This algorithm does not require IVUsers analysis. Instead, it simplifies
/// instructions in-place during analysis. Rather than rewriting induction
/// variables bottom-up from their users, it transforms a chain of IVUsers
-/// top-down, updating the IR only when it encouters a clear optimization
-/// opportunitiy.
+/// top-down, updating the IR only when it encounters a clear optimization
+/// opportunity.
///
/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
///
@@ -513,22 +594,21 @@ void IVVisitor::anchor() { }
/// Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
-{
- LoopInfo *LI = &LPM->getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead);
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead,
+ IVVisitor *V) {
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead);
SIV.simplifyUsers(CurrIV, V);
return SIV.hasChanged();
}
/// Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
-bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead) {
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) {
bool Changed = false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead);
+ Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead);
}
return Changed;
}
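
A hypothetical caller of the new interface, showing how a pass that already has ScalarEvolution, DominatorTree, and LoopInfo would drive it and then clean up; the surrounding pass boilerplate is omitted, and RecursivelyDeleteTriviallyDeadInstructions (from Transforms/Utils/Local.h) is the usual follow-up:

static bool simplifyAndClean(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
                             LoopInfo *LI) {
  SmallVector<WeakVH, 16> DeadInsts;
  bool Changed = simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
  // Drop whatever the simplifier queued as dead; entries may already have
  // been RAUW'd away, hence the WeakVH and the null check.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();
    if (Instruction *I = dyn_cast_or_null<Instruction>(V))
      RecursivelyDeleteTriviallyDeadInstructions(I);
  }
  return Changed;
}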
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
index c499c87b1f0b..d5377f9a4c1f 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -20,12 +20,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -64,7 +64,7 @@ namespace {
// Here be subtlety: the iterator must be incremented before the loop
// body (not sure why), so a range-for loop won't work here.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
// The first time through the loop ToSimplify is empty and we try to
// simplify all instructions. On later iterations ToSimplify is not
// empty and we only bother simplifying instructions that are in it.
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 6bbf8287e223..81dea6d1b9ae 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -30,8 +31,8 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
@@ -52,16 +53,8 @@ static cl::opt<bool>
//===----------------------------------------------------------------------===//
static bool ignoreCallingConv(LibFunc::Func Func) {
- switch (Func) {
- case LibFunc::abs:
- case LibFunc::labs:
- case LibFunc::llabs:
- case LibFunc::strlen:
- return true;
- default:
- return false;
- }
- llvm_unreachable("All cases should be covered in the switch.");
+ return Func == LibFunc::abs || Func == LibFunc::labs ||
+ Func == LibFunc::llabs || Func == LibFunc::strlen;
}
/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@@ -93,16 +86,13 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
}
static bool callHasFloatingPointArgument(const CallInst *CI) {
- for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
- it != e; ++it) {
- if ((*it)->getType()->isFloatingPointTy())
- return true;
- }
- return false;
+ return std::any_of(CI->op_begin(), CI->op_end(), [](const Use &OI) {
+ return OI->getType()->isFloatingPointTy();
+ });
}
/// \brief Check whether the overloaded unary floating point function
-/// corresponing to \a Ty is available.
+/// corresponding to \a Ty is available.
static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
LibFunc::Func LongDoubleFn) {
@@ -116,6 +106,23 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
+/// \brief Check whether we can use unsafe floating point math for
+/// the function passed as input.
+static bool canUseUnsafeFPMath(Function *F) {
+
+ // FIXME: For finer-grain optimization, we need intrinsics to have the same
+ // fast-math flag decorations that are applied to FP instructions. For now,
+ // we have to rely on the function-level unsafe-fp-math attribute to do this
+ // optimization because there's no other way to express that the call can be
+ // relaxed.
+ if (F->hasFnAttribute("unsafe-fp-math")) {
+ Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+ if (Attr.getValueAsString() == "true")
+ return true;
+ }
+ return false;
+}
+
/// \brief Returns whether \p F matches the signature expected for the
/// string/memory copying library function \p Func.
+/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
@@ -467,9 +474,6 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // Verify the "stpcpy" function prototype.
- FunctionType *FT = Callee->getFunctionType();
-
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy))
return nullptr;
@@ -484,7 +488,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return nullptr;
- Type *PT = FT->getParamType(0);
+ Type *PT = Callee->getFunctionType()->getParamType(0);
Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
Value *DstEnd =
B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
@@ -497,8 +501,6 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
-
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy))
return nullptr;
@@ -531,7 +533,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
if (Len > SrcLen + 1)
return nullptr;
- Type *PT = FT->getParamType(0);
+ Type *PT = Callee->getFunctionType()->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
@@ -862,6 +864,27 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
return B.CreateSub(LHSV, RHSV, "chardiff");
}
+ // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+ if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
+
+ IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
+ unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
+
+ if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
+ getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
+
+ Type *LHSPtrTy =
+ IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+ Type *RHSPtrTy =
+ IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+
+ Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+ Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+
+ return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
+ }
+ }
+
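
In source terms, the rewrite above turns a short, alignment-friendly memcmp whose result is only tested against zero into one integer load per side. A sketch of the 4-byte case (the pass emits the aligned i32 loads directly; memcpy stands in for them here):

#include <cstdint>
#include <cstring>

bool equal4(const void *LHS, const void *RHS) {
  uint32_t L, R;
  std::memcpy(&L, LHS, sizeof L); // stands in for the aligned i32 load
  std::memcpy(&R, RHS, sizeof R);
  return L == R; // the IR is icmp ne + zext, which the caller tests == 0
}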
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
StringRef LHSStr, RHSStr;
if (getConstantStringInfo(LHS, LHSStr) &&
@@ -972,7 +995,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
- Module *M = CI->getParent()->getParent()->getParent();
+ Module *M = CI->getModule();
Intrinsic::ID IID = Callee->getIntrinsicID();
Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
V = B.CreateCall(F, V);
@@ -1015,9 +1038,9 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) {
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- }
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
@@ -1035,13 +1058,37 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
+static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
+ // Multiplications calculated using Addition Chains.
+ // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
+
+ assert(Exp != 0 && "Incorrect exponent 0 not handled");
+
+ if (InnerChain[Exp])
+ return InnerChain[Exp];
+
+ static const unsigned AddChain[33][2] = {
+ {0, 0}, // Unused.
+ {0, 0}, // Unused (base case = pow1).
+ {1, 1}, // Unused (pre-computed).
+ {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4},
+ {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7},
+ {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10},
+ {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
+ {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
+ };
+
+ InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B),
+ getPow(InnerChain, AddChain[Exp][1], B));
+ return InnerChain[Exp];
+}
+
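
Tracing the table above for exponent 13 shows how few multiplies the memoized chain needs: AddChain[13] = {4, 9}, AddChain[9] = {1, 8}, AddChain[8] = {4, 4}, AddChain[4] = {2, 2}. Spelled out as straight-line code:

double pow13(double X) {
  double X2 = X * X;   // InnerChain[2], pre-seeded by the caller
  double X4 = X2 * X2; // {2, 2}
  double X8 = X4 * X4; // {4, 4}
  double X9 = X * X8;  // {1, 8}
  return X4 * X9;      // {4, 9}: five fmuls replace the pow call
}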
Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
-
Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) {
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- }
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
@@ -1060,7 +1107,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (Op1C->isExactlyValue(2.0) &&
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
LibFunc::exp2l))
- return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B,
+ Callee->getAttributes());
// pow(10.0, x) -> exp10(x)
if (Op1C->isExactlyValue(10.0) &&
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
@@ -1069,6 +1117,32 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Callee->getAttributes());
}
+ bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
+
+ // pow(exp(x), y) -> exp(x*y)
+ // pow(exp2(x), y) -> exp2(x * y)
+ // We enable these only under fast-math. Besides rounding
+ // differences the transformation changes overflow and
+ // underflow behavior quite dramatically.
+ // Example: x = 1000, y = 0.001.
+ // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
+ if (unsafeFPMath) {
+ if (auto *OpC = dyn_cast<CallInst>(Op1)) {
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ B.SetFastMathFlags(FMF);
+
+ LibFunc::Func Func;
+ Function *OpCCallee = OpC->getCalledFunction();
+ if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
+ TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2))
+ return EmitUnaryFloatFnCall(
+ B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"),
+ OpCCallee->getName(), B, OpCCallee->getAttributes());
+ }
+ }
+
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
if (!Op2C)
return Ret;
@@ -1081,10 +1155,15 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
LibFunc::sqrtl) &&
hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
LibFunc::fabsl)) {
+
+ // In -ffast-math, pow(x, 0.5) -> sqrt(x).
+ if (unsafeFPMath)
+ return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
+ Callee->getAttributes());
+
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
// and negative infinity correctly.
- // TODO: In fast-math mode, this could be just sqrt(x).
// TODO: In finite-only mode, this could be just fabs(sqrt(x)).
Value *Inf = ConstantFP::getInfinity(CI->getType());
Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
@@ -1102,18 +1181,42 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
return B.CreateFMul(Op1, Op1, "pow2");
if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
+
+ // In -ffast-math, generate repeated fmul instead of generating pow(x, n).
+ if (unsafeFPMath) {
+ APFloat V = abs(Op2C->getValueAPF());
+ // We limit to a max of 7 fmul(s). Thus max exponent is 32.
+ // This transformation applies to integer exponents only.
+ if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan ||
+ !V.isInteger())
+ return nullptr;
+
+ // We will memoize intermediate products of the Addition Chain.
+ Value *InnerChain[33] = {nullptr};
+ InnerChain[1] = Op1;
+ InnerChain[2] = B.CreateFMul(Op1, Op1);
+
+ // We cannot readily convert a non-double type (like float) to a double.
+ // So we first convert V to something which could be converted to double.
+ bool ignored;
+ V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
+ // For negative exponents simply compute the reciprocal.
+ if (Op2C->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul);
+ return FMul;
+ }
+
return nullptr;
}
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Function *Caller = CI->getParent()->getParent();
-
Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "exp2" &&
- TLI->has(LibFunc::exp2f)) {
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- }
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
@@ -1162,11 +1265,10 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
-
Value *Ret = nullptr;
- if (Callee->getName() == "fabs" && TLI->has(LibFunc::fabsf)) {
+ StringRef Name = Callee->getName();
+ if (Name == "fabs" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, false);
- }
FunctionType *FT = Callee->getFunctionType();
// Make sure this has 1 argument of FP type which matches the result type.
@@ -1184,6 +1286,105 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+ // If we can shrink the call to a float function rather than a double
+ // function, do that first.
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ if ((Name == "fmin" && hasFloatVersion(Name)) ||
+ (Name == "fmax" && hasFloatVersion(Name))) {
+ Value *Ret = optimizeBinaryDoubleFP(CI, B);
+ if (Ret)
+ return Ret;
+ }
+
+ // Make sure this has 2 arguments of FP type which match the result type.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return nullptr;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ Function *F = CI->getParent()->getParent();
+ if (canUseUnsafeFPMath(F)) {
+ // Unsafe algebra sets all fast-math-flags to true.
+ FMF.setUnsafeAlgebra();
+ } else {
+ // At a minimum, no-nans-fp-math must be true.
+ Attribute Attr = F->getFnAttribute("no-nans-fp-math");
+ if (Attr.getValueAsString() != "true")
+ return nullptr;
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+ // fmax(-0.0, +0.0) would return +0; however, implementation in software
+ // might be impractical."
+ FMF.setNoSignedZeros();
+ FMF.setNoNaNs();
+ }
+ B.SetFastMathFlags(FMF);
+
+ // We have a relaxed floating-point environment. We can ignore NaN-handling
+ // and transform to a compare and select. We do not have to consider errno or
+ // exceptions, because fmin/fmax do not have those.
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ Value *Cmp = Callee->getName().startswith("fmin") ?
+ B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
+ return B.CreateSelect(Cmp, Op0, Op1);
+}
+
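
Under the relaxed-FP preconditions checked above, the emitted compare-plus-select behaves like the following (NaN inputs are assumed not to occur, per no-nans-fp-math):

double fminRelaxed(double A, double B) {
  return A < B ? A : B; // fcmp olt + select; fmax uses ogt instead
}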
+Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+ FunctionType *FT = Callee->getFunctionType();
+
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ return Ret;
+ Value *Op1 = CI->getArgOperand(0);
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (!OpC)
+ return Ret;
+
+ // log(pow(x,y)) -> y*log(x)
+ // This is only applicable to log, log2, log10.
+ if (Name != "log" && Name != "log2" && Name != "log10")
+ return Ret;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ B.SetFastMathFlags(FMF);
+
+ LibFunc::Func Func;
+ Function *F = OpC->getCalledFunction();
+ if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+ Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow))
+ return B.CreateFMul(OpC->getArgOperand(1),
+ EmitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
+ Callee->getAttributes()), "mul");
+
+ // log(exp2(y)) -> y*log(2)
+ if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
+ TLI->has(Func) && Func == LibFunc::exp2)
+ return B.CreateFMul(
+ OpC->getArgOperand(0),
+ EmitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
+ Callee->getName(), B, Callee->getAttributes()),
+ "logmul");
+ return Ret;
+}
+
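
Source-level equivalents of the two log rewrites, valid only under the unsafe-fp-math assumption established above (they change rounding, overflow, and edge-case behavior):

#include <cmath>

double logPow(double X, double Y) {
  return Y * std::log(X); // log(pow(x, y)) -> y*log(x)
}

double logExp2(double Y) {
  return Y * std::log(2.0); // log(exp2(y)) -> y*log(2)
}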
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
@@ -1191,19 +1392,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
+ if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ return Ret;
- // FIXME: For finer-grain optimization, we need intrinsics to have the same
- // fast-math flag decorations that are applied to FP instructions. For now,
- // we have to rely on the function-level unsafe-fp-math attribute to do this
- // optimization because there's no other way to express that the sqrt can be
- // reassociated.
- Function *F = CI->getParent()->getParent();
- if (F->hasFnAttribute("unsafe-fp-math")) {
- // Check for unsafe-fp-math = true.
- Attribute Attr = F->getFnAttribute("unsafe-fp-math");
- if (Attr.getValueAsString() != "true")
- return Ret;
- }
Value *Op = CI->getArgOperand(0);
if (Instruction *I = dyn_cast<Instruction>(Op)) {
if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) {
@@ -1238,8 +1429,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
// and multiply.
// FIXME: We're not checking the sqrt because it doesn't have
// fast-math-flags (see earlier comment).
- IRBuilder<true, ConstantFolder,
- IRBuilderDefaultInserter<true> >::FastMathFlagGuard Guard(B);
+ IRBuilder<>::FastMathFlagGuard Guard(B);
B.SetFastMathFlags(I->getFastMathFlags());
// If we found a repeated factor, hoist it out of the square root and
// replace it with the fabs of that factor.
@@ -1262,6 +1452,40 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
+Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+ FunctionType *FT = Callee->getFunctionType();
+
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ return Ret;
+ Value *Op1 = CI->getArgOperand(0);
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (!OpC)
+ return Ret;
+
+ // tan(atan(x)) -> x
+ // tanf(atanf(x)) -> x
+ // tanl(atanl(x)) -> x
+ LibFunc::Func Func;
+ Function *F = OpC->getCalledFunction();
+ if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+ ((Func == LibFunc::atan && Callee->getName() == "tan") ||
+ (Func == LibFunc::atanf && Callee->getName() == "tanf") ||
+ (Func == LibFunc::atanl && Callee->getName() == "tanl")))
+ Ret = OpC->getArgOperand(0);
+ return Ret;
+}
+
static bool isTrigLibCall(CallInst *CI);
static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
bool UseFloat, Value *&Sin, Value *&Cos,
@@ -1329,9 +1553,9 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
return;
Function *Callee = CI->getCalledFunction();
- StringRef FuncName = Callee->getName();
LibFunc::Func Func;
- if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || !isTrigLibCall(CI))
+ if (!Callee || !TLI->getLibFunc(Callee->getName(), Func) || !TLI->has(Func) ||
+ !isTrigLibCall(CI))
return;
if (IsFloat) {
@@ -1353,10 +1577,8 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl<CallInst *> &Calls,
Value *Res) {
- for (SmallVectorImpl<CallInst *>::iterator I = Calls.begin(), E = Calls.end();
- I != E; ++I) {
- replaceAllUsesWith(*I, Res);
- }
+ for (CallInst *C : Calls)
+ replaceAllUsesWith(C, Res);
}
void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
@@ -1387,8 +1609,7 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
// If the argument is an instruction, it must dominate all uses so put our
// sincos call there.
- BasicBlock::iterator Loc = ArgInst;
- B.SetInsertPoint(ArgInst->getParent(), ++Loc);
+ B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
} else {
// Otherwise (e.g. for a constant) the beginning of the function is as
// good a place as any.
@@ -1413,15 +1634,16 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
+static bool checkIntUnaryReturnAndParam(Function *Callee) {
+ FunctionType *FT = Callee->getFunctionType();
+ return FT->getNumParams() == 1 && FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0)->isIntegerTy();
+}
+
Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) ||
- !FT->getParamType(0)->isIntegerTy())
+ if (!checkIntUnaryReturnAndParam(Callee))
return nullptr;
-
Value *Op = CI->getArgOperand(0);
// Constant fold.
@@ -1436,7 +1658,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
Type *ArgType = Op->getType();
Value *F =
Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz");
+ Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
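
The overall lowering this produces is the usual ffs idiom; is_zero_undef can now be true because the zero input is handled by a select in code this hunk elides. A sketch of the end result (__builtin_ctz assumes a GCC/Clang-style compiler):

int ffsEquiv(unsigned X) {
  // cttz(X, /*is_zero_undef=*/true) + 1, guarded by the X == 0 select.
  return X ? __builtin_ctz(X) + 1 : 0;
}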
@@ -1461,11 +1683,7 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
+ if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
return nullptr;
// isdigit(c) -> (c-'0') <u 10
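
The unsigned-compare trick named in the comment, written out: one subtraction and one unsigned comparison replace the two-sided range test.

bool isDigitEquiv(int C) {
  return static_cast<unsigned>(C - '0') < 10u; // (c-'0') <u 10
}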
@@ -1476,11 +1694,7 @@ Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
+ if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
return nullptr;
// isascii(c) -> c <u 128
@@ -1490,11 +1704,7 @@ Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // We require i32(i32)
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isIntegerTy(32))
+ if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
return nullptr;
// toascii(c) -> c & 0x7f
@@ -1529,10 +1739,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
}
static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
- if (!ColdErrorCalls)
- return false;
-
- if (!Callee || !Callee->isDeclaration())
+ if (!ColdErrorCalls || !Callee || !Callee->isDeclaration())
return false;
if (StreamArg < 0)
@@ -1968,16 +2175,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
// Command-line parameter overrides function attribute.
if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
UnsafeFPShrink = EnableUnsafeFPShrink;
- else if (Callee->hasFnAttribute("unsafe-fp-math")) {
- // FIXME: This is the same problem as described in optimizeSqrt().
- // If calls gain access to IR-level FMF, then use that instead of a
- // function attribute.
-
- // Check for unsafe-fp-math = true.
- Attribute Attr = Callee->getFnAttribute("unsafe-fp-math");
- if (Attr.getValueAsString() == "true")
- UnsafeFPShrink = true;
- }
+ else if (canUseUnsafeFPMath(Callee))
+ UnsafeFPShrink = true;
// First, check for intrinsics.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
@@ -1990,6 +2189,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeExp2(CI, Builder);
case Intrinsic::fabs:
return optimizeFabs(CI, Builder);
+ case Intrinsic::log:
+ return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
return optimizeSqrt(CI, Builder);
default:
@@ -2001,13 +2202,17 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) {
// Try to further simplify the result.
CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
- if (SimplifiedCI && SimplifiedCI->getCalledFunction())
- if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) {
+ if (SimplifiedCI && SimplifiedCI->getCalledFunction()) {
+ // Use an IR Builder from SimplifiedCI if available instead of CI
+ // to guarantee we reach all uses we might replace later on.
+ IRBuilder<> TmpBuilder(SimplifiedCI);
+ if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
// If we were able to further simplify, remove the now redundant call.
SimplifiedCI->replaceAllUsesWith(V);
SimplifiedCI->eraseFromParent();
return V;
}
+ }
return SimplifiedFortifiedCI;
}
@@ -2068,8 +2273,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeFWrite(CI, Builder);
case LibFunc::fputs:
return optimizeFPuts(CI, Builder);
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ return optimizeLog(CI, Builder);
case LibFunc::puts:
return optimizePuts(CI, Builder);
+ case LibFunc::tan:
+ case LibFunc::tanf:
+ case LibFunc::tanl:
+ return optimizeTan(CI, Builder);
case LibFunc::perror:
return optimizeErrorReporting(CI, Builder);
case LibFunc::vfprintf:
@@ -2097,24 +2312,23 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
case LibFunc::exp:
case LibFunc::exp10:
case LibFunc::expm1:
- case LibFunc::log:
- case LibFunc::log10:
- case LibFunc::log1p:
- case LibFunc::log2:
- case LibFunc::logb:
case LibFunc::sin:
case LibFunc::sinh:
- case LibFunc::tan:
case LibFunc::tanh:
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return optimizeUnaryDoubleFP(CI, Builder, true);
return nullptr;
case LibFunc::copysign:
- case LibFunc::fmin:
- case LibFunc::fmax:
if (hasFloatVersion(FuncName))
return optimizeBinaryDoubleFP(CI, Builder);
return nullptr;
+ case LibFunc::fminf:
+ case LibFunc::fmin:
+ case LibFunc::fminl:
+ case LibFunc::fmaxf:
+ case LibFunc::fmax:
+ case LibFunc::fmaxl:
+ return optimizeFMinFMax(CI, Builder);
default:
return nullptr;
}
@@ -2133,37 +2347,27 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
Replacer(I, With);
}
-/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I,
- Value *With) {
- I->replaceAllUsesWith(With);
- I->eraseFromParent();
-}
-
// TODO:
// Additional cases that we need to add to this file:
//
// cbrt:
// * cbrt(expN(X)) -> expN(x/3)
// * cbrt(sqrt(x)) -> pow(x,1/6)
-// * cbrt(sqrt(x)) -> pow(x,1/9)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
//
// exp, expf, expl:
// * exp(log(x)) -> x
//
// log, logf, logl:
// * log(exp(x)) -> x
-// * log(x**y) -> y*log(x)
// * log(exp(y)) -> y*log(e)
-// * log(exp2(y)) -> y*log(2)
// * log(exp10(y)) -> y*log(10)
// * log(sqrt(x)) -> 0.5*log(x)
-// * log(pow(x,y)) -> y*log(x)
//
// lround, lroundf, lroundl:
// * lround(cnst) -> cnst'
//
// pow, powf, powl:
-// * pow(exp(x),y) -> exp(x*y)
// * pow(sqrt(x),y) -> pow(x,y*0.5)
// * pow(pow(x,y),z)-> pow(x,y*z)
//
@@ -2179,9 +2383,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
-// tan, tanf, tanl:
-// * tan(atan(x)) -> x
-//
// trunc, truncf, truncl:
// * trunc(cnst) -> cnst'
//
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
new file mode 100644
index 000000000000..ad6b782caf8b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -0,0 +1,85 @@
+//===- SplitModule.cpp - Split a module into partitions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function llvm::SplitModule, which splits a module
+// into multiple linkable partitions. It can be used to implement parallel code
+// generation for link-time optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+static void externalize(GlobalValue *GV) {
+ if (GV->hasLocalLinkage()) {
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ }
+
+ // Unnamed entities must be named consistently between modules. setName will
+ // give a distinct name to each such entity.
+ if (!GV->hasName())
+ GV->setName("__llvmsplit_unnamed");
+}
+
+// Returns whether GV should be in partition (0-based) I of N.
+static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
+ if (auto GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalObject *Base = GA->getBaseObject())
+ GV = Base;
+
+ StringRef Name;
+ if (const Comdat *C = GV->getComdat())
+ Name = C->getName();
+ else
+ Name = GV->getName();
+
+ // Partition by MD5 hash. We only need a few bits for evenness as the number
+ // of partitions will generally be in the 1-2 figure range; the low 16 bits
+ // are enough.
+ MD5 H;
+ MD5::MD5Result R;
+ H.update(Name);
+ H.final(R);
+ return (R[0] | (R[1] << 8)) % N == I;
+}
+
+void llvm::SplitModule(
+ std::unique_ptr<Module> M, unsigned N,
+ std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
+ for (Function &F : *M)
+ externalize(&F);
+ for (GlobalVariable &GV : M->globals())
+ externalize(&GV);
+ for (GlobalAlias &GA : M->aliases())
+ externalize(&GA);
+
+ // FIXME: We should be able to reuse M as the last partition instead of
+ // cloning it.
+ for (unsigned I = 0; I != N; ++I) {
+ ValueToValueMapTy VMap;
+ std::unique_ptr<Module> MPart(
+ CloneModule(M.get(), VMap, [=](const GlobalValue *GV) {
+ return isInPartition(GV, I, N);
+ }));
+ if (I != 0)
+ MPart->setModuleInlineAsm("");
+ ModuleCallback(std::move(MPart));
+ }
+}
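
A hypothetical driver for the new utility, just to show the intended call shape; the four-way split and writePartition are illustrative assumptions, not part of the API above (only llvm::SplitModule, from Transforms/Utils/SplitModule.h, is):

void splitForParallelCodeGen(std::unique_ptr<Module> M) {
  unsigned I = 0;
  SplitModule(std::move(M), /*N=*/4,
              [&](std::unique_ptr<Module> MPart) {
                // Each partition is independently linkable; hand it off to
                // a worker (writePartition is an assumed helper).
                writePartition(std::move(MPart), I++);
              });
}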
diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index a2a54da8590c..1d1f602b041d 100644
--- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -69,7 +69,6 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 7e00a80989dc..6b1d1dae5f01 100644
--- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -50,11 +50,11 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
//
std::vector<BasicBlock*> ReturningBlocks;
std::vector<BasicBlock*> UnreachableBlocks;
- for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- if (isa<ReturnInst>(I->getTerminator()))
- ReturningBlocks.push_back(I);
- else if (isa<UnreachableInst>(I->getTerminator()))
- UnreachableBlocks.push_back(I);
+ for (BasicBlock &I : F)
+ if (isa<ReturnInst>(I.getTerminator()))
+ ReturningBlocks.push_back(&I);
+ else if (isa<UnreachableInst>(I.getTerminator()))
+ UnreachableBlocks.push_back(&I);
// Then unreachable blocks.
if (UnreachableBlocks.empty()) {
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 8c72641da9e7..1add78e01657 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -19,11 +19,14 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
using namespace llvm;
// Out of line method to get vtable etc for class.
void ValueMapTypeRemapper::anchor() {}
void ValueMaterializer::anchor() {}
+void ValueMaterializer::materializeInitFor(GlobalValue *New, GlobalValue *Old) {
+}
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
@@ -35,15 +38,28 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// If we have a materializer and it can materialize a value, use that.
if (Materializer) {
- if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
- return VM[V] = NewV;
+ if (Value *NewV =
+ Materializer->materializeDeclFor(const_cast<Value *>(V))) {
+ VM[V] = NewV;
+ if (auto *NewGV = dyn_cast<GlobalValue>(NewV))
+ Materializer->materializeInitFor(
+ NewGV, const_cast<GlobalValue *>(cast<GlobalValue>(V)));
+ return NewV;
+ }
}
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
- if (isa<GlobalValue>(V))
+ if (isa<GlobalValue>(V)) {
+ if (Flags & RF_NullMapMissingGlobalValues) {
+ assert(!(Flags & RF_IgnoreMissingEntries) &&
+ "Illegal to specify both RF_NullMapMissingGlobalValues and "
+ "RF_IgnoreMissingEntries");
+ return nullptr;
+ }
return VM[V] = const_cast<Value*>(V);
-
+ }
+
if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
// Inline asm may need *type* remapping.
FunctionType *NewTy = IA->getFunctionType();
@@ -73,7 +89,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// correct. For now, just match behaviour from before the metadata/value
// split.
//
- // assert(MappedMD && "Referenced metadata value not in value map");
+ // assert((MappedMD || (Flags & RF_NullMapMissingGlobalValues)) &&
+ // "Referenced metadata value not in value map");
return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD);
}
@@ -127,9 +144,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
Flags, TypeMapper, Materializer));
}
-
+ Type *NewSrcTy = nullptr;
+ if (TypeMapper)
+ if (auto *GEPO = dyn_cast<GEPOperator>(C))
+ NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType());
+
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- return VM[V] = CE->getWithOperands(Ops, NewTy);
+ return VM[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy);
if (isa<ConstantArray>(C))
return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
if (isa<ConstantStruct>(C))
@@ -146,29 +167,42 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
}
static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key,
- Metadata *Val) {
+ Metadata *Val, ValueMaterializer *Materializer,
+ RemapFlags Flags) {
VM.MD()[Key].reset(Val);
+ if (Materializer && !(Flags & RF_HaveUnmaterializedMetadata)) {
+ auto *N = dyn_cast_or_null<MDNode>(Val);
+ // Need to invoke this once we have non-temporary MD.
+ if (!N || !N->isTemporary())
+ Materializer->replaceTemporaryMetadata(Key, Val);
+ }
return Val;
}
-static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) {
- return mapToMetadata(VM, MD, const_cast<Metadata *>(MD));
+static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD,
+ ValueMaterializer *Materializer, RemapFlags Flags) {
+ return mapToMetadata(VM, MD, const_cast<Metadata *>(MD), Materializer, Flags);
}
static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<MDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer);
-static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles,
+static Metadata *mapMetadataOp(Metadata *Op,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
if (!Op)
return nullptr;
- if (Metadata *MappedOp =
- MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer))
+
+ if (Materializer && !Materializer->isMetadataNeeded(Op))
+ return nullptr;
+
+ if (Metadata *MappedOp = MapMetadataImpl(Op, DistinctWorklist, VM, Flags,
+ TypeMapper, Materializer))
return MappedOp;
// Use identity map if MappedOp is null and we can ignore missing entries.
if (Flags & RF_IgnoreMissingEntries)
@@ -178,89 +212,113 @@ static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles,
// correct. For now, just match behaviour from before the metadata/value
// split.
//
- // llvm_unreachable("Referenced metadata not in value map!");
+ // assert((Flags & RF_NullMapMissingGlobalValues) &&
+ // "Referenced metadata not in value map!");
return nullptr;
}
-/// \brief Remap nodes.
+/// Resolve uniquing cycles involving the given metadata.
+static void resolveCycles(Metadata *MD, bool MDMaterialized) {
+ if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
+ if (!MDMaterialized && N->isTemporary())
+ return;
+ if (!N->isResolved())
+ N->resolveCycles(MDMaterialized);
+ }
+}
+
+/// Remap the operands of an MDNode.
///
-/// Insert \c NewNode in the value map, and then remap \c OldNode's operands.
-/// Assumes that \c NewNode is already a clone of \c OldNode.
+/// If \c Node is temporary, uniquing cycles are ignored. If \c Node is
+/// distinct, uniquing cycles are resolved as they're found.
///
-/// \pre \c NewNode is a clone of \c OldNode.
-static bool remap(const MDNode *OldNode, MDNode *NewNode,
- SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM,
- RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- assert(OldNode->getNumOperands() == NewNode->getNumOperands() &&
- "Expected nodes to match");
- assert(OldNode->isResolved() && "Expected resolved node");
- assert(!NewNode->isUniqued() && "Expected non-uniqued node");
-
- // Map the node upfront so it's available for cyclic references.
- mapToMetadata(VM, OldNode, NewNode);
- bool AnyChanged = false;
- for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) {
- Metadata *Old = OldNode->getOperand(I);
- assert(NewNode->getOperand(I) == Old &&
- "Expected old operands to already be in place");
+/// \pre \c Node.isDistinct() or \c Node.isTemporary().
+static bool remapOperands(MDNode &Node,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
+ ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(!Node.isUniqued() && "Expected temporary or distinct node");
+ const bool IsDistinct = Node.isDistinct();
- Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags,
- TypeMapper, Materializer);
+ bool AnyChanged = false;
+ for (unsigned I = 0, E = Node.getNumOperands(); I != E; ++I) {
+ Metadata *Old = Node.getOperand(I);
+ Metadata *New = mapMetadataOp(Old, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer);
if (Old != New) {
AnyChanged = true;
- NewNode->replaceOperandWith(I, New);
+ Node.replaceOperandWith(I, New);
+
+ // Resolve uniquing cycles underneath distinct nodes on the fly so they
+ // don't infect later operands.
+ if (IsDistinct)
+ resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata));
}
}
return AnyChanged;
}
-/// \brief Map a distinct MDNode.
+/// Map a distinct MDNode.
///
-/// Distinct nodes are not uniqued, so they must always recreated.
+/// Whether distinct nodes change is independent of their operands. If \a
+/// RF_MoveDistinctMDs, then they are reused, and their operands remapped in
+/// place; effectively, they're moved from one graph to another. Otherwise,
+/// they're cloned/duplicated, and the new copy's operands are remapped.
static Metadata *mapDistinctNode(const MDNode *Node,
- SmallVectorImpl<MDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
assert(Node->isDistinct() && "Expected distinct node");
- MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone());
- remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer);
+ MDNode *NewMD;
+ if (Flags & RF_MoveDistinctMDs)
+ NewMD = const_cast<MDNode *>(Node);
+ else
+ NewMD = MDNode::replaceWithDistinct(Node->clone());
- // Track any cycles beneath this node.
- for (Metadata *Op : NewMD->operands())
- if (auto *Node = dyn_cast_or_null<MDNode>(Op))
- if (!Node->isResolved())
- Cycles.push_back(Node);
-
- return NewMD;
+ // Remap operands later.
+ DistinctWorklist.push_back(NewMD);
+ return mapToMetadata(VM, Node, NewMD, Materializer, Flags);
}
/// \brief Map a uniqued MDNode.
///
/// Uniqued nodes may not need to be recreated (they may map to themselves).
static Metadata *mapUniquedNode(const MDNode *Node,
- SmallVectorImpl<MDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
- assert(Node->isUniqued() && "Expected uniqued node");
+ assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isUniqued()) &&
+ "Expected uniqued node");
- // Create a temporary node upfront in case we have a metadata cycle.
+ // Create a temporary node and map it upfront in case we have a uniquing
+ // cycle. If necessary, this mapping will get updated by RAUW logic before
+ // returning.
auto ClonedMD = Node->clone();
- if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer))
- // No operands changed, so use the identity mapping.
- return mapToSelf(VM, Node);
+ mapToMetadata(VM, Node, ClonedMD.get(), Materializer, Flags);
+ if (!remapOperands(*ClonedMD, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer)) {
+ // No operands changed, so use the original.
+ ClonedMD->replaceAllUsesWith(const_cast<MDNode *>(Node));
+ // Even though replaceAllUsesWith would have replaced the value map
+ // entry, we need to explicitly map with the final non-temporary node
+ // to replace any temporary metadata via the callback.
+ return mapToSelf(VM, Node, Materializer, Flags);
+ }
- // At least one operand has changed, so uniquify the cloned node.
+ // Uniquify the cloned node. Explicitly map it with the final non-temporary
+ // node so that replacement of temporary metadata via the callback occurs.
return mapToMetadata(VM, Node,
- MDNode::replaceWithUniqued(std::move(ClonedMD)));
+ MDNode::replaceWithUniqued(std::move(ClonedMD)),
+ Materializer, Flags);
}
static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<MDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
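[Note] Two behavioral points in the hunk above are worth isolating: distinct nodes are now queued on DistinctWorklist and have their operands remapped iteratively later, and with the new RF_MoveDistinctMDs flag the node itself is reused rather than cloned, effectively moving it from one metadata graph to another. A minimal call-site sketch, assuming only the flag introduced by this patch:

    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    // Move distinct metadata nodes instead of duplicating them; uniqued
    // nodes are still cloned and re-uniqued as their operands change.
    static Metadata *moveMD(const Metadata *MD, ValueToValueMapTy &VM) {
      return MapMetadata(MD, VM, RF_MoveDistinctMDs);
    }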
@@ -269,26 +327,28 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
return NewMD;
if (isa<MDString>(MD))
- return mapToSelf(VM, MD);
+ return mapToSelf(VM, MD, Materializer, Flags);
if (isa<ConstantAsMetadata>(MD))
if ((Flags & RF_NoModuleLevelChanges))
- return mapToSelf(VM, MD);
+ return mapToSelf(VM, MD, Materializer, Flags);
if (const auto *VMD = dyn_cast<ValueAsMetadata>(MD)) {
Value *MappedV =
MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer);
if (VMD->getValue() == MappedV ||
(!MappedV && (Flags & RF_IgnoreMissingEntries)))
- return mapToSelf(VM, MD);
+ return mapToSelf(VM, MD, Materializer, Flags);
// FIXME: This assert crashes during bootstrap, but I think it should be
// correct. For now, just match behaviour from before the metadata/value
// split.
//
- // assert(MappedV && "Referenced metadata not in value map!");
+ // assert((MappedV || (Flags & RF_NullMapMissingGlobalValues)) &&
+ // "Referenced metadata not in value map!");
if (MappedV)
- return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV));
+ return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV), Materializer,
+ Flags);
return nullptr;
}
@@ -299,37 +359,54 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
// If this is a module-level metadata and we know that nothing at the
// module level is changing, then use an identity mapping.
if (Flags & RF_NoModuleLevelChanges)
- return mapToSelf(VM, MD);
+ return mapToSelf(VM, MD, Materializer, Flags);
// Require resolved nodes whenever metadata might be remapped.
- assert(Node->isResolved() && "Unexpected unresolved node");
+ assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isResolved()) &&
+ "Unexpected unresolved node");
+
+ if (Materializer && Node->isTemporary()) {
+ assert(Flags & RF_HaveUnmaterializedMetadata);
+ Metadata *TempMD =
+ Materializer->mapTemporaryMetadata(const_cast<Metadata *>(MD));
+ // If the above callback returned an existing temporary node, use it
+ // instead of the current temporary node. This happens when earlier
+ // function importing passes already created and saved a temporary
+ // metadata node for the same value id.
+ if (TempMD) {
+ mapToMetadata(VM, MD, TempMD, Materializer, Flags);
+ return TempMD;
+ }
+ }
if (Node->isDistinct())
- return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
+ return mapDistinctNode(Node, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer);
- return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
+ return mapUniquedNode(Node, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer);
}
Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
- SmallVector<MDNode *, 8> Cycles;
- Metadata *NewMD =
- MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer);
-
- // Resolve cycles underneath MD.
- if (NewMD && NewMD != MD) {
- if (auto *N = dyn_cast<MDNode>(NewMD))
- if (!N->isResolved())
- N->resolveCycles();
-
- for (MDNode *N : Cycles)
- if (!N->isResolved())
- N->resolveCycles();
- } else {
- // Shouldn't get unresolved cycles if nothing was remapped.
- assert(Cycles.empty() && "Expected no unresolved cycles");
- }
+ SmallVector<MDNode *, 8> DistinctWorklist;
+ Metadata *NewMD = MapMetadataImpl(MD, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer);
+
+ // When there are no module-level changes, it's possible that the metadata
+ // graph has temporaries. Skip the logic to resolve cycles, since it's
+ // unnecessary (and invalid) in that case.
+ if (Flags & RF_NoModuleLevelChanges)
+ return NewMD;
+
+ // Resolve cycles involving the entry metadata.
+ resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata));
+
+ // Remap the operands of distinct MDNodes.
+ while (!DistinctWorklist.empty())
+ remapOperands(*DistinctWorklist.pop_back_val(), DistinctWorklist, VM, Flags,
+ TypeMapper, Materializer);
return NewMD;
}
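[Note] The driver above also relies on mapUniquedNode's underlying idiom: clone into a temporary node so uniquing cycles have a stable target, then either RAUW back to the original (if nothing changed) or promote the clone. The standalone shape of that pattern, as a sketch with placeholder operands:

    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    static MDNode *remapAndUniquify(LLVMContext &Ctx, ArrayRef<Metadata *> Ops) {
      // Temporary node: cyclic operands can safely point at it meanwhile.
      TempMDTuple Temp = MDTuple::getTemporary(Ctx, Ops);
      // ... remap operands here, possibly referring back at Temp ...
      // Promote to a uniqued node; uses of Temp are RAUW'd internally.
      return MDNode::replaceWithUniqued(std::move(Temp));
    }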
@@ -374,14 +451,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
// Remap attached metadata.
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
I->getAllMetadata(MDs);
- for (SmallVectorImpl<std::pair<unsigned, MDNode *>>::iterator
- MI = MDs.begin(),
- ME = MDs.end();
- MI != ME; ++MI) {
- MDNode *Old = MI->second;
+ for (const auto &MI : MDs) {
+ MDNode *Old = MI.second;
MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer);
if (New != Old)
- I->setMetadata(MI->first, New);
+ I->setMetadata(MI.first, New);
}
if (!TypeMapper)
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 215d6f9a1eb6..8844d574a79d 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -25,8 +25,11 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@@ -204,9 +207,10 @@ namespace {
BBVectorize(Pass *P, Function &F, const VectorizeConfig &C)
: BasicBlockPass(ID), Config(C) {
- AA = &P->getAnalysis<AliasAnalysis>();
+ AA = &P->getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &P->getAnalysis<ScalarEvolution>();
+ SE = &P->getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TLI = &P->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = IgnoreTargetInfo
? nullptr
: &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -221,6 +225,7 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
ScalarEvolution *SE;
+ const TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
// FIXME: const correct?
@@ -437,9 +442,10 @@ namespace {
bool runOnBasicBlock(BasicBlock &BB) override {
// OptimizeNone check deferred to vectorizeBB().
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = IgnoreTargetInfo
? nullptr
: &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
@@ -450,13 +456,15 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
BasicBlockPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.setPreservesCFG();
}
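[Note] These BBVectorize hunks are the standard 3.8-era analysis migration: AliasAnalysis and ScalarEvolution are no longer passes themselves but result objects fetched through wrapper passes, and TLI must now be requested directly instead of being pulled off AliasAnalysis (see the SimplifyInstructionsInBlock change below). Reduced to a skeleton (the pass name is illustrative):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    struct ExamplePass : public FunctionPass {
      static char ID;
      ExamplePass() : FunctionPass(ID) {}
      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<AAResultsWrapperPass>();
        AU.addRequired<ScalarEvolutionWrapperPass>();
        AU.addRequired<TargetLibraryInfoWrapperPass>();
      }
      bool runOnFunction(Function &F) override {
        AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
        ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
        const TargetLibraryInfo *TLI =
            &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
        (void)AA; (void)SE; (void)TLI;
        return false;
      }
    };
    char ExamplePass::ID = 0;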
@@ -842,7 +850,7 @@ namespace {
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo());
+ (void)SimplifyInstructionsInBlock(&BB, TLI);
return true;
}
@@ -1239,20 +1247,23 @@ namespace {
if (I == Start) IAfterStart = true;
bool IsSimpleLoadStore;
- if (!isInstVectorizable(I, IsSimpleLoadStore)) continue;
+ if (!isInstVectorizable(&*I, IsSimpleLoadStore))
+ continue;
// Look for an instruction with which to pair instruction *I...
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
+ if (I->mayWriteToMemory())
+ WriteSet.add(&*I);
bool JAfterStart = IAfterStart;
BasicBlock::iterator J = std::next(I);
for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
- if (J == Start) JAfterStart = true;
+ if (&*J == Start)
+ JAfterStart = true;
// Determine if J uses I, if so, exit the loop.
- bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+ bool UsesI = trackUsesOfI(Users, WriteSet, &*I, &*J, !Config.FastDep);
if (Config.FastDep) {
// Note: For this heuristic to be effective, independent operations
// must tend to be intermixed. This is likely to be true from some
@@ -1269,25 +1280,26 @@ namespace {
// J does not use I, and comes before the first use of I, so it can be
// merged with I if the instructions are compatible.
int CostSavings, FixedOrder;
- if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
- CostSavings, FixedOrder)) continue;
+ if (!areInstsCompatible(&*I, &*J, IsSimpleLoadStore, NonPow2Len,
+ CostSavings, FixedOrder))
+ continue;
// J is a candidate for merging with I.
if (PairableInsts.empty() ||
- PairableInsts[PairableInsts.size()-1] != I) {
- PairableInsts.push_back(I);
+ PairableInsts[PairableInsts.size() - 1] != &*I) {
+ PairableInsts.push_back(&*I);
}
- CandidatePairs[I].push_back(J);
+ CandidatePairs[&*I].push_back(&*J);
++TotalPairs;
if (TTI)
- CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
- CostSavings));
+ CandidatePairCostSavings.insert(
+ ValuePairWithCost(ValuePair(&*I, &*J), CostSavings));
if (FixedOrder == 1)
- FixedOrderPairs.insert(ValuePair(I, J));
+ FixedOrderPairs.insert(ValuePair(&*I, &*J));
else if (FixedOrder == -1)
- FixedOrderPairs.insert(ValuePair(J, I));
+ FixedOrderPairs.insert(ValuePair(&*J, &*I));
// The next call to this function must start after the last instruction
// selected during this invocation.
@@ -1468,14 +1480,16 @@ namespace {
BasicBlock::iterator E = BB.end(), EL =
BasicBlock::iterator(cast<Instruction>(PairableInsts.back()));
for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
- if (IsInPair.find(I) == IsInPair.end()) continue;
+ if (IsInPair.find(&*I) == IsInPair.end())
+ continue;
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
+ if (I->mayWriteToMemory())
+ WriteSet.add(&*I);
for (BasicBlock::iterator J = std::next(I); J != E; ++J) {
- (void) trackUsesOfI(Users, WriteSet, I, J);
+ (void)trackUsesOfI(Users, WriteSet, &*I, &*J);
if (J == EL)
break;
@@ -1484,7 +1498,7 @@ namespace {
for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
U != E; ++U) {
if (IsInPair.find(*U) == IsInPair.end()) continue;
- PairableInstUsers.insert(ValuePair(I, *U));
+ PairableInstUsers.insert(ValuePair(&*I, *U));
}
if (I == EL)
@@ -2806,55 +2820,51 @@ namespace {
Instruction *J, Instruction *K,
Instruction *&InsertionPt,
Instruction *&K1, Instruction *&K2) {
- if (isa<StoreInst>(I)) {
- AA->replaceWithNewValue(I, K);
- AA->replaceWithNewValue(J, K);
- } else {
- Type *IType = I->getType();
- Type *JType = J->getType();
+ if (isa<StoreInst>(I))
+ return;
- VectorType *VType = getVecTypeForPair(IType, JType);
- unsigned numElem = VType->getNumElements();
+ Type *IType = I->getType();
+ Type *JType = J->getType();
- unsigned numElemI = getNumScalarElements(IType);
- unsigned numElemJ = getNumScalarElements(JType);
+ VectorType *VType = getVecTypeForPair(IType, JType);
+ unsigned numElem = VType->getNumElements();
- if (IType->isVectorTy()) {
- std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
- for (unsigned v = 0; v < numElemI; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v);
- }
+ unsigned numElemI = getNumScalarElements(IType);
+ unsigned numElemJ = getNumScalarElements(JType);
- K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get( Mask1),
- getReplacementName(K, false, 1));
- } else {
- Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
- K1 = ExtractElementInst::Create(K, CV0,
- getReplacementName(K, false, 1));
+ if (IType->isVectorTy()) {
+ std::vector<Constant *> Mask1(numElemI), Mask2(numElemI);
+ for (unsigned v = 0; v < numElemI; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ + v);
}
- if (JType->isVectorTy()) {
- std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ);
- for (unsigned v = 0; v < numElemJ; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v);
- }
+ K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(Mask1),
+ getReplacementName(K, false, 1));
+ } else {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ K1 = ExtractElementInst::Create(K, CV0, getReplacementName(K, false, 1));
+ }
- K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get( Mask2),
- getReplacementName(K, false, 2));
- } else {
- Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
- K2 = ExtractElementInst::Create(K, CV1,
- getReplacementName(K, false, 2));
+ if (JType->isVectorTy()) {
+ std::vector<Constant *> Mask1(numElemJ), Mask2(numElemJ);
+ for (unsigned v = 0; v < numElemJ; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI + v);
}
- K1->insertAfter(K);
- K2->insertAfter(K1);
- InsertionPt = K2;
+ K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(Mask2),
+ getReplacementName(K, false, 2));
+ } else {
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem - 1);
+ K2 = ExtractElementInst::Create(K, CV1, getReplacementName(K, false, 2));
}
+
+ K1->insertAfter(K);
+ K2->insertAfter(K1);
+ InsertionPt = K2;
}
// Move all uses of the function I (including pairing-induced uses) after J.
@@ -2869,7 +2879,7 @@ namespace {
if (I->mayWriteToMemory()) WriteSet.add(I);
for (; cast<Instruction>(L) != J; ++L)
- (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
+ (void)trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs);
assert(cast<Instruction>(L) == J &&
"Tracking has not proceeded far enough to check for dependencies");
@@ -2891,9 +2901,9 @@ namespace {
if (I->mayWriteToMemory()) WriteSet.add(I);
for (; cast<Instruction>(L) != J;) {
- if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
+ if (trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs)) {
// Move this instruction
- Instruction *InstToMove = L; ++L;
+ Instruction *InstToMove = &*L++;
DEBUG(dbgs() << "BBV: moving: " << *InstToMove <<
" to after " << *InsertionPt << "\n");
@@ -2924,11 +2934,11 @@ namespace {
// Note: We cannot end the loop when we reach J because J could be moved
// farther down the use chain by another instruction pairing. Also, J
// could be before I if this is an inverted input.
- for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) {
- if (trackUsesOfI(Users, WriteSet, I, L)) {
+ for (BasicBlock::iterator E = BB.end(); L != E; ++L) {
+ if (trackUsesOfI(Users, WriteSet, I, &*L)) {
if (L->mayReadFromMemory()) {
- LoadMoveSet[L].push_back(I);
- LoadMoveSetPairs.insert(ValuePair(L, I));
+ LoadMoveSet[&*L].push_back(I);
+ LoadMoveSetPairs.insert(ValuePair(&*L, I));
}
}
}
@@ -2991,7 +3001,7 @@ namespace {
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
- DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI);
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(&*PI);
if (P == ChosenPairs.end()) {
++PI;
continue;
@@ -3116,12 +3126,9 @@ namespace {
} else if (!isa<StoreInst>(K))
K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_fpmath
- };
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_group};
combineMetadata(K, H, KnownIDs);
K->intersectOptionalDataWith(H);
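[Note] The widened KnownIDs list above now carries invariant-group metadata through instruction fusion as well; combineMetadata drops any kind not in the list from K and merges the rest. As a free-standing sketch (K and H stand for the two paired instructions):

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static void mergeFusedMetadata(Instruction *K, Instruction *H) {
      // Kinds absent from this list are stripped from K during the merge.
      unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                             LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                             LLVMContext::MD_invariant_group};
      combineMetadata(K, H, KnownIDs);
    }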
@@ -3145,8 +3152,6 @@ namespace {
if (!isa<StoreInst>(I)) {
L->replaceAllUsesWith(K1);
H->replaceAllUsesWith(K2);
- AA->replaceWithNewValue(L, K1);
- AA->replaceWithNewValue(H, K2);
}
// Instructions that may read from memory may be in the load move set.
@@ -3197,10 +3202,14 @@ namespace {
char BBVectorize::ID = 0;
static const char bb_vectorize_name[] = "Basic-Block Vectorization";
INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 69ca2688c810..a627dd665179 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -48,7 +48,6 @@
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
@@ -58,10 +57,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -99,6 +101,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
+#include <functional>
#include <map>
#include <tuple>
@@ -123,6 +126,11 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
"trip count that is smaller than this "
"value."));
+static cl::opt<bool> MaximizeBandwidth(
+ "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
+ cl::desc("Maximize bandwidth when selecting vectorization factor which "
+ "will be determined by the smallest type in loop."));
+
/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
/// for (i = 0; i < N; ++i)
@@ -136,7 +144,7 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
/// ...
static cl::opt<bool> EnableMemAccessVersioning(
"enable-mem-access-versioning", cl::init(true), cl::Hidden,
- cl::desc("Enable symblic stride memory access versioning"));
+ cl::desc("Enable symbolic stride memory access versioning"));
static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
@@ -214,12 +222,27 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
cl::desc("The maximum interleave count to use when interleaving a scalar "
"reduction in a nested loop."));
+static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
+ "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
+ cl::desc("The maximum allowed number of runtime memory checks with a "
+ "vectorize(enable) pragma."));
+
+static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
+ "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed."));
+
+static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
+ "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed with a "
+ "vectorize(enable) pragma"));
+
namespace {
// Forward declarations.
+class LoopVectorizeHints;
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
-class LoopVectorizeHints;
+class LoopVectorizationRequirements;
/// \brief This modifies LoopAccessReport to initialize message with
/// loop-vectorizer-specific part.
@@ -245,6 +268,32 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) {
return VectorType::get(Scalar, VF);
}
+/// A helper function that returns the GEP instruction and knows to skip a
+/// 'bitcast'. The 'bitcast' may be skipped if the source and the destination
+/// pointee types of the 'bitcast' have the same size.
+/// For example:
+/// bitcast double** %var to i64* - can be skipped
+/// bitcast double** %var to i8* - cannot
+static GetElementPtrInst *getGEPInstruction(Value *Ptr) {
+
+ if (isa<GetElementPtrInst>(Ptr))
+ return cast<GetElementPtrInst>(Ptr);
+
+ if (isa<BitCastInst>(Ptr) &&
+ isa<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0))) {
+ Type *BitcastTy = Ptr->getType();
+ Type *GEPTy = cast<BitCastInst>(Ptr)->getSrcTy();
+ if (!isa<PointerType>(BitcastTy) || !isa<PointerType>(GEPTy))
+ return nullptr;
+ Type *Pointee1Ty = cast<PointerType>(BitcastTy)->getPointerElementType();
+ Type *Pointee2Ty = cast<PointerType>(GEPTy)->getPointerElementType();
+ const DataLayout &DL = cast<BitCastInst>(Ptr)->getModule()->getDataLayout();
+ if (DL.getTypeSizeInBits(Pointee1Ty) == DL.getTypeSizeInBits(Pointee2Ty))
+ return cast<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0));
+ }
+ return nullptr;
+}
+
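[Note] A short usage sketch for getGEPInstruction (the caller below is hypothetical): accesses of the form bitcast-of-GEP are treated like plain GEPs whenever the bitcast preserves the pointee size.

    // Hypothetical caller: look through a size-preserving bitcast to reach
    // the underlying GEP of an access.
    static bool pointerHasUsableGEP(Value *Ptr) {
      if (GetElementPtrInst *GEP = getGEPInstruction(Ptr))
        return GEP->getNumIndices() > 0; // e.g. inspect the stride index here
      return false;
    }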
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -261,25 +310,30 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) {
/// and reduction variables that were found to a given vectorization factor.
class InnerLoopVectorizer {
public:
- InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
+ InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, unsigned VecWidth,
unsigned UnrollFactor)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
- VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
+ : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
+ VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
- Legal(nullptr), AddedSafetyChecks(false) {}
+ TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr),
+ AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
- void vectorize(LoopVectorizationLegality *L) {
+ // MinimumBitWidths maps scalar integer values to the smallest bitwidth they
+ // can be validly truncated to. The cost model has assumed this truncation
+ // will happen when vectorizing.
+ void vectorize(LoopVectorizationLegality *L,
+ MapVector<Instruction*,uint64_t> MinimumBitWidths) {
+ MinBWs = MinimumBitWidths;
Legal = L;
// Create a new empty loop. Unlink the old loop and connect the new one.
createEmptyLoop();
// Widen each instruction in the old loop to a new one in the new loop.
// Use the Legality module to find the induction and reduction variables.
vectorizeLoop();
- // Register the new loop and update the analysis passes.
- updateAnalysis();
}
// Return true if any runtime check is added.
@@ -302,14 +356,11 @@ protected:
typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
VectorParts> EdgeMaskCache;
- /// \brief Add checks for strides that were assumed to be 1.
- ///
- /// Returns the last check instruction and the first check instruction in the
- /// pair as (first, last).
- std::pair<Instruction *, Instruction *> addStrideCheck(Instruction *Loc);
-
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop();
+ /// Create a new induction variable inside L.
+ PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
+ Value *Step, Instruction *DL);
/// Copy and widen the instructions from the old loop.
virtual void vectorizeLoop();
@@ -319,6 +370,9 @@ protected:
/// See PR14725.
void fixLCSSAPHIs();
+ /// Shrinks vector element sizes based on information in "MinBWs".
+ void truncateToMinimalBitwidths();
+
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
/// mask for the block BB.
@@ -329,7 +383,7 @@ protected:
/// A helper function to vectorize a single BB within the innermost loop.
void vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV);
-
+
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
@@ -374,6 +428,23 @@ protected:
/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
+ /// Returns (and creates if needed) the original loop trip count.
+ Value *getOrCreateTripCount(Loop *NewLoop);
+
+ /// Returns (and creates if needed) the trip count of the widened loop.
+ Value *getOrCreateVectorTripCount(Loop *NewLoop);
+
+ /// Emit a bypass check to see if the trip count would overflow, or we
+ /// wouldn't have enough iterations to execute one vector loop.
+ void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
+ /// Emit a bypass check to see if the vector trip count is nonzero.
+ void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass);
+ /// Emit a bypass check to see if all of the SCEV assumptions we've
+ /// had to make are correct.
+ void emitSCEVChecks(Loop *L, BasicBlock *Bypass);
+ /// Emit bypass checks to check any memory assumptions we may have made.
+ void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
+
/// This is a helper class that holds the vectorizer state. It maps scalar
/// instructions to vector instructions. When the code is 'unrolled' then
/// a single scalar value is mapped to multiple vector parts. The parts
@@ -416,8 +487,10 @@ protected:
/// The original loop.
Loop *OrigLoop;
- /// Scev analysis to use.
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies
+ /// dynamic knowledge to simplify SCEV expressions and converts them to a
+ /// more usable form.
+ PredicatedScalarEvolution &PSE;
/// Loop Info.
LoopInfo *LI;
/// Dominator Tree.
@@ -462,12 +535,21 @@ protected:
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
- /// Holds the extended (to the widest induction type) start index.
- Value *ExtendedIdx;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
+ /// Store instructions that should be predicated, as a pair
+ /// <StoreInst, Predicate>
+ SmallVector<std::pair<StoreInst*,Value*>, 4> PredicatedStores;
EdgeMaskCache MaskCache;
-
+ /// Trip count of the original loop.
+ Value *TripCount;
+ /// Trip count of the widened loop (TripCount - TripCount % (VF*UF))
+ Value *VectorTripCount;
+
+ /// Map of scalar integer values to the smallest bitwidth they can be legally
+ /// represented as. The vector equivalents of these values should be truncated
+ /// to this type.
+ MapVector<Instruction*,uint64_t> MinBWs;
LoopVectorizationLegality *Legal;
// Record whether runtime check is added.
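[Note] MinBWs, added above, is the channel through which the cost model's narrowing analysis (via DemandedBits) reaches the widening code: an entry states that an integer instruction only ever carries N meaningful bits, so its vector form may be truncated to iN lanes. An illustrative, hypothetical entry:

    #include "llvm/ADT/MapVector.h"
    #include "llvm/IR/Instruction.h"
    #include <cstdint>
    using namespace llvm;

    // Hypothetical: analysis proved AddI (an i32 add) only carries 8
    // meaningful bits, so its widened form can use <VF x i8> lanes.
    static void recordNarrowing(MapVector<Instruction *, uint64_t> &MinBWs,
                                Instruction *AddI) {
      MinBWs[AddI] = 8;
    }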
@@ -476,10 +558,11 @@ protected:
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
- InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
+ InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, unsigned UnrollFactor)
- : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
+ : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
private:
void scalarizeInstruction(Instruction *Instr,
@@ -551,7 +634,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) {
if (Kind != LLVMContext::MD_tbaa &&
Kind != LLVMContext::MD_alias_scope &&
Kind != LLVMContext::MD_noalias &&
- Kind != LLVMContext::MD_fpmath)
+ Kind != LLVMContext::MD_fpmath &&
+ Kind != LLVMContext::MD_nontemporal)
continue;
To->setMetadata(Kind, M.second);
@@ -559,7 +643,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) {
}
/// \brief Propagate known metadata from one instruction to a vector of others.
-static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *From) {
+static void propagateMetadata(SmallVectorImpl<Value *> &To,
+ const Instruction *From) {
for (Value *V : To)
if (Instruction *I = dyn_cast<Instruction>(V))
propagateMetadata(I, From);
@@ -699,8 +784,9 @@ private:
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
- InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT)
- : SE(SE), TheLoop(L), DT(DT) {}
+ InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
+ DominatorTree *DT)
+ : PSE(PSE), TheLoop(L), DT(DT) {}
~InterleavedAccessInfo() {
SmallSet<InterleaveGroup *, 4> DelSet;
@@ -730,7 +816,11 @@ public:
}
private:
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
+ /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
+ /// The interleaved access analysis can also add new predicates (for example
+ /// by versioning strides of pointers).
+ PredicatedScalarEvolution &PSE;
Loop *TheLoop;
DominatorTree *DT;
@@ -778,6 +868,304 @@ private:
const ValueToValueMap &Strides);
};
+/// Utility class for getting and setting loop vectorizer hints in the form
+/// of loop metadata.
+/// This class keeps a number of loop annotations locally (as member variables)
+/// and can, upon request, write them back as metadata on the loop. It will
+/// initially scan the loop for existing metadata, and will update the local
+/// values based on information in the loop.
+/// We cannot write all values to metadata, as the mere presence of some info,
+/// for example 'force', means a decision has been made. So, we need to be
+/// careful NOT to add them if the user hasn't specifically asked for them.
+class LoopVectorizeHints {
+ enum HintKind {
+ HK_WIDTH,
+ HK_UNROLL,
+ HK_FORCE
+ };
+
+ /// Hint - associates name and validation with the hint value.
+ struct Hint {
+ const char * Name;
+ unsigned Value; // This may have to change for non-numeric values.
+ HintKind Kind;
+
+ Hint(const char * Name, unsigned Value, HintKind Kind)
+ : Name(Name), Value(Value), Kind(Kind) { }
+
+ bool validate(unsigned Val) {
+ switch (Kind) {
+ case HK_WIDTH:
+ return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
+ case HK_UNROLL:
+ return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
+ case HK_FORCE:
+ return (Val <= 1);
+ }
+ return false;
+ }
+ };
+
+ /// Vectorization width.
+ Hint Width;
+ /// Vectorization interleave factor.
+ Hint Interleave;
+ /// Vectorization forced.
+ Hint Force;
+
+ /// Return the loop metadata prefix.
+ static StringRef Prefix() { return "llvm.loop."; }
+
+public:
+ enum ForceKind {
+ FK_Undefined = -1, ///< Not selected.
+ FK_Disabled = 0, ///< Forcing disabled.
+ FK_Enabled = 1, ///< Forcing enabled.
+ };
+
+ LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
+ : Width("vectorize.width", VectorizerParams::VectorizationFactor,
+ HK_WIDTH),
+ Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
+ Force("vectorize.enable", FK_Undefined, HK_FORCE),
+ TheLoop(L) {
+ // Populate values with existing loop metadata.
+ getHintsFromMetadata();
+
+ // force-vector-interleave overrides DisableInterleaving.
+ if (VectorizerParams::isInterleaveForced())
+ Interleave.Value = VectorizerParams::VectorizationInterleave;
+
+ DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
+ << "LV: Interleaving disabled by the pass manager\n");
+ }
+
+ /// Mark the loop L as already vectorized by setting the width to 1.
+ void setAlreadyVectorized() {
+ Width.Value = Interleave.Value = 1;
+ Hint Hints[] = {Width, Interleave};
+ writeHintsToMetadata(Hints);
+ }
+
+ bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const {
+ if (getForce() == LoopVectorizeHints::FK_Disabled) {
+ DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(),
+ vectorizeAnalysisPassName(), *F,
+ L->getStartLoc(), emitRemark());
+ return false;
+ }
+
+ if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
+ DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(),
+ vectorizeAnalysisPassName(), *F,
+ L->getStartLoc(), emitRemark());
+ return false;
+ }
+
+ if (getWidth() == 1 && getInterleave() == 1) {
+ // FIXME: Add a separate metadata to indicate when the loop has already
+ // been vectorized instead of setting width and count to 1.
+ DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
+ // FIXME: Add interleave.disable metadata. This will allow
+ // vectorize.disable to be used without disabling the pass and errors
+ // to differentiate between disabled vectorization and a width of 1.
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), vectorizeAnalysisPassName(), *F, L->getStartLoc(),
+ "loop not vectorized: vectorization and interleaving are explicitly "
+ "disabled, or vectorize width and interleave count are both set to "
+ "1");
+ return false;
+ }
+
+ return true;
+ }
+
+ /// Returns a string summarizing all the hint information.
+ std::string emitRemark() const {
+ VectorizationReport R;
+ if (Force.Value == LoopVectorizeHints::FK_Disabled)
+ R << "vectorization is explicitly disabled";
+ else {
+ R << "use -Rpass-analysis=loop-vectorize for more info";
+ if (Force.Value == LoopVectorizeHints::FK_Enabled) {
+ R << " (Force=true";
+ if (Width.Value != 0)
+ R << ", Vector Width=" << Width.Value;
+ if (Interleave.Value != 0)
+ R << ", Interleave Count=" << Interleave.Value;
+ R << ")";
+ }
+ }
+
+ return R.str();
+ }
+
+ unsigned getWidth() const { return Width.Value; }
+ unsigned getInterleave() const { return Interleave.Value; }
+ enum ForceKind getForce() const { return (ForceKind)Force.Value; }
+ const char *vectorizeAnalysisPassName() const {
+ // If hints are provided that don't disable vectorization, use the
+ // AlwaysPrint pass name to force the frontend to print the diagnostic.
+ if (getWidth() == 1)
+ return LV_NAME;
+ if (getForce() == LoopVectorizeHints::FK_Disabled)
+ return LV_NAME;
+ if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
+ return LV_NAME;
+ return DiagnosticInfo::AlwaysPrint;
+ }
+
+ bool allowReordering() const {
+ // When enabling loop hints are provided, we allow the vectorizer to change
+ // the order of operations that is given by the scalar loop. This is not
+ // enabled by default because it can be unsafe or inefficient. For example,
+ // reordering floating-point operations will change the way round-off
+ // error accumulates in the loop.
+ return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
+ }
+
+private:
+ /// Find hints specified in the loop metadata and update local values.
+ void getHintsFromMetadata() {
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (!LoopID)
+ return;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ const MDString *S = nullptr;
+ SmallVector<Metadata *, 4> Args;
+
+ // The expected hint is either a MDString or a MDNode with the first
+ // operand a MDString.
+ if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
+ if (!MD || MD->getNumOperands() == 0)
+ continue;
+ S = dyn_cast<MDString>(MD->getOperand(0));
+ for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
+ Args.push_back(MD->getOperand(i));
+ } else {
+ S = dyn_cast<MDString>(LoopID->getOperand(i));
+ assert(Args.size() == 0 && "too many arguments for MDString");
+ }
+
+ if (!S)
+ continue;
+
+ // Check if the hint starts with the loop metadata prefix.
+ StringRef Name = S->getString();
+ if (Args.size() == 1)
+ setHint(Name, Args[0]);
+ }
+ }
+
+ /// Checks string hint with one operand and set value if valid.
+ void setHint(StringRef Name, Metadata *Arg) {
+ if (!Name.startswith(Prefix()))
+ return;
+ Name = Name.substr(Prefix().size(), StringRef::npos);
+
+ const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
+ if (!C) return;
+ unsigned Val = C->getZExtValue();
+
+ Hint *Hints[] = {&Width, &Interleave, &Force};
+ for (auto H : Hints) {
+ if (Name == H->Name) {
+ if (H->validate(Val))
+ H->Value = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
+ break;
+ }
+ }
+ }
+
+ /// Create a new hint from name / value pair.
+ MDNode *createHintMetadata(StringRef Name, unsigned V) const {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ Metadata *MDs[] = {MDString::get(Context, Name),
+ ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Context), V))};
+ return MDNode::get(Context, MDs);
+ }
+
+ /// Matches metadata with hint name.
+ bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
+ MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
+ if (!Name)
+ return false;
+
+ for (auto H : HintTypes)
+ if (Name->getString().endswith(H.Name))
+ return true;
+ return false;
+ }
+
+ /// Sets current hints into loop metadata, keeping other values intact.
+ void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
+ if (HintTypes.size() == 0)
+ return;
+
+ // Reserve the first element to LoopID (see below).
+ SmallVector<Metadata *, 4> MDs(1);
+ // If the loop already has metadata, then ignore the existing operands.
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
+ // If node in update list, ignore old value.
+ if (!matchesHintMetadataName(Node, HintTypes))
+ MDs.push_back(Node);
+ }
+ }
+
+ // Now, add the missing hints.
+ for (auto H : HintTypes)
+ MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
+
+ // Replace current metadata node with new one.
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+
+ TheLoop->setLoopID(NewLoopID);
+ }
+
+ /// The loop these hints belong to.
+ const Loop *TheLoop;
+};
+
+static void emitAnalysisDiag(const Function *TheFunction, const Loop *TheLoop,
+ const LoopVectorizeHints &Hints,
+ const LoopAccessReport &Message) {
+ const char *Name = Hints.vectorizeAnalysisPassName();
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, Name);
+}
+
+static void emitMissedWarning(Function *F, Loop *L,
+ const LoopVectorizeHints &LH) {
+ emitOptimizationRemarkMissed(F->getContext(), LV_NAME, *F, L->getStartLoc(),
+ LH.emitRemark());
+
+ if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
+ if (LH.getWidth() != 1)
+ emitLoopVectorizeWarning(
+ F->getContext(), *F, L->getStartLoc(),
+ "failed explicitly specified loop vectorization");
+ else if (LH.getInterleave() != 1)
+ emitLoopInterleaveWarning(
+ F->getContext(), *F, L->getStartLoc(),
+ "failed explicitly specified loop interleaving");
+ }
+}
+
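[Note] Putting the new pieces together, a condensed and hypothetical driver showing how LoopVectorizeHints is meant to be used around a vectorization attempt; the real control flow lives in the LoopVectorize pass further down:

    // Sketch only, using just the APIs defined above.
    static bool tryToVectorizeLoop(Function *F, Loop *L) {
      LoopVectorizeHints Hints(L, /*DisableInterleaving=*/false);
      if (!Hints.allowVectorization(F, L, /*AlwaysVectorize=*/true))
        return false;             // remark already emitted via emitRemark()
      // ... legality checks, cost model, widening ...
      Hints.setAlreadyVectorized(); // width=1/interleave=1 stops re-vectorizing
      return true;
    }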
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -793,87 +1181,17 @@ private:
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
- TargetLibraryInfo *TLI, AliasAnalysis *AA,
- Function *F, const TargetTransformInfo *TTI,
- LoopAccessAnalysis *LAA)
- : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
- TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT),
- Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
-
- /// This enum represents the kinds of inductions that we support.
- enum InductionKind {
- IK_NoInduction, ///< Not an induction variable.
- IK_IntInduction, ///< Integer induction variable. Step = C.
- IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
- };
-
- /// A struct for saving information about induction variables.
- struct InductionInfo {
- InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
- : StartValue(Start), IK(K), StepValue(Step) {
- assert(IK != IK_NoInduction && "Not an induction");
- assert(StartValue && "StartValue is null");
- assert(StepValue && !StepValue->isZero() && "StepValue is zero");
- assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
- "StartValue is not a pointer for pointer induction");
- assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
- "StartValue is not an integer for integer induction");
- assert(StepValue->getType()->isIntegerTy() &&
- "StepValue is not an integer");
- }
- InductionInfo()
- : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
-
- /// Get the consecutive direction. Returns:
- /// 0 - unknown or non-consecutive.
- /// 1 - consecutive and increasing.
- /// -1 - consecutive and decreasing.
- int getConsecutiveDirection() const {
- if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
- return StepValue->getSExtValue();
- return 0;
- }
-
- /// Compute the transformed value of Index at offset StartValue using step
- /// StepValue.
- /// For integer induction, returns StartValue + Index * StepValue.
- /// For pointer induction, returns StartValue[Index * StepValue].
- /// FIXME: The newly created binary instructions should contain nsw/nuw
- /// flags, which can be found from the original scalar operations.
- Value *transform(IRBuilder<> &B, Value *Index) const {
- switch (IK) {
- case IK_IntInduction:
- assert(Index->getType() == StartValue->getType() &&
- "Index type does not match StartValue type");
- if (StepValue->isMinusOne())
- return B.CreateSub(StartValue, Index);
- if (!StepValue->isOne())
- Index = B.CreateMul(Index, StepValue);
- return B.CreateAdd(StartValue, Index);
-
- case IK_PtrInduction:
- assert(Index->getType() == StepValue->getType() &&
- "Index type does not match StepValue type");
- if (StepValue->isMinusOne())
- Index = B.CreateNeg(Index);
- else if (!StepValue->isOne())
- Index = B.CreateMul(Index, StepValue);
- return B.CreateGEP(nullptr, StartValue, Index);
-
- case IK_NoInduction:
- return nullptr;
- }
- llvm_unreachable("invalid enum");
- }
-
- /// Start value.
- TrackingVH<Value> StartValue;
- /// Induction kind.
- InductionKind IK;
- /// Step value.
- ConstantInt *StepValue;
- };
+ LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE,
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ AliasAnalysis *AA, Function *F,
+ const TargetTransformInfo *TTI,
+ LoopAccessAnalysis *LAA,
+ LoopVectorizationRequirements *R,
+ const LoopVectorizeHints *H)
+ : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F),
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PSE, L, DT),
+ Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
+ Requirements(R), Hints(H) {}
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
@@ -881,7 +1199,7 @@ public:
/// InductionList saves induction variables and maps them to the
/// induction descriptor.
- typedef MapVector<PHINode*, InductionInfo> InductionList;
+ typedef MapVector<PHINode*, InductionDescriptor> InductionList;
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
@@ -903,6 +1221,9 @@ public:
/// Returns True if V is an induction variable in this loop.
bool isInductionVariable(const Value *V);
+ /// Returns True if PN is a reduction variable in this loop.
+ bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
+
/// Return true if the block BB needs to be predicated in order for the loop
/// to be vectorized.
bool blockNeedsPredication(BasicBlock *BB);
@@ -954,12 +1275,12 @@ public:
/// Returns true if the target machine supports masked store operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
- return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr));
+ return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType);
}
/// Returns true if the target machine supports masked load operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
- return TTI->isLegalMaskedLoad(DataType, isConsecutivePtr(Ptr));
+ return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType);
}
/// Returns true if vector representation of the instruction \p I
/// requires mask.
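[Note] The masked-access hunk above inverts the old logic: consecutiveness of the pointer used to be an argument to the TTI hook and is now a short-circuiting precondition, with the hook reduced to a pure type query. Restated as a free function (names mirror the members above):

    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    static bool legalMaskedStore(const TargetTransformInfo *TTI, Type *DataTy,
                                 bool IsConsecutive) {
      // Non-consecutive accesses never reach the target hook anymore.
      return IsConsecutive && TTI->isLegalMaskedStore(DataTy);
    }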
@@ -999,10 +1320,6 @@ private:
/// and we know that we can read from them without segfault.
bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
- /// Returns the induction kind of Phi and record the step. This function may
- /// return NoInduction if the PHI is not an induction variable.
- InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
-
/// \brief Collect memory access with loop invariant strides.
///
/// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
@@ -1013,16 +1330,20 @@ private:
/// not vectorized. These are handled as LoopAccessReport rather than
/// VectorizationReport because the << operator of VectorizationReport returns
/// LoopAccessReport.
- void emitAnalysis(const LoopAccessReport &Message) {
- LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
+ void emitAnalysis(const LoopAccessReport &Message) const {
+ emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
}
unsigned NumPredStores;
/// The loop that we evaluate.
Loop *TheLoop;
- /// Scev analysis.
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
+ /// Applies dynamic knowledge to simplify SCEV expressions in the context
+ /// of existing SCEV assumptions. The analysis will also add a minimal set
+ /// of new predicates if this is required to enable vectorization and
+ /// unrolling.
+ PredicatedScalarEvolution &PSE;
/// Target Library Info.
TargetLibraryInfo *TLI;
/// Parent function
@@ -1065,12 +1386,18 @@ private:
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
+ /// Vectorization requirements that will go through late-evaluation.
+ LoopVectorizationRequirements *Requirements;
+
+ /// Used to emit an analysis of any legality issues.
+ const LoopVectorizeHints *Hints;
+
ValueToValueMap Strides;
SmallPtrSet<Value *, 8> StrideSet;
/// While vectorizing these instructions we have to generate a
/// call to the appropriate masked intrinsic
- SmallPtrSet<const Instruction*, 8> MaskedOp;
+ SmallPtrSet<const Instruction *, 8> MaskedOp;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1082,15 +1409,14 @@ private:
/// different operations.
class LoopVectorizationCostModel {
public:
- LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
- LoopVectorizationLegality *Legal,
+ LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI, AssumptionCache *AC,
- const Function *F, const LoopVectorizeHints *Hints)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
- TheFunction(F), Hints(Hints) {
- CodeMetrics::collectEphemeralValues(L, AC, EphValues);
- }
+ const TargetLibraryInfo *TLI, DemandedBits *DB,
+ AssumptionCache *AC, const Function *F,
+ const LoopVectorizeHints *Hints)
+ : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
+ AC(AC), TheFunction(F), Hints(Hints) {}
/// Information about vectorization costs
struct VectorizationFactor {
@@ -1103,10 +1429,10 @@ public:
/// possible.
VectorizationFactor selectVectorizationFactor(bool OptForSize);
- /// \return The size (in bits) of the widest type in the code that
- /// needs to be vectorized. We ignore values that remain scalar such as
+ /// \return The size (in bits) of the smallest and widest types in the code
+ /// that need to be vectorized. We ignore values that remain scalar, such as
/// 64 bit loop indices.
- unsigned getWidestType();
+ std::pair<unsigned, unsigned> getSmallestAndWidestTypes();
/// \return The desired interleave count.
/// If interleave count has been specified by metadata it will be returned.
@@ -1133,8 +1459,13 @@ public:
unsigned NumInstructions;
};
- /// \return information about the register usage of the loop.
- RegisterUsage calculateRegisterUsage();
+ /// \return Information about the register usage of the loop for the
+ /// given vectorization factors.
+ SmallVector<RegisterUsage, 8>
+ calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
+
+ /// Collect values we want to ignore in the cost model.
+ void collectValuesToIgnore();
private:
/// Returns the expected execution cost. The unit of the cost does
@@ -1155,17 +1486,20 @@ private:
/// not vectorized. These are handled as LoopAccessReport rather than
/// VectorizationReport because the << operator of VectorizationReport returns
/// LoopAccessReport.
- void emitAnalysis(const LoopAccessReport &Message) {
- LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
+ void emitAnalysis(const LoopAccessReport &Message) const {
+ emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
}
- /// Values used only by @llvm.assume calls.
- SmallPtrSet<const Value *, 32> EphValues;
+public:
+ /// Map of scalar integer values to the smallest bitwidth they can be legally
+ /// represented as. The vector equivalents of these values should be truncated
+ /// to this type.
+ MapVector<Instruction *, uint64_t> MinBWs;
/// The loop that we evaluate.
Loop *TheLoop;
- /// Scev analysis.
- ScalarEvolution *SE;
+ /// Predicated scalar evolution analysis.
+ PredicatedScalarEvolution &PSE;
/// Loop Info analysis.
LoopInfo *LI;
/// Vectorization legality.
@@ -1174,247 +1508,78 @@ private:
const TargetTransformInfo &TTI;
/// Target Library Info.
const TargetLibraryInfo *TLI;
+ /// Demanded bits analysis.
+ DemandedBits *DB;
+ /// Assumption cache.
+ AssumptionCache *AC;
const Function *TheFunction;
- // Loop Vectorize Hint.
+ /// Loop Vectorize Hint.
const LoopVectorizeHints *Hints;
+ /// Values to ignore in the cost model.
+ SmallPtrSet<const Value *, 16> ValuesToIgnore;
+ /// Values to ignore in the cost model when VF > 1.
+ SmallPtrSet<const Value *, 16> VecValuesToIgnore;
};
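
The MinBWs map made public above drives a later narrowing step: each instruction it names can be computed in a narrower vector element type and re-extended afterwards, because DemandedBits has proved the high bits are never observed. A plain-C++ sketch of that arithmetic (hypothetical function, not the LLVM API), using 8 demanded bits on a 32-bit lane:

#include <cstdint>

// Sketch: narrow a 32-bit lane to its 8 demanded bits, operate there, and
// zero-extend back. The low 8 bits of the result match the full-width
// computation, which is all the consumers were proved to look at -- the
// property truncateToMinimalBitwidths relies on.
uint32_t narrowedAdd(uint32_t A, uint32_t B) {
  uint8_t NA = static_cast<uint8_t>(A);       // trunc to minimal bitwidth
  uint8_t NB = static_cast<uint8_t>(B);
  uint8_t NR = static_cast<uint8_t>(NA + NB); // operate at the narrow width
  return static_cast<uint32_t>(NR);           // zext back to the original type
}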
-/// Utility class for getting and setting loop vectorizer hints in the form
-/// of loop metadata.
-/// This class keeps a number of loop annotations locally (as member variables)
-/// and can, upon request, write them back as metadata on the loop. It will
-/// initially scan the loop for existing metadata, and will update the local
-/// values based on information in the loop.
-/// We cannot write all values to metadata, as the mere presence of some info,
-/// for example 'force', means a decision has been made. So, we need to be
-/// careful NOT to add them if the user hasn't specifically asked so.
-class LoopVectorizeHints {
- enum HintKind {
- HK_WIDTH,
- HK_UNROLL,
- HK_FORCE
- };
-
- /// Hint - associates name and validation with the hint value.
- struct Hint {
- const char * Name;
- unsigned Value; // This may have to change for non-numeric values.
- HintKind Kind;
-
- Hint(const char * Name, unsigned Value, HintKind Kind)
- : Name(Name), Value(Value), Kind(Kind) { }
-
- bool validate(unsigned Val) {
- switch (Kind) {
- case HK_WIDTH:
- return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
- case HK_UNROLL:
- return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
- case HK_FORCE:
- return (Val <= 1);
- }
- return false;
- }
- };
-
- /// Vectorization width.
- Hint Width;
- /// Vectorization interleave factor.
- Hint Interleave;
- /// Vectorization forced
- Hint Force;
-
- /// Return the loop metadata prefix.
- static StringRef Prefix() { return "llvm.loop."; }
-
+/// \brief This holds vectorization requirements that must be verified late in
+/// the process. The requirements are set by the legality analysis and the
+/// cost model. Once vectorization has been determined to be possible and
+/// profitable, the requirements can be verified by looking for metadata or
+/// compiler options.
+/// For example, some loops require FP commutativity which is only allowed if
+/// vectorization is explicitly specified or if the fast-math compiler option
+/// has been provided.
+/// Late evaluation of these requirements allows helpful diagnostics to be
+/// composed that tell the user what needs to be done to vectorize the loop,
+/// for example by specifying #pragma clang loop vectorize or -ffast-math.
+/// Late evaluation should be used only when diagnostics can be generated
+/// that a non-expert user can follow.
+class LoopVectorizationRequirements {
public:
- enum ForceKind {
- FK_Undefined = -1, ///< Not selected.
- FK_Disabled = 0, ///< Forcing disabled.
- FK_Enabled = 1, ///< Forcing enabled.
- };
-
- LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
- : Width("vectorize.width", VectorizerParams::VectorizationFactor,
- HK_WIDTH),
- Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
- Force("vectorize.enable", FK_Undefined, HK_FORCE),
- TheLoop(L) {
- // Populate values with existing loop metadata.
- getHintsFromMetadata();
-
- // force-vector-interleave overrides DisableInterleaving.
- if (VectorizerParams::isInterleaveForced())
- Interleave.Value = VectorizerParams::VectorizationInterleave;
-
- DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
- << "LV: Interleaving disabled by the pass manager\n");
- }
-
- /// Mark the loop L as already vectorized by setting the width to 1.
- void setAlreadyVectorized() {
- Width.Value = Interleave.Value = 1;
- Hint Hints[] = {Width, Interleave};
- writeHintsToMetadata(Hints);
- }
-
- /// Dumps all the hint information.
- std::string emitRemark() const {
- VectorizationReport R;
- if (Force.Value == LoopVectorizeHints::FK_Disabled)
- R << "vectorization is explicitly disabled";
- else {
- R << "use -Rpass-analysis=loop-vectorize for more info";
- if (Force.Value == LoopVectorizeHints::FK_Enabled) {
- R << " (Force=true";
- if (Width.Value != 0)
- R << ", Vector Width=" << Width.Value;
- if (Interleave.Value != 0)
- R << ", Interleave Count=" << Interleave.Value;
- R << ")";
- }
- }
-
- return R.str();
- }
-
- unsigned getWidth() const { return Width.Value; }
- unsigned getInterleave() const { return Interleave.Value; }
- enum ForceKind getForce() const { return (ForceKind)Force.Value; }
-
-private:
- /// Find hints specified in the loop metadata and update local values.
- void getHintsFromMetadata() {
- MDNode *LoopID = TheLoop->getLoopID();
- if (!LoopID)
- return;
-
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
-
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- const MDString *S = nullptr;
- SmallVector<Metadata *, 4> Args;
-
- // The expected hint is either a MDString or a MDNode with the first
- // operand a MDString.
- if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
- if (!MD || MD->getNumOperands() == 0)
- continue;
- S = dyn_cast<MDString>(MD->getOperand(0));
- for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
- Args.push_back(MD->getOperand(i));
- } else {
- S = dyn_cast<MDString>(LoopID->getOperand(i));
- assert(Args.size() == 0 && "too many arguments for MDString");
- }
-
- if (!S)
- continue;
-
- // Check if the hint starts with the loop metadata prefix.
- StringRef Name = S->getString();
- if (Args.size() == 1)
- setHint(Name, Args[0]);
+ LoopVectorizationRequirements()
+ : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr) {}
+
+ void addUnsafeAlgebraInst(Instruction *I) {
+ // First unsafe algebra instruction.
+ if (!UnsafeAlgebraInst)
+ UnsafeAlgebraInst = I;
+ }
+
+ void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
+
+ bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
+ const char *Name = Hints.vectorizeAnalysisPassName();
+ bool Failed = false;
+ if (UnsafeAlgebraInst && !Hints.allowReordering()) {
+ emitOptimizationRemarkAnalysisFPCommute(
+ F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(),
+ VectorizationReport() << "cannot prove it is safe to reorder "
+ "floating-point operations");
+ Failed = true;
}
- }
-
- /// Checks string hint with one operand and set value if valid.
- void setHint(StringRef Name, Metadata *Arg) {
- if (!Name.startswith(Prefix()))
- return;
- Name = Name.substr(Prefix().size(), StringRef::npos);
-
- const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
- if (!C) return;
- unsigned Val = C->getZExtValue();
- Hint *Hints[] = {&Width, &Interleave, &Force};
- for (auto H : Hints) {
- if (Name == H->Name) {
- if (H->validate(Val))
- H->Value = Val;
- else
- DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
- break;
- }
+ // Test if runtime memcheck thresholds are exceeded.
+ bool PragmaThresholdReached =
+ NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
+ bool ThresholdReached =
+ NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
+ if ((ThresholdReached && !Hints.allowReordering()) ||
+ PragmaThresholdReached) {
+ emitOptimizationRemarkAnalysisAliasing(
+ F->getContext(), Name, *F, L->getStartLoc(),
+ VectorizationReport()
+ << "cannot prove it is safe to reorder memory operations");
+ DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
+ Failed = true;
}
- }
- /// Create a new hint from name / value pair.
- MDNode *createHintMetadata(StringRef Name, unsigned V) const {
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- Metadata *MDs[] = {MDString::get(Context, Name),
- ConstantAsMetadata::get(
- ConstantInt::get(Type::getInt32Ty(Context), V))};
- return MDNode::get(Context, MDs);
+ return Failed;
}
- /// Matches metadata with hint name.
- bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
- MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
- if (!Name)
- return false;
-
- for (auto H : HintTypes)
- if (Name->getString().endswith(H.Name))
- return true;
- return false;
- }
-
- /// Sets current hints into loop metadata, keeping other values intact.
- void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
- if (HintTypes.size() == 0)
- return;
-
- // Reserve the first element to LoopID (see below).
- SmallVector<Metadata *, 4> MDs(1);
- // If the loop already has metadata, then ignore the existing operands.
- MDNode *LoopID = TheLoop->getLoopID();
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
- // If node in update list, ignore old value.
- if (!matchesHintMetadataName(Node, HintTypes))
- MDs.push_back(Node);
- }
- }
-
- // Now, add the missing hints.
- for (auto H : HintTypes)
- MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
-
- // Replace current metadata node with new one.
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
-
- TheLoop->setLoopID(NewLoopID);
- }
-
- /// The loop these hints belong to.
- const Loop *TheLoop;
+private:
+ unsigned NumRuntimePointerChecks;
+ Instruction *UnsafeAlgebraInst;
};
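
To make the late-evaluation flow concrete, here is a condensed stand-alone model (hypothetical names, not the LLVM classes): requirements accumulate during analysis, and only once vectorization looks possible and profitable are they tested against what the user actually permitted.

#include <iostream>

// Sketch of the LoopVectorizationRequirements idea with hypothetical types.
struct RequirementsModel {
  bool HasUnsafeFPAlgebra = false; // e.g. an FP reduction needing reordering
  unsigned NumPointerChecks = 0;   // runtime memchecks the plan would need

  // Late check: fails only if the user has not opted in to what we need.
  bool doesNotMeet(bool UserAllowsReordering, unsigned CheckThreshold) const {
    if (HasUnsafeFPAlgebra && !UserAllowsReordering) {
      std::cerr << "cannot prove it is safe to reorder FP operations\n";
      return true;
    }
    if (NumPointerChecks > CheckThreshold && !UserAllowsReordering) {
      std::cerr << "cannot prove it is safe to reorder memory operations\n";
      return true;
    }
    return false;
  }
};

int main() {
  RequirementsModel R;
  R.HasUnsafeFPAlgebra = true; // recorded during legality/cost analysis
  // Without -ffast-math or an explicit vectorize pragma, the late check vetoes.
  return R.doesNotMeet(/*UserAllowsReordering=*/false, /*CheckThreshold=*/8);
}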
-static void emitMissedWarning(Function *F, Loop *L,
- const LoopVectorizeHints &LH) {
- emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
- L->getStartLoc(), LH.emitRemark());
-
- if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
- if (LH.getWidth() != 1)
- emitLoopVectorizeWarning(
- F->getContext(), *F, L->getStartLoc(),
- "failed explicitly specified loop vectorization");
- else if (LH.getInterleave() != 1)
- emitLoopInterleaveWarning(
- F->getContext(), *F, L->getStartLoc(),
- "failed explicitly specified loop interleaving");
- }
-}
-
static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
if (L.empty())
return V.push_back(&L);
@@ -1441,6 +1606,7 @@ struct LoopVectorize : public FunctionPass {
DominatorTree *DT;
BlockFrequencyInfo *BFI;
TargetLibraryInfo *TLI;
+ DemandedBits *DB;
AliasAnalysis *AA;
AssumptionCache *AC;
LoopAccessAnalysis *LAA;
@@ -1450,16 +1616,17 @@ struct LoopVectorize : public FunctionPass {
BlockFrequency ColdEntryFreq;
bool runOnFunction(Function &F) override {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- BFI = &getAnalysis<BlockFrequencyInfo>();
+ BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
LAA = &getAnalysis<LoopAccessAnalysis>();
+ DB = &getAnalysis<DemandedBits>();
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
@@ -1562,26 +1729,8 @@ struct LoopVectorize : public FunctionPass {
// less verbose reporting vectorized loops and unvectorized loops that may
// benefit from vectorization, respectively.
- if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) {
- DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
- emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
- L->getStartLoc(), Hints.emitRemark());
- return false;
- }
-
- if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) {
- DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
- emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
- L->getStartLoc(), Hints.emitRemark());
- return false;
- }
-
- if (Hints.getWidth() == 1 && Hints.getInterleave() == 1) {
- DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
- emitOptimizationRemarkAnalysis(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- "loop not vectorized: vector width and interleave count are "
- "explicitly set to 1");
+ if (!Hints.allowVectorization(F, L, AlwaysVectorize)) {
+ DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n");
return false;
}
@@ -1595,15 +1744,19 @@ struct LoopVectorize : public FunctionPass {
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
DEBUG(dbgs() << "\n");
- emitOptimizationRemarkAnalysis(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- "vectorization is not beneficial and is not explicitly forced");
+ emitAnalysisDiag(F, L, Hints, VectorizationReport()
+ << "vectorization is not beneficial "
+ "and is not explicitly forced");
return false;
}
}
+ PredicatedScalarEvolution PSE(*SE);
+
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA);
+ LoopVectorizationRequirements Requirements;
+ LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA,
+ &Requirements, &Hints);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
@@ -1611,16 +1764,18 @@ struct LoopVectorize : public FunctionPass {
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints);
+ LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
+ &Hints);
+ CM.collectValuesToIgnore();
// Check the function attributes to find out if this function should be
// optimized for size.
bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- F->hasFnAttribute(Attribute::OptimizeForSize);
+ F->optForSize();
// Compute the weighted frequency of this loop being executed and see if it
// is less than 20% of the function entry baseline frequency. Note that we
- // always have a canonical loop here because we think we *can* vectoriez.
+ // always have a canonical loop here because we think we *can* vectorize.
// FIXME: This is hidden behind a flag due to pervasive problems with
// exactly what block frequency models.
if (LoopVectorizeWithBlockFrequency) {
@@ -1630,16 +1785,17 @@ struct LoopVectorize : public FunctionPass {
OptForSize = true;
}
- // Check the function attributes to see if implicit floats are allowed.a
+ // Check the function attributes to see if implicit floats are allowed.
// FIXME: This check doesn't seem possibly correct -- what if the loop is
// an integer loop and the vector instructions selected are purely integer
// vector instructions?
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
- emitOptimizationRemarkAnalysis(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- "loop not vectorized due to NoImplicitFloat attribute");
+ emitAnalysisDiag(
+ F, L, Hints,
+ VectorizationReport()
+ << "loop not vectorized due to NoImplicitFloat attribute");
emitMissedWarning(F, L, Hints);
return false;
}
@@ -1651,32 +1807,86 @@ struct LoopVectorize : public FunctionPass {
// Select the interleave count.
unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
- DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
- << DebugLocStr << '\n');
- DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
+ // Get user interleave count.
+ unsigned UserIC = Hints.getInterleave();
+
+ // Identify the diagnostic messages that should be produced.
+ std::string VecDiagMsg, IntDiagMsg;
+ bool VectorizeLoop = true, InterleaveLoop = true;
+
+ if (Requirements.doesNotMeet(F, L, Hints)) {
+ DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
+ "requirements.\n");
+ emitMissedWarning(F, L, Hints);
+ return false;
+ }
if (VF.Width == 1) {
- DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n");
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ VecDiagMsg =
+ "the cost-model indicates that vectorization is not beneficial";
+ VectorizeLoop = false;
+ }
- if (IC == 1) {
- emitOptimizationRemarkAnalysis(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- "not beneficial to vectorize and user disabled interleaving");
- return false;
- }
- DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
+ if (IC == 1 && UserIC <= 1) {
+ // Tell the user interleaving is not beneficial.
+ DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
+ IntDiagMsg =
+ "the cost-model indicates that interleaving is not beneficial";
+ InterleaveLoop = false;
+ if (UserIC == 1)
+ IntDiagMsg +=
+ " and is explicitly disabled or interleave count is set to 1";
+ } else if (IC > 1 && UserIC == 1) {
+ // Tell the user interleaving is beneficial but is explicitly disabled.
+ DEBUG(dbgs()
+ << "LV: Interleaving is beneficial but is explicitly disabled.");
+ IntDiagMsg = "the cost-model indicates that interleaving is beneficial "
+ "but is explicitly disabled or interleave count is set to 1";
+ InterleaveLoop = false;
+ }
- // Report the unrolling decision.
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- Twine("interleaved by " + Twine(IC) +
- " (vectorization not beneficial)"));
+ // Override IC if user provided an interleave count.
+ IC = UserIC > 0 ? UserIC : IC;
+
+ // Emit diagnostic messages, if any.
+ const char *VAPassName = Hints.vectorizeAnalysisPassName();
+ if (!VectorizeLoop && !InterleaveLoop) {
+ // Do not vectorize or interleave the loop.
+ emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
+ L->getStartLoc(), VecDiagMsg);
+ emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
+ L->getStartLoc(), IntDiagMsg);
+ return false;
+ } else if (!VectorizeLoop && InterleaveLoop) {
+ DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
+ emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
+ L->getStartLoc(), VecDiagMsg);
+ } else if (VectorizeLoop && !InterleaveLoop) {
+ DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
+ << DebugLocStr << '\n');
+ emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
+ L->getStartLoc(), IntDiagMsg);
+ } else if (VectorizeLoop && InterleaveLoop) {
+ DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
+ << DebugLocStr << '\n');
+ DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
+ }
+
+ if (!VectorizeLoop) {
+ assert(IC > 1 && "interleave count should not be 1 or 0");
+ // If we decided that it is not legal to vectorize the loop then
+ // interleave it.
+ InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC);
+ Unroller.vectorize(&LVL, CM.MinBWs);
- InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC);
- Unroller.vectorize(&LVL);
+ emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
+ Twine("interleaved loop (interleaved count: ") +
+ Twine(IC) + ")");
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC);
- LB.vectorize(&LVL);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC);
+ LB.vectorize(&LVL, CM.MinBWs);
++LoopsVectorized;
// Add metadata to disable runtime unrolling scalar loop when there's no
@@ -1686,7 +1896,7 @@ struct LoopVectorize : public FunctionPass {
AddRuntimeUnrollDisableMetaData(L);
// Report the vectorization decision.
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
Twine("vectorized loop (vectorization width: ") +
Twine(VF.Width) + ", interleaved count: " +
Twine(IC) + ")");
@@ -1703,16 +1913,19 @@ struct LoopVectorize : public FunctionPass {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<BlockFrequencyInfo>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<DemandedBits>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
@@ -1773,6 +1986,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
+ auto *SE = PSE.getSE();
// Make sure that the pointer does not point to structs.
if (Ptr->getType()->getPointerElementType()->isAggregateType())
return 0;
@@ -1780,11 +1994,11 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// If this value is a pointer induction variable we know it is consecutive.
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
if (Phi && Inductions.count(Phi)) {
- InductionInfo II = Inductions[Phi];
+ InductionDescriptor II = Inductions[Phi];
return II.getConsecutiveDirection();
}
- GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
+ GetElementPtrInst *Gep = getGEPInstruction(Ptr);
if (!Gep)
return 0;
@@ -1802,10 +2016,10 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// Make sure that all of the index operands are loop invariant.
for (unsigned i = 1; i < NumOperands; ++i)
- if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
- InductionInfo II = Inductions[Phi];
+ InductionDescriptor II = Inductions[Phi];
return II.getConsecutiveDirection();
}
@@ -1815,14 +2029,14 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// operand.
for (unsigned i = 0; i != NumOperands; ++i)
if (i != InductionOperand &&
- !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
// We can emit wide load/stores only if the last non-zero index is the
// induction variable.
const SCEV *Last = nullptr;
if (!Strides.count(Gep))
- Last = SE->getSCEV(Gep->getOperand(InductionOperand));
+ Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
else {
// Because of the multiplication by a stride we can have a s/zext cast.
// We are going to replace this stride by 1 so the cast is safe to ignore.
@@ -1833,7 +2047,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// %idxprom = zext i32 %mul to i64 << Safe cast.
// %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
//
- Last = replaceSymbolicStrideSCEV(SE, Strides,
+ Last = replaceSymbolicStrideSCEV(PSE, Strides,
Gep->getOperand(InductionOperand), Gep);
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
Last =
@@ -2177,7 +2391,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
VectorParts &Entry = WidenMap.get(Instr);
// Handle consecutive loads/stores.
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+ GetElementPtrInst *Gep = getGEPInstruction(Ptr);
if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
setDebugLocFromInst(Builder, Gep);
Value *PtrOperand = Gep->getPointerOperand();
@@ -2191,8 +2405,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Ptr = Builder.Insert(Gep2);
} else if (Gep) {
setDebugLocFromInst(Builder, Gep);
- assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
- OrigLoop) && "Base ptr must be invariant");
+ assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
+ OrigLoop) &&
+ "Base ptr must be invariant");
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
@@ -2209,7 +2424,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (i == InductionOperand ||
(GepOperandInst && OrigLoop->contains(GepOperandInst))) {
assert((i == InductionOperand ||
- SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
+ PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst),
+ OrigLoop)) &&
"Must be last index or loop invariant");
VectorParts &GEPParts = getVectorValue(GepOperand);
@@ -2237,14 +2453,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// We don't want to update the value in the map as it might be used in
// another expression. So don't use a reference type for "StoredVal".
VectorParts StoredVal = getVectorValue(SI->getValueOperand());
-
+
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
- // If we store to reverse consecutive memory locations then we need
+ // If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal[Part] = reverseVector(StoredVal[Part]);
// If the address is consecutive but reversed, then the
@@ -2298,7 +2514,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
}
}
-void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) {
+void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
+ bool IfPredicateStore) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
@@ -2318,7 +2535,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
// Try using previously calculated values.
Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
- // If the src is an instruction that appeared earlier in the basic block
+ // If the src is an instruction that appeared earlier in the basic block,
// then it should already be vectorized.
if (SrcInst && OrigLoop->contains(SrcInst)) {
assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
@@ -2343,19 +2560,12 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
- Instruction *InsertPt = Builder.GetInsertPoint();
- BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = nullptr;
-
VectorParts Cond;
- Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
Instr->getParent());
- VectorLp = LI->getLoopFor(IfBlock);
- assert(VectorLp && "Must have a loop for this block");
}
// For each vector unroll 'part':
@@ -2367,12 +2577,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
Value *Cmp = nullptr;
if (IfPredicateStore) {
Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width));
- Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1));
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, *LI);
- // Update Builder with newly created basic block.
- Builder.SetInsertPoint(InsertPt);
+ Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp,
+ ConstantInt::get(Cmp->getType(), 1));
}
Instruction *Cloned = Instr->clone();
@@ -2396,85 +2602,223 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
Builder.getInt32(Width));
// End if-block.
- if (IfPredicateStore) {
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
- Builder.SetInsertPoint(InsertPt);
- ReplaceInstWithInst(IfBlock->getTerminator(),
- BranchInst::Create(CondBlock, NewIfBlock, Cmp));
- IfBlock = NewIfBlock;
- }
+ if (IfPredicateStore)
+ PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
+ Cmp));
}
}
}
-static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
- Instruction *Loc) {
- if (FirstInst)
- return FirstInst;
- if (Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() == Loc->getParent() ? I : nullptr;
- return nullptr;
+PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
+ Value *End, Value *Step,
+ Instruction *DL) {
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // As we're just creating this loop, it's possible no latch exists
+ // yet. If so, use the header as this will be a single block loop.
+ if (!Latch)
+ Latch = Header;
+
+ IRBuilder<> Builder(&*Header->getFirstInsertionPt());
+ setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
+ auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index");
+
+ Builder.SetInsertPoint(Latch->getTerminator());
+
+ // Create i+1 and fill the PHINode.
+ Value *Next = Builder.CreateAdd(Induction, Step, "index.next");
+ Induction->addIncoming(Start, L->getLoopPreheader());
+ Induction->addIncoming(Next, Latch);
+ // Create the compare.
+ Value *ICmp = Builder.CreateICmpEQ(Next, End);
+ Builder.CreateCondBr(ICmp, L->getExitBlock(), Header);
+
+ // Now we have two terminators. Remove the old one from the block.
+ Latch->getTerminator()->eraseFromParent();
+
+ return Induction;
}
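
The control flow built here reduces to a counted loop over [Start, End) in steps of Step, with the exit test on the incremented value. Modeled as plain C++ (a sketch of what the generated IR computes, not generated code):

#include <cassert>
#include <cstdint>

// Sketch: the canonical induction the function above materializes. The
// zero-trip case never reaches this loop; a preceding check (see
// emitVectorLoopEnteredCheck below) bypasses it, so End > Start holds here.
uint64_t countVectorIterations(uint64_t Start, uint64_t End, uint64_t Step) {
  assert(Step != 0 && End > Start && (End - Start) % Step == 0 &&
         "trip-count math guarantees an exact, nonzero iteration span");
  uint64_t Iterations = 0;
  uint64_t Index = Start;            // %index = phi [Start, preheader], ...
  do {
    ++Iterations;                    // one execution of the vector body
    Index += Step;                   // %index.next = add %index, Step
  } while (Index != End);            // icmp eq %index.next, End + cond br
  return Iterations;
}

int main() {
  // With VF*UF == 8 and a vector trip count of 24, the body runs 3 times.
  return countVectorIterations(0, 24, 8) == 3 ? 0 : 1;
}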
-std::pair<Instruction *, Instruction *>
-InnerLoopVectorizer::addStrideCheck(Instruction *Loc) {
- Instruction *tnullptr = nullptr;
- if (!Legal->mustCheckStrides())
- return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
-
- IRBuilder<> ChkBuilder(Loc);
-
- // Emit checks.
- Value *Check = nullptr;
- Instruction *FirstInst = nullptr;
- for (SmallPtrSet<Value *, 8>::iterator SI = Legal->strides_begin(),
- SE = Legal->strides_end();
- SI != SE; ++SI) {
- Value *Ptr = stripIntegerCast(*SI);
- Value *C = ChkBuilder.CreateICmpNE(Ptr, ConstantInt::get(Ptr->getType(), 1),
- "stride.chk");
- // Store the first instruction we create.
- FirstInst = getFirstInst(FirstInst, C, Loc);
- if (Check)
- Check = ChkBuilder.CreateOr(Check, C);
- else
- Check = C;
- }
+Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
+ if (TripCount)
+ return TripCount;
- // We have to do this trickery because the IRBuilder might fold the check to a
- // constant expression in which case there is no Instruction anchored in a
- // the block.
- LLVMContext &Ctx = Loc->getContext();
- Instruction *TheCheck =
- BinaryOperator::CreateAnd(Check, ConstantInt::getTrue(Ctx));
- ChkBuilder.Insert(TheCheck, "stride.not.one");
- FirstInst = getFirstInst(FirstInst, TheCheck, Loc);
+ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ // Find the loop boundaries.
+ ScalarEvolution *SE = PSE.getSE();
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop);
+ assert(BackedgeTakenCount != SE->getCouldNotCompute() &&
+ "Invalid loop count");
- return std::make_pair(FirstInst, TheCheck);
+ Type *IdxTy = Legal->getWidestInductionType();
+
+ // The exit count might have the type of i64 while the phi is i32. This can
+ // happen if we have an induction variable that is sign extended before the
+ // compare. The only way we get a backedge-taken count is if the induction
+ // variable was signed and as such will not overflow. In such a case
+ // truncation is legal.
+ if (BackedgeTakenCount->getType()->getPrimitiveSizeInBits() >
+ IdxTy->getPrimitiveSizeInBits())
+ BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, IdxTy);
+ BackedgeTakenCount = SE->getNoopOrZeroExtend(BackedgeTakenCount, IdxTy);
+
+ // Get the total trip count from the count by adding 1.
+ const SCEV *ExitCount = SE->getAddExpr(
+ BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
+
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
+ // Expand the trip count and place the new instructions in the preheader.
+ // Notice that the pre-header does not change, only the loop body.
+ SCEVExpander Exp(*SE, DL, "induction");
+
+ // Count holds the overall loop count (N).
+ TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+ L->getLoopPreheader()->getTerminator());
+
+ if (TripCount->getType()->isPointerTy())
+ TripCount =
+ CastInst::CreatePointerCast(TripCount, IdxTy,
+ "exitcount.ptrcnt.to.int",
+ L->getLoopPreheader()->getTerminator());
+
+ return TripCount;
}
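
In SCEV terms the function computes BTC + 1 in the widest induction type; the widening direction is a plain zero-extension, while the truncating direction relies on the signedness argument in the comment above. A sketch of the common widening case (plain C++, not the SCEV API):

#include <cstdint>

// Sketch: a 32-bit backedge-taken count widened into a 64-bit index type,
// then bumped by one to get the trip count -- the plain-integer analogue of
// getNoopOrZeroExtend followed by getAddExpr above.
uint64_t tripCountSketch(uint32_t BackedgeTakenCount32) {
  uint64_t BTC = static_cast<uint64_t>(BackedgeTakenCount32); // zero-extend
  return BTC + 1; // the body runs once more than the backedge is taken
}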
+Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
+ if (VectorTripCount)
+ return VectorTripCount;
+
+ Value *TC = getOrCreateTripCount(L);
+ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+
+ // Now we need to generate the expression for N - (N % Step), which is
+ // the part that the vectorized body will execute.
+ // The loop step is equal to the vectorization factor (num of SIMD elements)
+ // times the unroll factor (num of SIMD instructions).
+ Constant *Step = ConstantInt::get(TC->getType(), VF * UF);
+ Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
+ VectorTripCount = Builder.CreateSub(TC, R, "n.vec");
+
+ return VectorTripCount;
+}
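
The n.vec expression has a simple reading: the vector body covers the largest multiple of the step not exceeding N, and the scalar remainder loop mops up the rest. As plain arithmetic (a sketch, not the generated IR):

#include <cstdint>

// Sketch of the n.vec computation: round N down to a multiple of VF * UF.
uint64_t vectorTripCount(uint64_t N, uint64_t VF, uint64_t UF) {
  uint64_t Step = VF * UF;   // elements retired per vector iteration
  uint64_t R = N % Step;     // n.mod.vf: the scalar tail length
  return N - R;              // n.vec: what the vector body executes
}

int main() {
  // N = 100 with VF = 4, UF = 2: vector loop handles 96, scalar tail 4.
  return vectorTripCount(100, 4, 2) == 96 ? 0 : 1;
}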
+
+void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
+ BasicBlock *Bypass) {
+ Value *Count = getOrCreateTripCount(L);
+ BasicBlock *BB = L->getLoopPreheader();
+ IRBuilder<> Builder(BB->getTerminator());
+
+ // Generate code to check that the loop's trip count, which we computed by
+ // adding one to the backedge-taken count, did not overflow and covers at
+ // least one full vector iteration (VF * UF elements).
+ Value *CheckMinIters =
+ Builder.CreateICmpULT(Count,
+ ConstantInt::get(Count->getType(), VF * UF),
+ "min.iters.check");
+
+ BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
+ "min.iters.checked");
+ if (L->getParentLoop())
+ L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
+ ReplaceInstWithInst(BB->getTerminator(),
+ BranchInst::Create(Bypass, NewBB, CheckMinIters));
+ LoopBypassBlocks.push_back(BB);
+}
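
One unsigned compare covers both failure modes: if the backedge-taken count was the maximum value, Count wraps to zero and is caught by the same Count < VF * UF test that rejects short trip counts. A checkable sketch of that reasoning:

#include <cstdint>
#include <limits>

// Sketch of min.iters.check: Count = BTC + 1 in modular arithmetic.
bool takesScalarPath(uint64_t BTC, uint64_t VF, uint64_t UF) {
  uint64_t Count = BTC + 1;  // wraps to 0 when BTC == UINT64_MAX
  return Count < VF * UF;    // too few iterations, or the add overflowed
}

int main() {
  const uint64_t Max = std::numeric_limits<uint64_t>::max();
  bool CatchesOverflow = takesScalarPath(Max, 4, 2);  // Count wrapped to 0
  bool CatchesShort = takesScalarPath(5, 4, 2);       // Count = 6 < 8
  bool AllowsLong = !takesScalarPath(31, 4, 2);       // Count = 32 >= 8
  return (CatchesOverflow && CatchesShort && AllowsLong) ? 0 : 1;
}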
+
+void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
+ BasicBlock *Bypass) {
+ Value *TC = getOrCreateVectorTripCount(L);
+ BasicBlock *BB = L->getLoopPreheader();
+ IRBuilder<> Builder(BB->getTerminator());
+
+ // Now, compare the new count to zero. If it is zero skip the vector loop and
+ // jump to the scalar loop.
+ Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()),
+ "cmp.zero");
+
+ // Split the preheader: if the vector trip count is zero, branch to the
+ // scalar loop (Bypass); otherwise fall through into the vector preheader.
+ BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
+ "vector.ph");
+ if (L->getParentLoop())
+ L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
+ ReplaceInstWithInst(BB->getTerminator(),
+ BranchInst::Create(Bypass, NewBB, Cmp));
+ LoopBypassBlocks.push_back(BB);
+}
+
+void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
+ BasicBlock *BB = L->getLoopPreheader();
+
+ // Generate the code to check the SCEV assumptions that we made.
+ // We want the new basic block to start at the first instruction in a
+ // sequence of instructions that form a check.
+ SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(),
+ "scev.check");
+ Value *SCEVCheck =
+ Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator());
+
+ if (auto *C = dyn_cast<ConstantInt>(SCEVCheck))
+ if (C->isZero())
+ return;
+
+ // Create a new block containing the SCEV check.
+ BB->setName("vector.scevcheck");
+ auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
+ if (L->getParentLoop())
+ L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
+ ReplaceInstWithInst(BB->getTerminator(),
+ BranchInst::Create(Bypass, NewBB, SCEVCheck));
+ LoopBypassBlocks.push_back(BB);
+ AddedSafetyChecks = true;
+}
+
+void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
+ BasicBlock *Bypass) {
+ BasicBlock *BB = L->getLoopPreheader();
+
+ // Generate the code that checks at run time whether arrays overlap. We put
+ // the checks into a separate block to make the more common case of few
+ // elements faster.
+ Instruction *FirstCheckInst;
+ Instruction *MemRuntimeCheck;
+ std::tie(FirstCheckInst, MemRuntimeCheck) =
+ Legal->getLAI()->addRuntimeChecks(BB->getTerminator());
+ if (!MemRuntimeCheck)
+ return;
+
+ // Create a new block containing the memory check.
+ BB->setName("vector.memcheck");
+ auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
+ if (L->getParentLoop())
+ L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
+ ReplaceInstWithInst(BB->getTerminator(),
+ BranchInst::Create(Bypass, NewBB, MemRuntimeCheck));
+ LoopBypassBlocks.push_back(BB);
+ AddedSafetyChecks = true;
+}
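
The four emit* helpers above share one shape: split the preheader, branch to the scalar bypass when a check fires, and record the old block in LoopBypassBlocks. Their combined runtime effect, as a sketch with hypothetical predicates standing in for the emitted checks:

#include <functional>
#include <vector>

// Sketch: the runtime gauntlet guarding entry to the vector loop. Each
// predicate stands in for one emitted check (min-iters, zero trip count,
// SCEV assumptions, memory overlap); any failure takes the scalar path.
bool bypassToScalarLoop(const std::vector<std::function<bool()>> &Checks) {
  for (const auto &CheckFails : Checks)
    if (CheckFails())
      return true;   // branch to Bypass: run the original scalar loop
  return false;      // fall through into vector.ph and the vector body
}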
+
+
void InnerLoopVectorizer::createEmptyLoop() {
/*
In this function we generate a new loop. The new loop will contain
the vectorized instructions while the old loop will continue to run the
scalar remainder.
- [ ] <-- Back-edge taken count overflow check.
+ [ ] <-- loop iteration number check.
/ |
/ v
| [ ] <-- vector loop bypass (may consist of multiple blocks).
| / |
| / v
|| [ ] <-- vector pre header.
- || |
- || v
- || [ ] \
- || [ ]_| <-- vector loop.
- || |
- | \ v
- | >[ ] <--- middle-block.
+ |/ |
+ | v
+ | [ ] \
+ | [ ]_| <-- vector loop.
+ | |
+ | v
+ | -[ ] <--- middle-block.
| / |
| / v
-|- >[ ] <--- new preheader.
@@ -2498,65 +2842,16 @@ void InnerLoopVectorizer::createEmptyLoop() {
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
// don't have a single induction variable.
+ //
+ // We try as hard as possible to obtain an induction variable from the
+ // original loop. However, if we don't find one that:
+ // - is an integer
+ // - counts from zero, stepping by one
+ // - is the size of the widest induction variable type
+ // then we create a new one.
OldInduction = Legal->getInduction();
Type *IdxTy = Legal->getWidestInductionType();
- // Find the loop boundaries.
- const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
- assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
-
- // The exit count might have the type of i64 while the phi is i32. This can
- // happen if we have an induction variable that is sign extended before the
- // compare. The only way that we get a backedge taken count is that the
- // induction variable was signed and as such will not overflow. In such a case
- // truncation is legal.
- if (ExitCount->getType()->getPrimitiveSizeInBits() >
- IdxTy->getPrimitiveSizeInBits())
- ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
-
- const SCEV *BackedgeTakeCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
- // Get the total trip count from the count by adding 1.
- ExitCount = SE->getAddExpr(BackedgeTakeCount,
- SE->getConstant(BackedgeTakeCount->getType(), 1));
-
- const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout();
-
- // Expand the trip count and place the new instructions in the preheader.
- // Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*SE, DL, "induction");
-
- // We need to test whether the backedge-taken count is uint##_max. Adding one
- // to it will cause overflow and an incorrect loop trip count in the vector
- // body. In case of overflow we want to directly jump to the scalar remainder
- // loop.
- Value *BackedgeCount =
- Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(),
- VectorPH->getTerminator());
- if (BackedgeCount->getType()->isPointerTy())
- BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy,
- "backedge.ptrcnt.to.int",
- VectorPH->getTerminator());
- Instruction *CheckBCOverflow =
- CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount,
- Constant::getAllOnesValue(BackedgeCount->getType()),
- "backedge.overflow", VectorPH->getTerminator());
-
- // The loop index does not have to start at Zero. Find the original start
- // value from the induction PHI node. If we don't have an induction variable
- // then we know that it starts at zero.
- Builder.SetInsertPoint(VectorPH->getTerminator());
- Value *StartIdx = ExtendedIdx =
- OldInduction
- ? Builder.CreateZExt(OldInduction->getIncomingValueForBlock(VectorPH),
- IdxTy)
- : ConstantInt::get(IdxTy, 0);
-
- // Count holds the overall loop count (N).
- Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- VectorPH->getTerminator());
-
- LoopBypassBlocks.push_back(VectorPH);
-
// Split the single block loop into the two loop structure described above.
BasicBlock *VecBody =
VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
@@ -2580,118 +2875,36 @@ void InnerLoopVectorizer::createEmptyLoop() {
}
Lp->addBasicBlockToLoop(VecBody, *LI);
- // Use this IR builder to create the loop instructions (Phi, Br, Cmp)
- // inside the loop.
- Builder.SetInsertPoint(VecBody->getFirstNonPHI());
-
- // Generate the induction variable.
- setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
- Induction = Builder.CreatePHI(IdxTy, 2, "index");
- // The loop step is equal to the vectorization factor (num of SIMD elements)
- // times the unroll factor (num of SIMD instructions).
- Constant *Step = ConstantInt::get(IdxTy, VF * UF);
-
- // Generate code to check that the loop's trip count that we computed by
- // adding one to the backedge-taken count will not overflow.
- BasicBlock *NewVectorPH =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
- ReplaceInstWithInst(
- VectorPH->getTerminator(),
- BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow));
- VectorPH = NewVectorPH;
-
- // This is the IR builder that we use to add all of the logic for bypassing
- // the new vector loop.
- IRBuilder<> BypassBuilder(VectorPH->getTerminator());
- setDebugLocFromInst(BypassBuilder,
- getDebugLocFromInstOrOperands(OldInduction));
-
- // We may need to extend the index in case there is a type mismatch.
- // We know that the count starts at zero and does not overflow.
- if (Count->getType() != IdxTy) {
- // The exit count can be of pointer type. Convert it to the correct
- // integer type.
- if (ExitCount->getType()->isPointerTy())
- Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int");
- else
- Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast");
- }
-
- // Add the start index to the loop count to get the new end index.
- Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx");
+ // Find the loop boundaries.
+ Value *Count = getOrCreateTripCount(Lp);
- // Now we need to generate the expression for N - (N % VF), which is
- // the part that the vectorized body will execute.
- Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
- Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec");
- Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx,
- "end.idx.rnd.down");
+ Value *StartIdx = ConstantInt::get(IdxTy, 0);
+ // We need to test whether the backedge-taken count is uint##_max. Adding one
+ // to it will cause overflow and an incorrect loop trip count in the vector
+ // body. In case of overflow we want to directly jump to the scalar remainder
+ // loop.
+ emitMinimumIterationCountCheck(Lp, ScalarPH);
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
- Value *Cmp =
- BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
- NewVectorPH =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
- LoopBypassBlocks.push_back(VectorPH);
- ReplaceInstWithInst(VectorPH->getTerminator(),
- BranchInst::Create(MiddleBlock, NewVectorPH, Cmp));
- VectorPH = NewVectorPH;
-
- // Generate the code to check that the strides we assumed to be one are really
- // one. We want the new basic block to start at the first instruction in a
- // sequence of instructions that form a check.
- Instruction *StrideCheck;
- Instruction *FirstCheckInst;
- std::tie(FirstCheckInst, StrideCheck) =
- addStrideCheck(VectorPH->getTerminator());
- if (StrideCheck) {
- AddedSafetyChecks = true;
- // Create a new block containing the stride check.
- VectorPH->setName("vector.stridecheck");
- NewVectorPH =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
- LoopBypassBlocks.push_back(VectorPH);
-
- // Replace the branch into the memory check block with a conditional branch
- // for the "few elements case".
- ReplaceInstWithInst(
- VectorPH->getTerminator(),
- BranchInst::Create(MiddleBlock, NewVectorPH, StrideCheck));
-
- VectorPH = NewVectorPH;
- }
+ emitVectorLoopEnteredCheck(Lp, ScalarPH);
+ // Generate the code to check any assumptions that we've made for SCEV
+ // expressions.
+ emitSCEVChecks(Lp, ScalarPH);
// Generate the code that checks in runtime if arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
// faster.
- Instruction *MemRuntimeCheck;
- std::tie(FirstCheckInst, MemRuntimeCheck) =
- Legal->getLAI()->addRuntimeCheck(VectorPH->getTerminator());
- if (MemRuntimeCheck) {
- AddedSafetyChecks = true;
- // Create a new block containing the memory check.
- VectorPH->setName("vector.memcheck");
- NewVectorPH =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
- LoopBypassBlocks.push_back(VectorPH);
-
- // Replace the branch into the memory check block with a conditional branch
- // for the "few elements case".
- ReplaceInstWithInst(
- VectorPH->getTerminator(),
- BranchInst::Create(MiddleBlock, NewVectorPH, MemRuntimeCheck));
-
- VectorPH = NewVectorPH;
- }
+ emitMemRuntimeChecks(Lp, ScalarPH);
+
+ // Generate the induction variable.
+ // The loop step is equal to the vectorization factor (num of SIMD elements)
+ // times the unroll factor (num of SIMD instructions).
+ Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
+ Constant *Step = ConstantInt::get(IdxTy, VF * UF);
+ Induction =
+ createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
+ getDebugLocFromInstOrOperands(OldInduction));
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
@@ -2701,152 +2914,60 @@ void InnerLoopVectorizer::createEmptyLoop() {
// If we come from a bypass edge then we need to start from the original
// start value.
- // This variable saves the new starting index for the scalar loop.
- PHINode *ResumeIndex = nullptr;
+ // This variable saves the new starting index for the scalar loop. It is used
+ // to test if there are any tail iterations left once the vector loop has
+ // completed.
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
- // Set builder to point to last bypass block.
- BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
for (I = List->begin(), E = List->end(); I != E; ++I) {
PHINode *OrigPhi = I->first;
- LoopVectorizationLegality::InductionInfo II = I->second;
-
- Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
- PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
- MiddleBlock->getTerminator());
- // We might have extended the type of the induction variable but we need a
- // truncated version for the scalar loop.
- PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
- PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
- MiddleBlock->getTerminator()) : nullptr;
+ InductionDescriptor II = I->second;
// Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val",
+ PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3,
+ "bc.resume.val",
ScalarPH->getTerminator());
- BCResumeVal->addIncoming(ResumeVal, MiddleBlock);
-
- PHINode *BCTruncResumeVal = nullptr;
+ Value *EndValue;
if (OrigPhi == OldInduction) {
- BCTruncResumeVal =
- PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val",
- ScalarPH->getTerminator());
- BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock);
- }
-
- Value *EndValue = nullptr;
- switch (II.IK) {
- case LoopVectorizationLegality::IK_NoInduction:
- llvm_unreachable("Unknown induction");
- case LoopVectorizationLegality::IK_IntInduction: {
- // Handle the integer induction counter.
- assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
-
- // We have the canonical induction variable.
- if (OrigPhi == OldInduction) {
- // Create a truncated version of the resume value for the scalar loop,
- // we might have promoted the type to a larger width.
- EndValue =
- BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
- // The new PHI merges the original incoming value, in case of a bypass,
- // or the value at the end of the vectorized loop.
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
- TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
- TruncResumeVal->addIncoming(EndValue, VecBody);
-
- BCTruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
-
- // We know what the end value is.
- EndValue = IdxEndRoundDown;
- // We also know which PHI node holds it.
- ResumeIndex = ResumeVal;
- break;
- }
-
- // Not the canonical induction variable - add the vector loop count to the
- // start value.
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
- II.StartValue->getType(),
- "cast.crd");
- EndValue = II.transform(BypassBuilder, CRD);
+ // We know what the end value is.
+ EndValue = CountRoundDown;
+ } else {
+ IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
+ Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
+ II.getStepValue()->getType(),
+ "cast.crd");
+ EndValue = II.transform(B, CRD);
EndValue->setName("ind.end");
- break;
}
- case LoopVectorizationLegality::IK_PtrInduction: {
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
- II.StepValue->getType(),
- "cast.crd");
- EndValue = II.transform(BypassBuilder, CRD);
- EndValue->setName("ptr.ind.end");
- break;
- }
- }// end of case
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) {
- if (OrigPhi == OldInduction)
- ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
- else
- ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
- }
- ResumeVal->addIncoming(EndValue, VecBody);
+ BCResumeVal->addIncoming(EndValue, MiddleBlock);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
// The old induction's phi node in the scalar body needs the truncated
// value.
- if (OrigPhi == OldInduction) {
- BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]);
- OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal);
- } else {
- BCResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
- OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
- }
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ BCResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[I]);
+ OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
}
- // If we are generating a new induction variable then we also need to
- // generate the code that calculates the exit value. This value is not
- // simply the end of the counter because we may skip the vectorized body
- // in case of a runtime check.
- if (!OldInduction){
- assert(!ResumeIndex && "Unexpected resume value found");
- ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
- MiddleBlock->getTerminator());
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
- ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]);
- ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
- }
-
- // Make sure that we found the index where scalar loop needs to continue.
- assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
- "Invalid resume Index");
-
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
- Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
- ResumeIndex, "cmp.n",
+ Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+ CountRoundDown, "cmp.n",
MiddleBlock->getTerminator());
ReplaceInstWithInst(MiddleBlock->getTerminator(),
BranchInst::Create(ExitBlock, ScalarPH, CmpN));
- // Create i+1 and fill the PHINode.
- Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
- Induction->addIncoming(StartIdx, VectorPH);
- Induction->addIncoming(NextIdx, VecBody);
- // Create the compare.
- Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown);
- Builder.CreateCondBr(ICmp, MiddleBlock, VecBody);
-
- // Now we have two terminators. Remove the old one from the block.
- VecBody->getTerminator()->eraseFromParent();
-
// Get ready to start creating new instructions into the vectorized body.
- Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+ Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt());
// Save the state.
- LoopVectorPreHeader = VectorPH;
+ LoopVectorPreHeader = Lp->getLoopPreheader();
LoopScalarPreHeader = ScalarPH;
LoopMiddleBlock = MiddleBlock;
LoopExitBlock = ExitBlock;
@@ -2899,7 +3020,7 @@ static void cse(SmallVector<BasicBlock *, 4> &BBs) {
for (unsigned i = 0, e = BBs.size(); i != e; ++i) {
BasicBlock *BB = BBs[i];
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *In = I++;
+ Instruction *In = &*I++;
if (!CSEDenseMapInfo::canHandle(In))
continue;
@@ -3021,6 +3142,117 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
}
+static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
+ IntegerType *I1 = cast<IntegerType>(T1->getVectorElementType());
+ IntegerType *I2 = cast<IntegerType>(T2->getVectorElementType());
+ return I1->getBitWidth() < I2->getBitWidth() ? T1 : T2;
+}
+static Type *largestIntegerVectorType(Type *T1, Type *T2) {
+ IntegerType *I1 = cast<IntegerType>(T1->getVectorElementType());
+ IntegerType *I2 = cast<IntegerType>(T2->getVectorElementType());
+ return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2;
+}
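
These helpers only compare element bit widths. A usage sketch, assuming the file's existing LLVM headers and a live LLVMContext (the values are illustrative):

LLVMContext Ctx;
auto *Narrow = VectorType::get(Type::getInt8Ty(Ctx), 4);  // <4 x i8>
auto *Wide   = VectorType::get(Type::getInt32Ty(Ctx), 4); // <4 x i32>
assert(smallestIntegerVectorType(Narrow, Wide) == Narrow);
assert(largestIntegerVectorType(Narrow, Wide) == Wide);
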
+
+void InnerLoopVectorizer::truncateToMinimalBitwidths() {
+ // For every instruction `I` in MinBWs, truncate the operands, create a
+  // truncated version of `I` and re-extend its result. InstCombine runs
+ // later and will remove any ext/trunc pairs.
+ //
+ for (auto &KV : MinBWs) {
+ VectorParts &Parts = WidenMap.get(KV.first);
+ for (Value *&I : Parts) {
+ if (I->use_empty())
+ continue;
+ Type *OriginalTy = I->getType();
+ Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(),
+ KV.second);
+ Type *TruncatedTy = VectorType::get(ScalarTruncatedTy,
+ OriginalTy->getVectorNumElements());
+ if (TruncatedTy == OriginalTy)
+ continue;
+
+ IRBuilder<> B(cast<Instruction>(I));
+ auto ShrinkOperand = [&](Value *V) -> Value* {
+ if (auto *ZI = dyn_cast<ZExtInst>(V))
+ if (ZI->getSrcTy() == TruncatedTy)
+ return ZI->getOperand(0);
+ return B.CreateZExtOrTrunc(V, TruncatedTy);
+ };
+
+ // The actual instruction modification depends on the instruction type,
+ // unfortunately.
+ Value *NewI = nullptr;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ NewI = B.CreateBinOp(BO->getOpcode(),
+ ShrinkOperand(BO->getOperand(0)),
+ ShrinkOperand(BO->getOperand(1)));
+ cast<BinaryOperator>(NewI)->copyIRFlags(I);
+ } else if (ICmpInst *CI = dyn_cast<ICmpInst>(I)) {
+ NewI = B.CreateICmp(CI->getPredicate(),
+ ShrinkOperand(CI->getOperand(0)),
+ ShrinkOperand(CI->getOperand(1)));
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ NewI = B.CreateSelect(SI->getCondition(),
+ ShrinkOperand(SI->getTrueValue()),
+ ShrinkOperand(SI->getFalseValue()));
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ switch (CI->getOpcode()) {
+ default: llvm_unreachable("Unhandled cast!");
+ case Instruction::Trunc:
+ NewI = ShrinkOperand(CI->getOperand(0));
+ break;
+ case Instruction::SExt:
+ NewI = B.CreateSExtOrTrunc(CI->getOperand(0),
+ smallestIntegerVectorType(OriginalTy,
+ TruncatedTy));
+ break;
+ case Instruction::ZExt:
+ NewI = B.CreateZExtOrTrunc(CI->getOperand(0),
+ smallestIntegerVectorType(OriginalTy,
+ TruncatedTy));
+ break;
+ }
+ } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
+ auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements();
+ auto *O0 =
+ B.CreateZExtOrTrunc(SI->getOperand(0),
+ VectorType::get(ScalarTruncatedTy, Elements0));
+ auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements();
+ auto *O1 =
+ B.CreateZExtOrTrunc(SI->getOperand(1),
+ VectorType::get(ScalarTruncatedTy, Elements1));
+
+ NewI = B.CreateShuffleVector(O0, O1, SI->getMask());
+ } else if (isa<LoadInst>(I)) {
+ // Don't do anything with the operands, just extend the result.
+ continue;
+ } else {
+ llvm_unreachable("Unhandled instruction type!");
+ }
+
+ // Lastly, extend the result.
+ NewI->takeName(cast<Instruction>(I));
+ Value *Res = B.CreateZExtOrTrunc(NewI, OriginalTy);
+ I->replaceAllUsesWith(Res);
+ cast<Instruction>(I)->eraseFromParent();
+ I = Res;
+ }
+ }
+
+ // We'll have created a bunch of ZExts that are now parentless. Clean up.
+ for (auto &KV : MinBWs) {
+ VectorParts &Parts = WidenMap.get(KV.first);
+ for (Value *&I : Parts) {
+ ZExtInst *Inst = dyn_cast<ZExtInst>(I);
+ if (Inst && Inst->use_empty()) {
+ Value *NewI = Inst->getOperand(0);
+ Inst->eraseFromParent();
+ I = NewI;
+ }
+ }
+ }
+}
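
The net effect on a widened binary operation is a narrow computation bracketed by casts. A minimal IRBuilder sketch of that shape (the helper name is hypothetical and not part of the pass):

static Value *emitNarrowAdd(IRBuilder<> &B, Value *X, Value *Y,
                            VectorType *TruncatedTy, Type *OriginalTy) {
  Value *XS = B.CreateZExtOrTrunc(X, TruncatedTy);  // shrink both operands
  Value *YS = B.CreateZExtOrTrunc(Y, TruncatedTy);
  Value *Narrow = B.CreateAdd(XS, YS);              // compute in the small type
  return B.CreateZExtOrTrunc(Narrow, OriginalTy);   // re-extend as a hint for
                                                    // InstCombine to fold
}
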
+
void InnerLoopVectorizer::vectorizeLoop() {
//===------------------------------------------------===//
//
@@ -3051,6 +3283,11 @@ void InnerLoopVectorizer::vectorizeLoop() {
be = DFS.endRPO(); bb != be; ++bb)
vectorizeBlockInLoop(*bb, &RdxPHIsToFix);
+ // Insert truncates and extends for any truncated instructions as hints to
+ // InstCombine.
+ if (VF > 1)
+ truncateToMinimalBitwidths();
+
// At this point every instruction in the original loop is widened to
// a vector form. We are almost done. Now, we need to fix the PHI nodes
// that we vectorized. The PHI nodes are currently empty because we did
@@ -3066,7 +3303,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
assert(RdxPhi && "Unable to recover vectorized PHI");
// Find the reduction variable descriptor.
- assert(Legal->getReductionVars()->count(RdxPhi) &&
+ assert(Legal->isReductionVariable(RdxPhi) &&
"Unable to find the reduction variable");
RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi];
@@ -3141,21 +3378,33 @@ void InnerLoopVectorizer::vectorizeLoop() {
// the PHIs and the values we are going to write.
// This allows us to write both PHINodes and the extractelement
// instructions.
- Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
+ Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- VectorParts RdxParts;
+ VectorParts RdxParts = getVectorValue(LoopExitInst);
setDebugLocFromInst(Builder, LoopExitInst);
- for (unsigned part = 0; part < UF; ++part) {
- // This PHINode contains the vectorized reduction variable, or
- // the initial value vector, if we bypass the vector loop.
- VectorParts &RdxExitVal = getVectorValue(LoopExitInst);
- PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
- Value *StartVal = (part == 0) ? VectorStart : Identity;
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
- NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
- NewPhi->addIncoming(RdxExitVal[part],
- LoopVectorBody.back());
- RdxParts.push_back(NewPhi);
+
+ // If the vector reduction can be performed in a smaller type, we truncate
+ // then extend the loop exit value to enable InstCombine to evaluate the
+ // entire expression in the smaller type.
+ if (VF > 1 && RdxPhi->getType() != RdxDesc.getRecurrenceType()) {
+ Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
+ Builder.SetInsertPoint(LoopVectorBody.back()->getTerminator());
+ for (unsigned part = 0; part < UF; ++part) {
+ Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+ Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
+ : Builder.CreateZExt(Trunc, VecTy);
+ for (Value::user_iterator UI = RdxParts[part]->user_begin();
+ UI != RdxParts[part]->user_end();)
+ if (*UI != Trunc) {
+ (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd);
+ RdxParts[part] = Extnd;
+ } else {
+ ++UI;
+ }
+ }
+ Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
+ for (unsigned part = 0; part < UF; ++part)
+ RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
}
// Reduce all of the unrolled parts into a single vector.
@@ -3208,13 +3457,22 @@ void InnerLoopVectorizer::vectorizeLoop() {
// The result is in the first element of the vector.
ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
Builder.getInt32(0));
+
+ // If the reduction can be performed in a smaller type, we need to extend
+ // the reduction to the wider type before we branch to the original loop.
+ if (RdxPhi->getType() != RdxDesc.getRecurrenceType())
+ ReducedPartRdx =
+ RdxDesc.isSigned()
+ ? Builder.CreateSExt(ReducedPartRdx, RdxPhi->getType())
+ : Builder.CreateZExt(ReducedPartRdx, RdxPhi->getType());
}
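
A scalar analogue of this narrowing, with hypothetical i8 data; it is only sound because the recurrence analysis has already established that the value fits in the narrow recurrence type:

#include <cstdint>
uint32_t addReduceNarrow(const uint8_t *A, int N) {
  uint8_t Acc = 0;                // accumulate in the recurrence type (i8)
  for (int i = 0; i < N; ++i)
    Acc += A[i];
  return uint32_t(Acc);           // one final zext to the PHI type (i32)
}
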
// Create a phi node that merges control-flow from the backedge-taken check
// block and the middle block.
PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx",
LoopScalarPreHeader->getTerminator());
- BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[0]);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]);
BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
// Now, we need to fix the users of the reduction variable
@@ -3252,6 +3510,20 @@ void InnerLoopVectorizer::vectorizeLoop() {
fixLCSSAPHIs();
+ // Make sure DomTree is updated.
+ updateAnalysis();
+
+ // Predicate any stores.
+ for (auto KV : PredicatedStores) {
+ BasicBlock::iterator I(KV.first);
+ auto *BB = SplitBlock(I->getParent(), &*std::next(I), DT, LI);
+ auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DT);
+ I->moveBefore(T);
+ I->getParent()->setName("pred.store.if");
+ BB->setName("pred.store.continue");
+ }
+ DEBUG(DT->verifyDomTree());
// Remove redundant induction instructions.
cse(LoopVectorBody);
}
@@ -3326,18 +3598,18 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
return BlockMask;
}
-void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
- InnerLoopVectorizer::VectorParts &Entry,
- unsigned UF, unsigned VF, PhiVector *PV) {
+void InnerLoopVectorizer::widenPHIInstruction(
+ Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF,
+ unsigned VF, PhiVector *PV) {
PHINode* P = cast<PHINode>(PN);
// Handle reduction variables:
- if (Legal->getReductionVars()->count(P)) {
+ if (Legal->isReductionVariable(P)) {
for (unsigned part = 0; part < UF; ++part) {
// This is phase one of vectorizing PHIs.
Type *VecTy = (VF == 1) ? PN->getType() :
VectorType::get(PN->getType(), VF);
- Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
- LoopVectorBody.back()-> getFirstInsertionPt());
+ Entry[part] = PHINode::Create(
+ VecTy, 2, "vec.phi", &*LoopVectorBody.back()->getFirstInsertionPt());
}
PV->push_back(P);
return;
@@ -3385,53 +3657,44 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
assert(Legal->getInductionVars()->count(P) &&
"Not an induction variable");
- LoopVectorizationLegality::InductionInfo II =
- Legal->getInductionVars()->lookup(P);
+ InductionDescriptor II = Legal->getInductionVars()->lookup(P);
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
- switch (II.IK) {
- case LoopVectorizationLegality::IK_NoInduction:
+ switch (II.getKind()) {
+ case InductionDescriptor::IK_NoInduction:
llvm_unreachable("Unknown induction");
- case LoopVectorizationLegality::IK_IntInduction: {
- assert(P->getType() == II.StartValue->getType() && "Types must match");
- Type *PhiTy = P->getType();
- Value *Broadcasted;
- if (P == OldInduction) {
- // Handle the canonical induction variable. We might have had to
- // extend the type.
- Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
- } else {
- // Handle other induction variables that are now based on the
- // canonical one.
- Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
- "normalized.idx");
- NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
- Broadcasted = II.transform(Builder, NormalizedIdx);
- Broadcasted->setName("offset.idx");
+ case InductionDescriptor::IK_IntInduction: {
+ assert(P->getType() == II.getStartValue()->getType() &&
+ "Types must match");
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *V = Induction;
+ if (P != OldInduction) {
+ V = Builder.CreateSExtOrTrunc(Induction, P->getType());
+ V = II.transform(Builder, V);
+ V->setName("offset.idx");
}
- Broadcasted = getBroadcastInstrs(Broadcasted);
+ Value *Broadcasted = getBroadcastInstrs(V);
// After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue);
+ Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
return;
}
- case LoopVectorizationLegality::IK_PtrInduction:
+ case InductionDescriptor::IK_PtrInduction:
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
// This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx =
- Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
- NormalizedIdx =
- Builder.CreateSExtOrTrunc(NormalizedIdx, II.StepValue->getType());
+ Value *PtrInd = Induction;
+ PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) {
int EltIndex = part;
- Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
Entry[part] = SclrGep;
@@ -3441,8 +3704,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
int EltIndex = i + part * VF;
- Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
@@ -3458,7 +3721,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- VectorParts &Entry = WidenMap.get(it);
+ VectorParts &Entry = WidenMap.get(&*it);
+
switch (it->getOpcode()) {
case Instruction::Br:
// Nothing to do for PHIs and BR, since we already took care of the
@@ -3466,7 +3730,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
continue;
case Instruction::PHI: {
// Vectorize PHINodes.
- widenPHIInstruction(it, Entry, UF, VF, PV);
+ widenPHIInstruction(&*it, Entry, UF, VF, PV);
continue;
}// End of PHI.
@@ -3504,16 +3768,17 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Entry[Part] = V;
}
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
case Instruction::Select: {
// Widen selects.
// If the selector is loop invariant we can create a select
// instruction with a scalar condition. Otherwise, use vector-select.
- bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
- OrigLoop);
- setDebugLocFromInst(Builder, it);
+ auto *SE = PSE.getSE();
+ bool InvariantCond =
+ SE->isLoopInvariant(PSE.getSCEV(it->getOperand(0)), OrigLoop);
+ setDebugLocFromInst(Builder, &*it);
// The condition can be loop invariant but still defined inside the
// loop. This means that we can't just use the original 'cond' value.
@@ -3522,7 +3787,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
VectorParts &Cond = getVectorValue(it->getOperand(0));
VectorParts &Op0 = getVectorValue(it->getOperand(1));
VectorParts &Op1 = getVectorValue(it->getOperand(2));
-
+
Value *ScalarCond = (VF == 1) ? Cond[0] :
Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
@@ -3533,7 +3798,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Op1[Part]);
}
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
@@ -3542,25 +3807,27 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Widen compares. Generate vector compares.
bool FCmp = (it->getOpcode() == Instruction::FCmp);
CmpInst *Cmp = dyn_cast<CmpInst>(it);
- setDebugLocFromInst(Builder, it);
+ setDebugLocFromInst(Builder, &*it);
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
for (unsigned Part = 0; Part < UF; ++Part) {
Value *C = nullptr;
- if (FCmp)
+ if (FCmp) {
C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
- else
+ cast<FCmpInst>(C)->copyFastMathFlags(&*it);
+ } else {
C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
+ }
Entry[Part] = C;
}
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
case Instruction::Store:
case Instruction::Load:
- vectorizeMemoryInstruction(it);
+ vectorizeMemoryInstruction(&*it);
break;
case Instruction::ZExt:
case Instruction::SExt:
@@ -3575,7 +3842,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
CastInst *CI = dyn_cast<CastInst>(it);
- setDebugLocFromInst(Builder, it);
+ setDebugLocFromInst(Builder, &*it);
/// Optimize the special case where the source is the induction
/// variable. Notice that we can only optimize the 'trunc' case
/// because: a. FP conversions lose precision, b. sext/zext may wrap,
@@ -3585,13 +3852,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
- LoopVectorizationLegality::InductionInfo II =
+ InductionDescriptor II =
Legal->getInductionVars()->lookup(OldInduction);
- Constant *Step =
- ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue());
+ Constant *Step = ConstantInt::getSigned(
+ CI->getType(), II.getStepValue()->getSExtValue());
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
/// Vectorize casts.
@@ -3601,7 +3868,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
@@ -3609,7 +3876,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(it))
break;
- setDebugLocFromInst(Builder, it);
+ setDebugLocFromInst(Builder, &*it);
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
@@ -3625,7 +3892,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
if (ID &&
(ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
ID == Intrinsic::lifetime_start)) {
- scalarizeInstruction(it);
+ scalarizeInstruction(&*it);
break;
}
// The flag shows whether we use Intrinsic or a usual Call for vectorized
@@ -3636,7 +3903,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
bool UseVectorIntrinsic =
ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
if (!UseVectorIntrinsic && NeedToScalarize) {
- scalarizeInstruction(it);
+ scalarizeInstruction(&*it);
break;
}
@@ -3677,13 +3944,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Entry[Part] = Builder.CreateCall(VectorF, Args);
}
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
default:
// All other instructions are unsupported. Scalarize them.
- scalarizeInstruction(it);
+ scalarizeInstruction(&*it);
break;
}// end of switch.
}// end of for_each instr.
@@ -3691,7 +3958,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
void InnerLoopVectorizer::updateAnalysis() {
// Forget the original basic block.
- SE->forgetLoop(OrigLoop);
+ PSE.getSE()->forgetLoop(OrigLoop);
// Update the dominator tree information.
assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
@@ -3701,19 +3968,12 @@ void InnerLoopVectorizer::updateAnalysis() {
DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
- // Due to if predication of stores we might create a sequence of "if(pred)
- // a[i] = ...; " blocks.
- for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) {
- if (i == 0)
- DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader);
- else if (isPredicatedBlock(i)) {
- DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-1]);
- } else {
- DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-2]);
- }
- }
+  // We don't predicate stores by this point, so the vector body should be a
+  // single-block loop.
+ assert(LoopVectorBody.size() == 1 && "Expected single block loop!");
+ DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader);
- DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]);
+ DT->addNewBlock(LoopMiddleBlock, LoopVectorBody.back());
DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]);
@@ -3850,10 +4110,10 @@ bool LoopVectorizationLegality::canVectorize() {
}
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
- if (ExitCount == SE->getCouldNotCompute()) {
- emitAnalysis(VectorizationReport() <<
- "could not determine number of loop iterations");
+ const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
+ emitAnalysis(VectorizationReport()
+ << "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -3879,10 +4139,28 @@ bool LoopVectorizationLegality::canVectorize() {
: "")
<< "!\n");
+ bool UseInterleaved = TTI->enableInterleavedAccessVectorization();
+
+ // If an override option has been passed in for interleaved accesses, use it.
+ if (EnableInterleavedMemAccesses.getNumOccurrences() > 0)
+ UseInterleaved = EnableInterleavedMemAccesses;
+
// Analyze interleaved memory accesses.
- if (EnableInterleavedMemAccesses)
+ if (UseInterleaved)
InterleaveInfo.analyzeInterleaving(Strides);
+ unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
+ if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
+ SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
+
+ if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
+ emitAnalysis(VectorizationReport()
+ << "Too many SCEV assumptions need to be made and checked "
+ << "at runtime");
+ DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
+ return false;
+ }
+
// Okay! We can vectorize. At this point we don't have any other mem analysis
// which may limit our maximum vectorization factor, so just return true with
// no restrictions.
@@ -3929,7 +4207,6 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
}
bool LoopVectorizationLegality::canVectorizeInstrs() {
- BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
// Look for the attribute signaling the absence of NaNs.
@@ -3953,7 +4230,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PhiTy->isIntegerTy() &&
!PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
- emitAnalysis(VectorizationReport(it)
+ emitAnalysis(VectorizationReport(&*it)
<< "loop control flow is not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
@@ -3965,9 +4242,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (*bb != Header) {
// Check that this instruction has no outside users or is an
// identified reduction value with an outside user.
- if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (!hasOutsideLoopUser(TheLoop, &*it, AllowedExit))
continue;
- emitAnalysis(VectorizationReport(it) <<
+ emitAnalysis(VectorizationReport(&*it) <<
"value could not be identified as "
"an induction or reduction variable");
return false;
@@ -3975,19 +4252,15 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
- emitAnalysis(VectorizationReport(it)
+ emitAnalysis(VectorizationReport(&*it)
<< "control flow not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
- // This is the value coming from the preheader.
- Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
- ConstantInt *StepValue = nullptr;
- // Check if this is an induction variable.
- InductionKind IK = isInductionVariable(Phi, StepValue);
-
- if (IK_NoInduction != IK) {
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(Phi, PSE.getSE(), ID)) {
+ Inductions[Phi] = ID;
// Get the widest type.
if (!WidestIndTy)
WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
@@ -3995,21 +4268,24 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
// Int inductions are special because we only allow one IV.
- if (IK == IK_IntInduction && StepValue->isOne()) {
+ if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
+ ID.getStepValue()->isOne() &&
+ isa<Constant>(ID.getStartValue()) &&
+ cast<Constant>(ID.getStartValue())->isNullValue()) {
// Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other
- // than it is expedient).
+ // than it is expedient). We've checked that it begins at zero and
+ // steps by one, so this is a canonical induction variable.
if (!Induction || PhiTy == WidestIndTy)
Induction = Phi;
}
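
The canonical-IV test above reduces to "starts at zero, steps by one". A reduced predicate sketch (the helper name is hypothetical; the accessors mirror the ones used above):

static bool isCanonicalIV(const InductionDescriptor &ID) {
  if (ID.getKind() != InductionDescriptor::IK_IntInduction ||
      !ID.getStepValue()->isOne())
    return false;
  auto *Start = dyn_cast<Constant>(ID.getStartValue());
  return Start && Start->isNullValue();
}
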
DEBUG(dbgs() << "LV: Found an induction variable.\n");
- Inductions[Phi] = InductionInfo(StartValue, IK, StepValue);
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
- emitAnalysis(VectorizationReport(it) <<
+ if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
+ emitAnalysis(VectorizationReport(&*it) <<
"use of induction value outside of the "
"loop is not handled by vectorizer");
return false;
@@ -4020,11 +4296,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop,
Reductions[Phi])) {
+ if (Reductions[Phi].hasUnsafeAlgebra())
+ Requirements->addUnsafeAlgebraInst(
+ Reductions[Phi].getUnsafeAlgebraInst());
AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
continue;
}
- emitAnalysis(VectorizationReport(it) <<
+ emitAnalysis(VectorizationReport(&*it) <<
"value that could not be identified as "
"reduction is used outside the loop");
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
@@ -4039,8 +4318,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
- emitAnalysis(VectorizationReport(it) <<
- "call instruction cannot be vectorized");
+ emitAnalysis(VectorizationReport(&*it)
+ << "call instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
return false;
}
@@ -4049,8 +4328,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// second argument is the same (i.e. loop invariant)
if (CI &&
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
- if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
- emitAnalysis(VectorizationReport(it)
+ auto *SE = PSE.getSE();
+ if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
+ emitAnalysis(VectorizationReport(&*it)
<< "intrinsic instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
return false;
@@ -4061,7 +4341,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
!it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
- emitAnalysis(VectorizationReport(it)
+ emitAnalysis(VectorizationReport(&*it)
<< "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
@@ -4085,8 +4365,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
- emitAnalysis(VectorizationReport(it) <<
+ if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
+ emitAnalysis(VectorizationReport(&*it) <<
"value cannot be used outside the loop");
return false;
}
@@ -4104,6 +4384,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
}
+  // Now that we know the widest induction type, check whether the induction
+  // we found has the same size. If it doesn't, unset it here and
+  // InnerLoopVectorizer will create another.
+ if (Induction && WidestIndTy != Induction->getType())
+ Induction = nullptr;
+
return true;
}
@@ -4116,7 +4402,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) {
else
return;
- Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop);
+ Value *Stride = getStrideFromPointer(Ptr, PSE.getSE(), TheLoop);
if (!Stride)
return;
@@ -4142,7 +4428,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
BE = TheLoop->block_end(); B != BE; ++B)
for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end();
I != IE; ++I)
- if (I->getType()->isPointerTy() && isConsecutivePtr(I))
+ if (I->getType()->isPointerTy() && isConsecutivePtr(&*I))
Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
while (!Worklist.empty()) {
@@ -4179,30 +4465,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return false;
}
- if (LAI->getNumRuntimePointerChecks() >
- VectorizerParams::RuntimeMemoryCheckThreshold) {
- emitAnalysis(VectorizationReport()
- << LAI->getNumRuntimePointerChecks() << " exceeds limit of "
- << VectorizerParams::RuntimeMemoryCheckThreshold
- << " dependent memory operations checked at runtime");
- DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
- return false;
- }
- return true;
-}
+ Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
+ PSE.addPredicate(LAI->PSE.getUnionPredicate());
-LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
- ConstantInt *&StepValue) {
- if (!isInductionPHI(Phi, SE, StepValue))
- return IK_NoInduction;
-
- Type *PhiTy = Phi->getType();
- // Found an Integer induction variable.
- if (PhiTy->isIntegerTy())
- return IK_IntInduction;
- // Found an Pointer induction variable.
- return IK_PtrInduction;
+ return true;
}
bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
@@ -4256,8 +4522,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr ||
!isSinglePredecessor) {
- // Build a masked store if it is legal for the target, otherwise scalarize
- // the block.
+ // Build a masked store if it is legal for the target, otherwise
+ // scalarize the block.
bool isLegalMaskedOp =
isLegalMaskedStore(SI->getValueOperand()->getType(),
SI->getPointerOperand());
@@ -4315,7 +4581,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses(
StoreInst *SI = dyn_cast<StoreInst>(I);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
- int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides);
+ int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides);
// The factor of the corresponding interleave group.
unsigned Factor = std::abs(Stride);
@@ -4324,7 +4590,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses(
if (Factor < 2 || Factor > MaxInterleaveGroupFactor)
continue;
- const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
@@ -4411,12 +4677,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
continue;
// Calculate the distance and prepare for the rule 3.
- const SCEVConstant *DistToA =
- dyn_cast<SCEVConstant>(SE->getMinusSCEV(DesB.Scev, DesA.Scev));
+ const SCEVConstant *DistToA = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev));
if (!DistToA)
continue;
- int DistanceToA = DistToA->getValue()->getValue().getSExtValue();
+ int DistanceToA = DistToA->getAPInt().getSExtValue();
// Skip if the distance is not multiple of size as they are not in the
// same group.
@@ -4454,8 +4720,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
emitAnalysis(VectorizationReport() <<
"runtime pointer checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os");
- DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
+ "compiling with -Os/-Oz");
+ DEBUG(dbgs() <<
+ "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
return Factor;
}
@@ -4467,10 +4734,12 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
}
// Find the trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop);
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
- unsigned WidestType = getWidestType();
+ MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
+ unsigned SmallestType, WidestType;
+ std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
unsigned MaxSafeDepDist = -1U;
if (Legal->getMaxSafeDepDistBytes() != -1U)
@@ -4478,7 +4747,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
- DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
+
+ DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / "
+ << WidestType << " bits.\n");
DEBUG(dbgs() << "LV: The Widest register is: "
<< WidestRegister << " bits.\n");
@@ -4491,6 +4762,26 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
" into one vector!");
unsigned VF = MaxVectorSize;
+ if (MaximizeBandwidth && !OptForSize) {
+ // Collect all viable vectorization factors.
+ SmallVector<unsigned, 8> VFs;
+ unsigned NewMaxVectorSize = WidestRegister / SmallestType;
+ for (unsigned VS = MaxVectorSize; VS <= NewMaxVectorSize; VS *= 2)
+ VFs.push_back(VS);
+
+ // For each VF calculate its register usage.
+ auto RUs = calculateRegisterUsage(VFs);
+
+ // Select the largest VF which doesn't require more registers than existing
+ // ones.
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
+ for (int i = RUs.size() - 1; i >= 0; --i) {
+ if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
+ VF = VFs[i];
+ break;
+ }
+ }
+ }
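
The scan walks the candidate factors from widest to narrowest and keeps the first whose register pressure fits. A standalone restatement (the struct and names are illustrative, not the pass's own types):

#include <vector>
struct RegUsage { unsigned MaxLocalUsers; };
unsigned pickWidestFittingVF(const std::vector<unsigned> &VFs,
                             const std::vector<RegUsage> &RUs,
                             unsigned TargetNumRegisters, unsigned Fallback) {
  for (int i = int(RUs.size()) - 1; i >= 0; --i)
    if (RUs[i].MaxLocalUsers <= TargetNumRegisters)
      return VFs[i];              // widest VF whose pressure fits
  return Fallback;                // otherwise keep MaxVectorSize
}
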
// If we optimize the program for size, avoid creating the tail loop.
if (OptForSize) {
@@ -4499,7 +4790,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
emitAnalysis
(VectorizationReport() <<
"unable to calculate the loop count due to complex control flow");
- DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
return Factor;
}
@@ -4515,8 +4806,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
"cannot optimize for size and vectorize at the "
"same time. Enable vectorization of this loop "
"with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os");
- DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ "when compiling with -Os/-Oz");
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
return Factor;
}
}
@@ -4566,7 +4857,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
return Factor;
}
-unsigned LoopVectorizationCostModel::getWidestType() {
+std::pair<unsigned, unsigned>
+LoopVectorizationCostModel::getSmallestAndWidestTypes() {
+ unsigned MinWidth = -1U;
unsigned MaxWidth = 8;
const DataLayout &DL = TheFunction->getParent()->getDataLayout();
@@ -4579,18 +4872,22 @@ unsigned LoopVectorizationCostModel::getWidestType() {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
Type *T = it->getType();
- // Ignore ephemeral values.
- if (EphValues.count(it))
+ // Skip ignored values.
+ if (ValuesToIgnore.count(&*it))
continue;
// Only examine Loads, Stores and PHINodes.
if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
continue;
- // Examine PHI nodes that are reduction variables.
- if (PHINode *PN = dyn_cast<PHINode>(it))
- if (!Legal->getReductionVars()->count(PN))
+ // Examine PHI nodes that are reduction variables. Update the type to
+ // account for the recurrence type.
+ if (PHINode *PN = dyn_cast<PHINode>(it)) {
+ if (!Legal->isReductionVariable(PN))
continue;
+ RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[PN];
+ T = RdxDesc.getRecurrenceType();
+ }
// Examine the stored values.
if (StoreInst *ST = dyn_cast<StoreInst>(it))
@@ -4599,15 +4896,17 @@ unsigned LoopVectorizationCostModel::getWidestType() {
// Ignore loaded pointer types and stored pointer types that are not
// consecutive. However, we do want to take consecutive stores/loads of
// pointer vectors into account.
- if (T->isPointerTy() && !isConsecutiveLoadOrStore(it))
+ if (T->isPointerTy() && !isConsecutiveLoadOrStore(&*it))
continue;
+ MinWidth = std::min(MinWidth,
+ (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
MaxWidth = std::max(MaxWidth,
(unsigned)DL.getTypeSizeInBits(T->getScalarType()));
}
}
- return MaxWidth;
+ return {MinWidth, MaxWidth};
}
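
The two bounds feed the factor selection shown earlier: the widest type fixes the safe default VF and the smallest type caps the bandwidth-maximizing search. A numeric sketch (target numbers are hypothetical):

#include <utility>
// With a 128-bit register file and {SmallestType, WidestType} = {8, 32},
// the default VF is 128/32 = 4 and the bandwidth cap is 128/8 = 16.
std::pair<unsigned, unsigned> vfBounds(unsigned WidestRegister,
                                       unsigned SmallestType,
                                       unsigned WidestType) {
  return {WidestRegister / WidestType, WidestRegister / SmallestType};
}
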
unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
@@ -4628,11 +4927,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
// 3. We don't interleave if we think that we will spill registers to memory
// due to the increased register pressure.
- // Use the user preference, unless 'auto' is selected.
- int UserUF = Hints->getInterleave();
- if (UserUF != 0)
- return UserUF;
-
// When we optimize for size, we don't interleave.
if (OptForSize)
return 1;
@@ -4642,7 +4936,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
return 1;
// Do not interleave loops with a relatively small trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop);
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
@@ -4658,7 +4952,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
TargetNumRegisters = ForceTargetNumVectorRegs;
}
- LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
+ RegisterUsage R = calculateRegisterUsage({VF})[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
@@ -4756,8 +5050,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
}
// Interleave if this is a large loop (small loops are already dealt with by
- // this
- // point) that could benefit from interleaving.
+ // this point) that could benefit from interleaving.
bool HasReductions = (Legal->getReductionVars()->size() > 0);
if (TTI.enableAggressiveInterleaving(HasReductions)) {
DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
@@ -4768,8 +5061,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
return 1;
}
-LoopVectorizationCostModel::RegisterUsage
-LoopVectorizationCostModel::calculateRegisterUsage() {
+SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
+LoopVectorizationCostModel::calculateRegisterUsage(
+ const SmallVector<unsigned, 8> &VFs) {
// This function calculates the register usage by measuring the highest number
// of values that are alive at a single location. Obviously, this is a very
// rough estimation. We scan the loop in a topological order in order and
@@ -4790,8 +5084,8 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
LoopBlocksDFS DFS(TheLoop);
DFS.perform(LI);
- RegisterUsage R;
- R.NumInstructions = 0;
+ RegisterUsage RU;
+ RU.NumInstructions = 0;
// Each 'key' in the map opens a new interval. The values
// of the map are the index of the 'last seen' usage of the
@@ -4810,15 +5104,13 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
unsigned Index = 0;
for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
be = DFS.endRPO(); bb != be; ++bb) {
- R.NumInstructions += (*bb)->size();
- for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
- ++it) {
- Instruction *I = it;
- IdxToInstr[Index++] = I;
+ RU.NumInstructions += (*bb)->size();
+ for (Instruction &I : **bb) {
+ IdxToInstr[Index++] = &I;
// Save the end location of each USE.
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value *U = I->getOperand(i);
+ for (unsigned i = 0; i < I.getNumOperands(); ++i) {
+ Value *U = I.getOperand(i);
Instruction *Instr = dyn_cast<Instruction>(U);
// Ignore non-instruction values such as arguments, constants, etc.
@@ -4847,42 +5139,85 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
TransposeEnds[it->second].push_back(it->first);
SmallSet<Instruction*, 8> OpenIntervals;
- unsigned MaxUsage = 0;
+ // Get the size of the widest register.
+ unsigned MaxSafeDepDist = -1U;
+ if (Legal->getMaxSafeDepDistBytes() != -1U)
+ MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
+ unsigned WidestRegister =
+ std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist);
+ const DataLayout &DL = TheFunction->getParent()->getDataLayout();
+
+ SmallVector<RegisterUsage, 8> RUs(VFs.size());
+ SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
+
+ // A lambda that gets the register usage for the given type and VF.
+ auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) {
+ unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType());
+ return std::max<unsigned>(1, VF * TypeSize / WidestRegister);
+ };
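
GetRegUsage estimates how many vector registers one live value occupies at a given VF. A worked sketch with hypothetical target numbers:

#include <algorithm>
unsigned regsForValue(unsigned TypeSizeBits, unsigned VF,
                      unsigned WidestRegister) {
  return std::max<unsigned>(1, VF * TypeSizeBits / WidestRegister);
}
// With 128-bit registers: an i32 at VF = 8 needs max(1, 256/128) = 2
// registers; an i8 at VF = 4 needs max(1, 32/128) = 1.
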
+
for (unsigned int i = 0; i < Index; ++i) {
Instruction *I = IdxToInstr[i];
// Ignore instructions that are never used within the loop.
if (!Ends.count(I)) continue;
- // Ignore ephemeral values.
- if (EphValues.count(I))
- continue;
-
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
- for (unsigned int j=0, e = List.size(); j < e; ++j)
+ for (unsigned int j = 0, e = List.size(); j < e; ++j)
OpenIntervals.erase(List[j]);
- // Count the number of live interals.
- MaxUsage = std::max(MaxUsage, OpenIntervals.size());
+ // Skip ignored values.
+ if (ValuesToIgnore.count(I))
+ continue;
- DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
- OpenIntervals.size() << '\n');
+ // For each VF find the maximum usage of registers.
+ for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
+ if (VFs[j] == 1) {
+ MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
+ continue;
+ }
+
+ // Count the number of live intervals.
+ unsigned RegUsage = 0;
+ for (auto Inst : OpenIntervals) {
+ // Skip ignored values for VF > 1.
+ if (VecValuesToIgnore.count(Inst))
+ continue;
+ RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
+ }
+ MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
+ }
+
+ DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
+ << OpenIntervals.size() << '\n');
// Add the current instruction to the list of open intervals.
OpenIntervals.insert(I);
}
- unsigned Invariant = LoopInvariants.size();
- DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n');
- DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
- DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n');
+ for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
+ unsigned Invariant = 0;
+ if (VFs[i] == 1)
+ Invariant = LoopInvariants.size();
+ else {
+ for (auto Inst : LoopInvariants)
+ Invariant += GetRegUsage(Inst->getType(), VFs[i]);
+ }
+
+ DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
+ DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
+ DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
+ DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n');
- R.LoopInvariantRegs = Invariant;
- R.MaxLocalUsers = MaxUsage;
- return R;
+ RU.LoopInvariantRegs = Invariant;
+ RU.MaxLocalUsers = MaxUsages[i];
+ RUs[i] = RU;
+ }
+
+ return RUs;
}
unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
@@ -4900,11 +5235,11 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
if (isa<DbgInfoIntrinsic>(it))
continue;
- // Ignore ephemeral values.
- if (EphValues.count(it))
+ // Skip ignored values.
+ if (ValuesToIgnore.count(&*it))
continue;
- unsigned C = getInstructionCost(it, VF);
+ unsigned C = getInstructionCost(&*it, VF);
// Check if we should override the cost.
if (ForceTargetInstructionCost.getNumOccurrences() > 0)
@@ -4969,7 +5304,7 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
if (!C)
return true;
- const APInt &APStepVal = C->getValue()->getValue();
+ const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
@@ -4981,9 +5316,8 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
}
static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
- if (Legal->hasStride(I->getOperand(0)) || Legal->hasStride(I->getOperand(1)))
- return true;
- return false;
+ return Legal->hasStride(I->getOperand(0)) ||
+ Legal->hasStride(I->getOperand(1));
}
unsigned
@@ -4994,7 +5328,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
VF = 1;
Type *RetTy = I->getType();
+ if (VF > 1 && MinBWs.count(I))
+ RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
Type *VectorTy = ToVectorTy(RetTy, VF);
+ auto SE = PSE.getSE();
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
@@ -5076,6 +5413,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
+ Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
+ auto It = MinBWs.find(Op0AsInstruction);
+ if (VF > 1 && It != MinBWs.end())
+ ValTy = IntegerType::get(ValTy->getContext(), It->second);
VectorTy = ToVectorTy(ValTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
@@ -5199,8 +5540,28 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
Legal->isInductionVariable(I->getOperand(0)))
return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
I->getOperand(0)->getType());
-
- Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+
+ Type *SrcScalarTy = I->getOperand(0)->getType();
+ Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
+ if (VF > 1 && MinBWs.count(I)) {
+ // This cast is going to be shrunk. This may remove the cast or it might
+    // turn it into a slightly different cast. For example, if MinBW == 16,
+ // "zext i8 %1 to i32" becomes "zext i8 %1 to i16".
+ //
+ // Calculate the modified src and dest types.
+ Type *MinVecTy = VectorTy;
+ if (I->getOpcode() == Instruction::Trunc) {
+ SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy);
+ VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF),
+ MinVecTy);
+ } else if (I->getOpcode() == Instruction::ZExt ||
+ I->getOpcode() == Instruction::SExt) {
+ SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy);
+ VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF),
+ MinVecTy);
+ }
+ }
+
return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
case Instruction::Call: {
@@ -5240,15 +5601,18 @@ char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DemandedBits)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
@@ -5269,6 +5633,79 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
return false;
}
+void LoopVectorizationCostModel::collectValuesToIgnore() {
+ // Ignore ephemeral values.
+ CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
+
+ // Ignore type-promoting instructions we identified during reduction
+ // detection.
+ for (auto &Reduction : *Legal->getReductionVars()) {
+ RecurrenceDescriptor &RedDes = Reduction.second;
+ SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
+ VecValuesToIgnore.insert(Casts.begin(), Casts.end());
+ }
+
+  // Ignore induction PHIs that are only used by GetElementPtr instructions or
+  // by the ICmp that exits the loop. Induction variables usually have large
+  // types and can have a big impact when estimating register usage.
+ // This is for when VF > 1.
+ for (auto &Induction : *Legal->getInductionVars()) {
+ auto *PN = Induction.first;
+ auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
+
+ // Check that the PHI is only used by the induction increment (UpdateV) or
+ // by GEPs. Then check that UpdateV is only used by a compare instruction or
+ // the loop header PHI.
+  // FIXME: Need precise def-use analysis to determine if this induction
+  // variable will be vectorized.
+ if (std::all_of(PN->user_begin(), PN->user_end(),
+ [&](const User *U) -> bool {
+ return U == UpdateV || isa<GetElementPtrInst>(U);
+ }) &&
+ std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
+ [&](const User *U) -> bool {
+ return U == PN || isa<ICmpInst>(U);
+ })) {
+ VecValuesToIgnore.insert(PN);
+ VecValuesToIgnore.insert(UpdateV);
+ }
+ }
+
+ // Ignore instructions that will not be vectorized.
+ // This is for when VF > 1.
+ for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be;
+ ++bb) {
+ for (auto &Inst : **bb) {
+ switch (Inst.getOpcode()) {
+ case Instruction::GetElementPtr: {
+      // Ignore the GEP if its last operand is an induction variable so that
+      // it forms a consecutive load/store and won't be vectorized as a
+      // scatter/gather pattern.
+
+ GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst);
+ unsigned NumOperands = Gep->getNumOperands();
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
+ bool GepToIgnore = true;
+
+ // Check that all of the gep indices are uniform except for the
+ // induction operand.
+ for (unsigned i = 0; i != NumOperands; ++i) {
+ if (i != InductionOperand &&
+ !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
+ TheLoop)) {
+ GepToIgnore = false;
+ break;
+ }
+ }
+
+ if (GepToIgnore)
+ VecValuesToIgnore.insert(&Inst);
+ break;
+ }
+ }
+ }
+ }
+}
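
The GEP screen above reduces to: every index except the induction operand must be loop-invariant. A reduced sketch (the helper name is hypothetical; it uses the same ScalarEvolution queries as the loop above):

static bool allOtherIndicesInvariant(GetElementPtrInst *Gep,
                                     unsigned InductionOperand,
                                     ScalarEvolution *SE, Loop *L) {
  for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
    if (i != InductionOperand &&
        !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), L))
      return false;               // non-uniform index: scatter/gather shape
  return true;                    // consecutive access, safe to ignore
}
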
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
bool IfPredicateStore) {
@@ -5316,19 +5753,12 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
- Instruction *InsertPt = Builder.GetInsertPoint();
- BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = nullptr;
-
VectorParts Cond;
- Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
Instr->getParent());
- VectorLp = LI->getLoopFor(IfBlock);
- assert(VectorLp && "Must have a loop for this block");
}
// For each vector unroll 'part':
@@ -5343,11 +5773,6 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
Builder.CreateExtractElement(Cond[Part], Builder.getInt32(0));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cond[Part],
ConstantInt::get(Cond[Part]->getType(), 1));
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, *LI);
- // Update Builder with newly created basic block.
- Builder.SetInsertPoint(InsertPt);
}
Instruction *Cloned = Instr->clone();
@@ -5367,16 +5792,10 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
if (!IsVoidRetTy)
VecResults[Part] = Cloned;
- // End if-block.
- if (IfPredicateStore) {
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
- Builder.SetInsertPoint(InsertPt);
- ReplaceInstWithInst(IfBlock->getTerminator(),
- BranchInst::Create(CondBlock, NewIfBlock, Cmp));
- IfBlock = NewIfBlock;
- }
+ // End if-block.
+ if (IfPredicateStore)
+ PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
+ Cmp));
}
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b180c976c233..9ed44d1e0cb8 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -61,7 +62,7 @@ static cl::opt<int>
"number "));
static cl::opt<bool>
-ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
+ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
cl::desc("Attempt to vectorize horizontal reductions"));
static cl::opt<bool> ShouldStartVectorizeHorAtStore(
@@ -73,6 +74,14 @@ static cl::opt<int>
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
+/// Limits the size of scheduling regions in a block.
+/// It avoids long compile times for _very_ large blocks where vector
+/// instructions are spread over a wide range.
+/// This limit is way higher than needed by real-world functions.
+static cl::opt<int>
+ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
+ cl::desc("Limit the size of the SLP scheduling region per block"));
+
namespace {
// FIXME: Set this via cl::opt to allow overriding.
@@ -89,6 +98,10 @@ static const unsigned AliasedCheckLimit = 10;
// This limit is useful for very large basic blocks.
static const unsigned MaxMemDepDistance = 160;
+/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
+/// regions to be handled.
+static const int MinScheduleRegionSize = 16;
+
/// \brief Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
@@ -156,13 +169,11 @@ static unsigned getAltOpcode(unsigned Op) {
/// of an alternate sequence which can later be merged as
/// a ShuffleVector instruction.
static bool canCombineAsAltInst(unsigned Op) {
- if (Op == Instruction::FAdd || Op == Instruction::FSub ||
- Op == Instruction::Sub || Op == Instruction::Add)
- return true;
- return false;
+ return Op == Instruction::FAdd || Op == Instruction::FSub ||
+ Op == Instruction::Sub || Op == Instruction::Add;
}
-/// \returns ShuffleVector instruction if intructions in \p VL have
+/// \returns ShuffleVector instruction if instructions in \p VL have
/// alternate fadd,fsub / fsub,fadd / add,sub / sub,add sequence.
/// (e.g. opcodes of fadd,fsub,fadd,fsub...)
static unsigned isAltInst(ArrayRef<Value *> VL) {
@@ -242,6 +253,9 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
case LLVMContext::MD_fpmath:
MD = MDNode::getMostGenericFPMath(MD, IMD);
break;
+ case LLVMContext::MD_nontemporal:
+ MD = MDNode::intersect(MD, IMD);
+ break;
}
}
I->setMetadata(Kind, MD);
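
propagateMetadata merges each metadata kind across all scalars of the bundle; for !nontemporal the merged node survives only if every scalar carried it, which is plain set intersection. A toy model of that rule (std::string stands in for metadata nodes; this is not the MDNode API):

    #include <algorithm>
    #include <iterator>
    #include <set>
    #include <string>

    std::set<std::string> intersectMD(const std::set<std::string> &A,
                                      const std::set<std::string> &B) {
      // Keep only metadata present on both instructions, mirroring the
      // effect of MDNode::intersect for the !nontemporal kind.
      std::set<std::string> Out;
      std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                            std::inserter(Out, Out.begin()));
      return Out;
    }
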
@@ -393,7 +407,7 @@ public:
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
- /// \returns true if it is benefitial to reverse the vector order.
+ /// \returns true if it is beneficial to reverse the vector order.
bool shouldReorder() const {
return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder;
}
@@ -441,7 +455,7 @@ private:
/// \returns a vector from a collection of scalars in \p VL.
Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
- /// \returns whether the VectorizableTree is fully vectoriable and will
+ /// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even when the tree height is tiny.
bool isFullyVectorizableTinyTree();
@@ -506,7 +520,7 @@ private:
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
ExternalUser (Value *S, llvm::User *U, int L) :
- Scalar(S), User(U), Lane(L){};
+ Scalar(S), User(U), Lane(L){}
// Which scalar in our function.
Value *Scalar;
// Which user that uses the scalar.
@@ -717,6 +731,8 @@ private:
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize),
ScheduleStart(nullptr), ScheduleEnd(nullptr),
FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr),
+ ScheduleRegionSize(0),
+ ScheduleRegionSizeLimit(ScheduleRegionSizeBudget),
// Make sure that the initial SchedulingRegionID is greater than the
// initial SchedulingRegionID in ScheduleData (which is 0).
SchedulingRegionID(1) {}
@@ -728,6 +744,13 @@ private:
FirstLoadStoreInRegion = nullptr;
LastLoadStoreInRegion = nullptr;
+ // Reduce the maximum schedule region size by the size of the
+ // previous scheduling run.
+ ScheduleRegionSizeLimit -= ScheduleRegionSize;
+ if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
+ ScheduleRegionSizeLimit = MinScheduleRegionSize;
+ ScheduleRegionSize = 0;
+
// Make a new scheduling region, i.e. all existing ScheduleData is not
// in the new region yet.
++SchedulingRegionID;
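
A stand-alone sketch of the budget update above: each new scheduling region may only spend what previous regions in the block left unused, but never less than MinScheduleRegionSize, so small bundles remain schedulable even in huge blocks. The helper name is illustrative:

    // Mirrors the clamp performed whenever a new scheduling region starts.
    int nextRegionLimit(int Limit, int UsedByPreviousRegion, int MinSize) {
      Limit -= UsedByPreviousRegion;
      return Limit < MinSize ? MinSize : Limit;
    }
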
@@ -804,7 +827,8 @@ private:
void cancelScheduling(ArrayRef<Value *> VL);
/// Extends the scheduling region so that V is inside the region.
- void extendSchedulingRegion(Value *V);
+ /// \returns true if the region size is within the limit.
+ bool extendSchedulingRegion(Value *V);
/// Initialize the ScheduleData structures for new instructions in the
/// scheduling region.
@@ -858,6 +882,12 @@ private:
/// (can be null).
ScheduleData *LastLoadStoreInRegion;
+ /// The current size of the scheduling region.
+ int ScheduleRegionSize;
+
+ /// The maximum size allowed for the scheduling region.
+ int ScheduleRegionSizeLimit;
+
/// The ID of the scheduling region. For a new vectorization iteration this
/// is incremented which "removes" all ScheduleData from the region.
int SchedulingRegionID;
@@ -1077,7 +1107,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (!BS.tryScheduleBundle(VL, this)) {
DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
- BS.cancelScheduling(VL);
+ assert((!BS.getScheduleData(VL[0]) ||
+ !BS.getScheduleData(VL[0])->isPartOfBundle()) &&
+ "tryScheduleBundle should cancelScheduling on failure");
newTreeEntry(VL, false);
return;
}
@@ -1125,6 +1157,23 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
case Instruction::Load: {
+ // Check that a vectorized load would load the same memory as a scalar
+ // load.
+ // For example, we don't want to vectorize loads that are smaller than 8
+ // bits. Even though we have a packed struct {<i2, i2, i2, i2>}, LLVM treats
+ // loading/storing it as an i8 struct. If we vectorize loads/stores from
+ // such a struct, we read/write packed bits disagreeing with the
+ // unvectorized version.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ Type *ScalarTy = VL[0]->getType();
+
+ if (DL.getTypeSizeInBits(ScalarTy) !=
+ DL.getTypeAllocSizeInBits(ScalarTy)) {
+ BS.cancelScheduling(VL);
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+ return;
+ }
// Check if the loads are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
LoadInst *L = cast<LoadInst>(VL[i]);
@@ -1134,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- const DataLayout &DL = F->getParent()->getDataLayout();
+
if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
++NumLoadsWantToChangeOrder;
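
The new guard above compares a type's bit size against its allocation size: an i1, for instance, is 1 bit wide but allocated as 8 bits, so a vectorized <N x i1> load would touch different bytes than N scalar loads. Reduced to a predicate (hypothetical helper, not LLVM API):

    // A scalar type is safe to widen into a vector element only when its
    // value occupies its full in-memory allocation.
    bool typeFillsItsAllocation(unsigned SizeInBits, unsigned AllocSizeInBits) {
      return SizeInBits == AllocSizeInBits;
    }
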
@@ -1690,7 +1739,8 @@ int BoUpSLP::getSpillCost() {
}
// Now find the sequence of instructions between PrevInst and Inst.
- BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst);
+ BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
+ PrevInstIt(PrevInst->getIterator());
--PrevInstIt;
while (InstIt != PrevInstIt) {
if (PrevInstIt == PrevInst->getParent()->rend()) {
@@ -1890,106 +1940,126 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
}
}
+// Return true if I should be commuted before adding its left and right
+// operands to the arrays Left and Right.
+//
+// The vectorizer is trying either to have all elements on one side be
+// instructions with the same opcode, to enable further vectorization, or to
+// have a splat to lower the cost of vectorizing.
+static bool shouldReorderOperands(int i, Instruction &I,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ bool AllSameOpcodeLeft,
+ bool AllSameOpcodeRight, bool SplatLeft,
+ bool SplatRight) {
+ Value *VLeft = I.getOperand(0);
+ Value *VRight = I.getOperand(1);
+ // If we have "SplatRight", try to see if commuting is needed to preserve it.
+ if (SplatRight) {
+ if (VRight == Right[i - 1])
+ // Preserve SplatRight
+ return false;
+ if (VLeft == Right[i - 1]) {
+ // Commuting would preserve SplatRight, but we don't want to break
+ // SplatLeft either, i.e. preserve the original order if possible.
+ // (FIXME: why do we care?)
+ if (SplatLeft && VLeft == Left[i - 1])
+ return false;
+ return true;
+ }
+ }
+ // Symmetrically handle Right side.
+ if (SplatLeft) {
+ if (VLeft == Left[i - 1])
+ // Preserve SplatLeft
+ return false;
+ if (VRight == Left[i - 1])
+ return true;
+ }
+
+ Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+ Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+ // If we have "AllSameOpcodeRight", try to see if the left operand preserves
+ // it and the right one does not; in this case we want to commute.
+ if (AllSameOpcodeRight) {
+ unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
+ if (IRight && RightPrevOpcode == IRight->getOpcode())
+ // Do not commute, a match on the right preserves AllSameOpcodeRight
+ return false;
+ if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
+ // We have a match and may want to commute, but first check if there is
+ // not also a match on the existing operands on the Left to preserve
+ // AllSameOpcodeLeft, i.e. preserve the original order if possible.
+ // (FIXME: why do we care?)
+ if (AllSameOpcodeLeft && ILeft &&
+ cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
+ return false;
+ return true;
+ }
+ }
+ // Symmetrically handle Left side.
+ if (AllSameOpcodeLeft) {
+ unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
+ if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
+ return false;
+ if (IRight && LeftPrevOpcode == IRight->getOpcode())
+ return true;
+ }
+ return false;
+}
+
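
The SplatRight branch of shouldReorderOperands, isolated: commute only when the current right operand breaks the splat and the left operand would restore it, and even then prefer the original order if that also keeps SplatLeft alive. A simplified template model, with plain pointers in place of llvm::Value and illustrative names:

    template <typename V>
    bool commuteForSplatRight(V *VLeft, V *VRight, V *PrevLeft, V *PrevRight,
                              bool SplatLeft) {
      if (VRight == PrevRight)
        return false; // the right side already continues the splat
      if (VLeft == PrevRight) {
        // Commuting restores SplatRight, unless keeping the original order
        // also preserves an ongoing SplatLeft.
        if (SplatLeft && VLeft == PrevLeft)
          return false;
        return true;
      }
      return false;
    }
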
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right) {
- SmallVector<Value *, 16> OrigLeft, OrigRight;
-
- bool AllSameOpcodeLeft = true;
- bool AllSameOpcodeRight = true;
- for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- Instruction *I = cast<Instruction>(VL[i]);
- Value *VLeft = I->getOperand(0);
- Value *VRight = I->getOperand(1);
-
- OrigLeft.push_back(VLeft);
- OrigRight.push_back(VRight);
-
- Instruction *ILeft = dyn_cast<Instruction>(VLeft);
- Instruction *IRight = dyn_cast<Instruction>(VRight);
-
- // Check whether all operands on one side have the same opcode. In this case
- // we want to preserve the original order and not make things worse by
- // reordering.
- if (i && AllSameOpcodeLeft && ILeft) {
- if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) {
- if (PLeft->getOpcode() != ILeft->getOpcode())
- AllSameOpcodeLeft = false;
- } else
- AllSameOpcodeLeft = false;
- }
- if (i && AllSameOpcodeRight && IRight) {
- if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) {
- if (PRight->getOpcode() != IRight->getOpcode())
- AllSameOpcodeRight = false;
- } else
- AllSameOpcodeRight = false;
- }
-
- // Sort two opcodes. In the code below we try to preserve the ability to use
- // broadcast of values instead of individual inserts.
- // vl1 = load
- // vl2 = phi
- // vr1 = load
- // vr2 = vr2
- // = vl1 x vr1
- // = vl2 x vr2
- // If we just sorted according to opcode we would leave the first line in
- // tact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
- // = vl1 x vr1
- // = vr2 x vl2
- // Because vr2 and vr1 are from the same load we loose the opportunity of a
- // broadcast for the packed right side in the backend: we have [vr1, vl2]
- // instead of [vr1, vr2=vr1].
- if (ILeft && IRight) {
- if (!i && ILeft->getOpcode() > IRight->getOpcode()) {
- Left.push_back(IRight);
- Right.push_back(ILeft);
- } else if (i && ILeft->getOpcode() > IRight->getOpcode() &&
- Right[i - 1] != IRight) {
- // Try not to destroy a broad cast for no apparent benefit.
- Left.push_back(IRight);
- Right.push_back(ILeft);
- } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
- Right[i - 1] == ILeft) {
- // Try preserve broadcasts.
- Left.push_back(IRight);
- Right.push_back(ILeft);
- } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
- Left[i - 1] == IRight) {
- // Try preserve broadcasts.
- Left.push_back(IRight);
- Right.push_back(ILeft);
- } else {
- Left.push_back(ILeft);
- Right.push_back(IRight);
- }
- continue;
- }
- // One opcode, put the instruction on the right.
- if (ILeft) {
- Left.push_back(VRight);
- Right.push_back(ILeft);
- continue;
- }
+ if (VL.size()) {
+ // Peel the first iteration out of the loop since there's nothing
+ // interesting to do anyway and it simplifies the checks in the loop.
+ auto VLeft = cast<Instruction>(VL[0])->getOperand(0);
+ auto VRight = cast<Instruction>(VL[0])->getOperand(1);
+ if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
+ // Favor having an instruction on the right. FIXME: why?
+ std::swap(VLeft, VRight);
Left.push_back(VLeft);
Right.push_back(VRight);
}
- bool LeftBroadcast = isSplat(Left);
- bool RightBroadcast = isSplat(Right);
-
- // If operands end up being broadcast return this operand order.
- if (LeftBroadcast || RightBroadcast)
- return;
+ // Keep track if we have instructions with all the same opcode on one side.
+ bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
+ bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
+ // Keep track if we have one side with all the same value (broadcast).
+ bool SplatLeft = true;
+ bool SplatRight = true;
- // Don't reorder if the operands where good to begin.
- if (AllSameOpcodeRight || AllSameOpcodeLeft) {
- Left = OrigLeft;
- Right = OrigRight;
+ for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ assert(I->isCommutative() && "Can only process commutative instruction");
+ // Commute to favor either a splat or maximizing having the same opcodes on
+ // one side.
+ if (shouldReorderOperands(i, *I, Left, Right, AllSameOpcodeLeft,
+ AllSameOpcodeRight, SplatLeft, SplatRight)) {
+ Left.push_back(I->getOperand(1));
+ Right.push_back(I->getOperand(0));
+ } else {
+ Left.push_back(I->getOperand(0));
+ Right.push_back(I->getOperand(1));
+ }
+ // Update Splat* and AllSameOpcode* after the insertion.
+ SplatRight = SplatRight && (Right[i - 1] == Right[i]);
+ SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
+ AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
+ (cast<Instruction>(Left[i - 1])->getOpcode() ==
+ cast<Instruction>(Left[i])->getOpcode());
+ AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
+ (cast<Instruction>(Right[i - 1])->getOpcode() ==
+ cast<Instruction>(Right[i])->getOpcode());
}
+ // If one operand ends up being a broadcast, return this operand order.
+ if (SplatRight || SplatLeft)
+ return;
+
const DataLayout &DL = F->getParent()->getDataLayout();
// Finally check if we can get longer vectorizable chain by reordering
@@ -2030,7 +2100,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
Instruction *VL0 = cast<Instruction>(VL[0]);
- BasicBlock::iterator NextInst = VL0;
+ BasicBlock::iterator NextInst(VL0);
++NextInst;
Builder.SetInsertPoint(VL0->getParent(), NextInst);
Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
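
This hunk, like several below, is mechanical fallout of ilist iterators no longer converting implicitly to and from Instruction*: iterators are now constructed explicitly (or obtained via getIterator()), and &*It recovers the element pointer from an iterator. The same distinction in plain standard C++:

    #include <list>

    void iteratorVsPointer() {
      std::list<int> BB = {1, 2, 3};
      std::list<int>::iterator It = BB.begin(); // explicit iterator value
      int *Elem = &*It;  // pointer from iterator, as in the &*it++ below
      ++It;              // advance past the element, as in NextInst above
      (void)Elem;
    }
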
@@ -2487,7 +2557,7 @@ Value *BoUpSLP::vectorizeTree() {
scheduleBlock(BSIter.second.get());
}
- Builder.SetInsertPoint(F->getEntryBlock().begin());
+ Builder.SetInsertPoint(&F->getEntryBlock().front());
vectorizeTree(&VectorizableTree[0]);
DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
@@ -2532,7 +2602,7 @@ Value *BoUpSLP::vectorizeTree() {
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
- Builder.SetInsertPoint(F->getEntryBlock().begin());
+ Builder.SetInsertPoint(&F->getEntryBlock().front());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
CSEBlocks.insert(&F->getEntryBlock());
User->replaceUsesOfWith(Scalar, Ex);
@@ -2641,7 +2711,7 @@ void BoUpSLP::optimizeGatherSequence() {
BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
- Instruction *In = it++;
+ Instruction *In = &*it++;
if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
continue;
@@ -2681,8 +2751,15 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
ScheduleData *Bundle = nullptr;
bool ReSchedule = false;
DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
+
+ // Make sure that the scheduling region contains all
+ // instructions of the bundle.
+ for (Value *V : VL) {
+ if (!extendSchedulingRegion(V))
+ return false;
+ }
+
for (Value *V : VL) {
- extendSchedulingRegion(V);
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member (maybe not in same basic block)");
@@ -2743,7 +2820,11 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
schedule(pickedSD, ReadyInsts);
}
}
- return Bundle->isReady();
+ if (!Bundle->isReady()) {
+ cancelScheduling(VL);
+ return false;
+ }
+ return true;
}
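
After this change tryScheduleBundle follows a strict two-phase protocol: first, every bundle member must fit into the now budget-limited scheduling region, and only then is the bundle formed; any later failure cancels the bundle before returning, which is what the new assert in buildTree_rec relies on. A sketch of that shape, with hypothetical extendRegion/buildBundle/cancel names:

    #include <vector>

    template <typename ValueT, typename SchedulerT>
    bool tryScheduleSketch(SchedulerT &BS, const std::vector<ValueT *> &VL) {
      for (ValueT *V : VL)
        if (!BS.extendRegion(V)) // may fail on ScheduleRegionSizeLimit
          return false;          // nothing was built, nothing to cancel
      if (!BS.buildBundle(VL)) { // dependence calculation may still fail
        BS.cancel(VL);           // never leave a half-scheduled bundle behind
        return false;
      }
      return true;
    }
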
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
@@ -2772,9 +2853,9 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
}
}
-void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
+bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
if (getScheduleData(V))
- return;
+ return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
@@ -2785,21 +2866,26 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
ScheduleEnd = I->getNextNode();
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
- return;
+ return true;
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
- BasicBlock::reverse_iterator UpIter(ScheduleStart);
+ BasicBlock::reverse_iterator UpIter(ScheduleStart->getIterator());
BasicBlock::reverse_iterator UpperEnd = BB->rend();
BasicBlock::iterator DownIter(ScheduleEnd);
BasicBlock::iterator LowerEnd = BB->end();
for (;;) {
+ if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
+ DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
+ return false;
+ }
+
if (UpIter != UpperEnd) {
if (&*UpIter == I) {
initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
ScheduleStart = I;
DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
- return;
+ return true;
}
UpIter++;
}
@@ -2810,13 +2896,14 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
ScheduleEnd = I->getNextNode();
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
- return;
+ return true;
}
DownIter++;
}
assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
"instruction not found in block");
}
+ return true;
}
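
extendSchedulingRegion does not know whether the new instruction lies above or below the current region, so it advances an upward and a downward iterator in lockstep, charging each round against the remaining budget. Modeled with integer positions for a half-open region [Start, End) instead of basic-block iterators, purely for illustration:

    bool extendRegionSketch(int &Start, int &End, int Target, int &Size,
                            int Limit) {
      while (Target < Start || Target >= End) {
        if (++Size > Limit)
          return false; // region would exceed its size budget
        if (Target < Start)
          --Start;      // grow the region upwards
        else
          ++End;        // grow the region downwards
      }
      return true;
    }
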
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
@@ -2896,8 +2983,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
}
} else {
// I'm not sure if this can ever happen. But we need to be safe.
- // This lets the instruction/bundle never be scheduled and eventally
- // disable vectorization.
+ // This lets the instruction/bundle never be scheduled and
+ // eventually disable vectorization.
BundleMember->Dependencies++;
BundleMember->incrementUnscheduledDeps(1);
}
@@ -3003,7 +3090,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
};
std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
- // Ensure that all depencency data is updated and fill the ready-list with
+ // Ensure that all dependency data is updated and fill the ready-list with
// initial instructions.
int Idx = 0;
int NumToSchedule = 0;
@@ -3035,7 +3122,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
Instruction *pickedInst = BundleMember->Inst;
if (LastScheduledInst->getNextNode() != pickedInst) {
BS->BB->getInstList().remove(pickedInst);
- BS->BB->getInstList().insert(LastScheduledInst, pickedInst);
+ BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
+ pickedInst);
}
LastScheduledInst = pickedInst;
BundleMember = BundleMember->NextInBundle;
@@ -3074,11 +3162,11 @@ struct SLPVectorizer : public FunctionPass {
if (skipOptnoneFunction(F))
return false;
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -3139,13 +3227,15 @@ struct SLPVectorizer : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
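
The SE/AA changes in this and the previous hunk track LLVM's split of analyses into a result object plus a thin legacy wrapper pass: passes now require the wrapper and fetch the result from it. In miniature, with illustrative names rather than the real pass plumbing:

    // The analysis result is an ordinary object...
    struct MyResult {
      int Answer = 0;
    };

    // ...and the legacy pass exists only to own it for the old pass manager,
    // which is why the requirement moved from ScalarEvolution to
    // ScalarEvolutionWrapperPass above.
    struct MyWrapperPass {
      MyResult Result;
      MyResult &getResult() { return Result; }
    };
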
@@ -3260,15 +3350,26 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
// Do a quadratic search on all of the given stores and find
// all of the pairs of stores that follow each other.
+ SmallVector<unsigned, 16> IndexQueue;
for (unsigned i = 0, e = Stores.size(); i < e; ++i) {
- for (unsigned j = 0; j < e; ++j) {
- if (i == j)
- continue;
- const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
- if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
- Tails.insert(Stores[j]);
+ const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+ IndexQueue.clear();
+ // If a store has multiple consecutive store candidates, search the Stores
+ // array according to the sequence: from i+1 to e, then from i-1 to 0.
+ // This is because pairing with the immediately succeeding or preceding
+ // candidate usually creates the best chance to find an SLP vectorization
+ // opportunity.
+ unsigned j = 0;
+ for (j = i + 1; j < e; ++j)
+ IndexQueue.push_back(j);
+ for (j = i; j > 0; --j)
+ IndexQueue.push_back(j - 1);
+
+ for (auto &k : IndexQueue) {
+ if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
+ Tails.insert(Stores[k]);
Heads.insert(Stores[i]);
- ConsecutiveChain[Stores[i]] = Stores[j];
+ ConsecutiveChain[Stores[i]] = Stores[k];
+ break;
}
}
}
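
The visiting order built above, extracted into a helper: for store i among e candidates, try all following stores from nearest to farthest, then all preceding ones from nearest to farthest, stopping at the first consecutive match. A stand-alone version of just the ordering:

    #include <vector>

    std::vector<unsigned> storeSearchOrder(unsigned i, unsigned e) {
      std::vector<unsigned> IndexQueue;
      for (unsigned j = i + 1; j < e; ++j) // i+1, i+2, ..., e-1
        IndexQueue.push_back(j);
      for (unsigned j = i; j > 0; --j)     // i-1, i-2, ..., 0
        IndexQueue.push_back(j - 1);
      return IndexQueue;
    }
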
@@ -3428,7 +3529,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned VecIdx = 0;
for (auto &V : BuildVectorSlice) {
IRBuilder<true, NoFolder> Builder(
- ++BasicBlock::iterator(InsertAfter));
+ InsertAfter->getParent(), ++BasicBlock::iterator(InsertAfter));
InsertElementInst *IE = cast<InsertElementInst>(V);
Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
VectorizedRoot, Builder.getInt32(VecIdx++)));
@@ -3552,16 +3653,17 @@ class HorizontalReduction {
unsigned ReductionOpcode;
/// The opcode of the values we perform a reduction on.
unsigned ReducedValueOpcode;
- /// The width of one full horizontal reduction operation.
- unsigned ReduxWidth;
/// Should we model this reduction as a pairwise reduction tree or a tree that
/// splits the vector in halves and adds those halves.
bool IsPairwiseReduction;
public:
+ /// The width of one full horizontal reduction operation.
+ unsigned ReduxWidth;
+
HorizontalReduction()
: ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
- ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
+ ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0) {}
/// \brief Try to find a reduction tree.
bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
@@ -3607,11 +3709,11 @@ public:
return false;
// Post order traverse the reduction tree starting at B. We only handle true
- // trees containing only binary operators.
- SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
+ // trees containing only binary operators or selects.
+ SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
Stack.push_back(std::make_pair(B, 0));
while (!Stack.empty()) {
- BinaryOperator *TreeN = Stack.back().first;
+ Instruction *TreeN = Stack.back().first;
unsigned EdgeToVist = Stack.back().second++;
bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
@@ -3647,9 +3749,10 @@ public:
// Visit left or right.
Value *NextV = TreeN->getOperand(EdgeToVist);
- BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
- if (Next)
- Stack.push_back(std::make_pair(Next, 0));
+ // We currently only allow BinaryOperators and SelectInsts as reduction
+ // values in our tree.
+ if (isa<BinaryOperator>(NextV) || isa<SelectInst>(NextV))
+ Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));
else if (NextV != Phi)
return false;
}
@@ -3717,9 +3820,12 @@ public:
return VectorizedTree != nullptr;
}
-private:
+ unsigned numReductionValues() const {
+ return ReducedVals.size();
+ }
- /// \brief Calcuate the cost of a reduction.
+private:
+ /// \brief Calculate the cost of a reduction.
int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
Type *ScalarTy = FirstReducedVal->getType();
Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
@@ -3825,6 +3931,82 @@ static bool PhiTypeSorterFunc(Value *V, Value *V2) {
return V->getType() < V2->getType();
}
+/// \brief Try to get a reduction value from a phi node.
+///
+/// Given a phi node \p P in a block \p ParentBB, consider possible reductions
+/// if they come from either \p ParentBB or a containing loop latch.
+///
+/// \returns A candidate reduction value if possible, or \code nullptr \endcode
+/// if not possible.
+static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
+ BasicBlock *ParentBB, LoopInfo *LI) {
+ // There are situations where the reduction value is not dominated by the
+ // reduction phi. Vectorizing such cases has been reported to cause
+ // miscompiles. See PR25787.
+ auto DominatedReduxValue = [&](Value *R) {
+ return (
+ dyn_cast<Instruction>(R) &&
+ DT->dominates(P->getParent(), dyn_cast<Instruction>(R)->getParent()));
+ };
+
+ Value *Rdx = nullptr;
+
+ // Return the incoming value if it comes from the same BB as the phi node.
+ if (P->getIncomingBlock(0) == ParentBB) {
+ Rdx = P->getIncomingValue(0);
+ } else if (P->getIncomingBlock(1) == ParentBB) {
+ Rdx = P->getIncomingValue(1);
+ }
+
+ if (Rdx && DominatedReduxValue(Rdx))
+ return Rdx;
+
+ // Otherwise, check whether we have a loop latch to look at.
+ Loop *BBL = LI->getLoopFor(ParentBB);
+ if (!BBL)
+ return nullptr;
+ BasicBlock *BBLatch = BBL->getLoopLatch();
+ if (!BBLatch)
+ return nullptr;
+
+ // There is a loop latch, return the incoming value if it comes from
+ // that. This reduction pattern occasionally turns up.
+ if (P->getIncomingBlock(0) == BBLatch) {
+ Rdx = P->getIncomingValue(0);
+ } else if (P->getIncomingBlock(1) == BBLatch) {
+ Rdx = P->getIncomingValue(1);
+ }
+
+ if (Rdx && DominatedReduxValue(Rdx))
+ return Rdx;
+
+ return nullptr;
+}
+
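
The DominatedReduxValue guard above only accepts a candidate whose defining block is dominated by the phi's block, which is what rules out the PR25787 miscompiles. Dominance itself reduces to walking the immediate-dominator chain; a naive stand-alone model, not the DominatorTree API:

    struct Block {
      const Block *IDom = nullptr; // immediate dominator, null for the entry
    };

    // A dominates B iff A appears on B's immediate-dominator chain (A == B
    // counts as domination).
    bool dominates(const Block *A, const Block *B) {
      for (const Block *X = B; X; X = X->IDom)
        if (X == A)
          return true;
      return false;
    }
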
+/// \brief Attempt to match and reduce a horizontal reduction.
+/// If it is legal to match a horizontal reduction feeding
+/// the phi node P with the reduction operator BI, then check if it
+/// can be done.
+/// \returns true if a horizontal reduction was matched and reduced.
+/// \returns false if a horizontal reduction was not matched.
+static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
+ BoUpSLP &R, TargetTransformInfo *TTI) {
+ if (!ShouldVectorizeHor)
+ return false;
+
+ HorizontalReduction HorRdx;
+ if (!HorRdx.matchAssociativeReduction(P, BI))
+ return false;
+
+ // If there is a sufficient number of reduction values, reduce
+ // to a nearby power-of-2. We can safely generate oversized
+ // vectors and rely on the backend to split them to legal sizes.
+ HorRdx.ReduxWidth =
+ std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+
+ return HorRdx.tryToReduce(R, TTI);
+}
+
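
The width computation above, stand-alone: take the largest power of two not exceeding the number of reduced values (PowerOf2Floor), but never go below 4; oversized vectors are later split to legal widths by the backend. An equivalent helper without the LLVM MathExtras dependency:

    #include <cstdint>

    uint64_t reduxWidth(uint64_t NumReducedVals) {
      uint64_t W = 1;
      while ((W << 1) != 0 && (W << 1) <= NumReducedVals)
        W <<= 1;              // W = PowerOf2Floor(NumReducedVals)
      return W < 4 ? 4 : W;   // clamp as in canMatchHorizontalReduction
    }
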
bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
@@ -3881,7 +4063,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
// We may go through BB multiple times so skip the one we have checked.
- if (!VisitedInstrs.insert(it).second)
+ if (!VisitedInstrs.insert(&*it).second)
continue;
if (isa<DbgInfoIntrinsic>(it))
@@ -3892,20 +4074,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Check that the PHI is a reduction PHI.
if (P->getNumIncomingValues() != 2)
return Changed;
- Value *Rdx =
- (P->getIncomingBlock(0) == BB
- ? (P->getIncomingValue(0))
- : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
- : nullptr));
+
+ Value *Rdx = getReductionValue(DT, P, BB, LI);
+
// Check if this is a Binary Operator.
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
if (!BI)
continue;
// Try to match and vectorize a horizontal reduction.
- HorizontalReduction HorRdx;
- if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
- HorRdx.tryToReduce(R, TTI)) {
+ if (canMatchHorizontalReduction(P, BI, R, TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
@@ -3928,15 +4106,12 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
}
- // Try to vectorize horizontal reductions feeding into a store.
if (ShouldStartVectorizeHorAtStore)
if (StoreInst *SI = dyn_cast<StoreInst>(it))
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
- HorizontalReduction HorRdx;
- if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
- HorRdx.tryToReduce(R, TTI)) ||
- tryToVectorize(BinOp, R))) {
+ if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI) ||
+ tryToVectorize(BinOp, R)) {
Changed = true;
it = BB->begin();
e = BB->end();
@@ -4037,10 +4212,10 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
char SLPVectorizer::ID = 0;
static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)