author     Dimitry Andric <dim@FreeBSD.org>   2017-12-20 14:16:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-12-20 14:16:56 +0000
commit     2cab237b5dbfe1b3e9c7aa7a3c02d2b98fcf7462 (patch)
tree       524fe828571f81358bba62fdb6d04c6e5e96a2a4 /contrib/llvm/lib/Analysis
parent     6c7828a2807ea5e50c79ca42dbedf2b589ce63b2 (diff)
parent     044eb2f6afba375a914ac9d8024f8f5142bb912e (diff)
Merge llvm trunk r321017 to contrib/llvm.
Notes: svn path=/projects/clang600-import/; revision=327023
Diffstat (limited to 'contrib/llvm/lib/Analysis')
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysis.cpp | 216
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 16
-rw-r--r--  contrib/llvm/lib/Analysis/AliasSetTracker.cpp | 42
-rw-r--r--  contrib/llvm/lib/Analysis/AssumptionCache.cpp | 25
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 103
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp | 86
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 67
-rw-r--r--  contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp | 128
-rw-r--r--  contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp | 96
-rw-r--r--  contrib/llvm/lib/Analysis/CFLGraph.h | 55
-rw-r--r--  contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp | 15
-rw-r--r--  contrib/llvm/lib/Analysis/CGSCCPassManager.cpp | 229
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraph.cpp | 21
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp | 43
-rw-r--r--  contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp | 144
-rw-r--r--  contrib/llvm/lib/Analysis/CodeMetrics.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/ConstantFolding.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/CostModel.cpp | 492
-rw-r--r--  contrib/llvm/lib/Analysis/DemandedBits.cpp | 21
-rw-r--r--  contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/DominanceFrontier.cpp | 12
-rw-r--r--  contrib/llvm/lib/Analysis/GlobalsModRef.cpp | 69
-rw-r--r--  contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp | 40
-rw-r--r--  contrib/llvm/lib/Analysis/InlineCost.cpp | 671
-rw-r--r--  contrib/llvm/lib/Analysis/InstructionSimplify.cpp | 744
-rw-r--r--  contrib/llvm/lib/Analysis/Interval.cpp | 3
-rw-r--r--  contrib/llvm/lib/Analysis/IntervalPartition.cpp | 13
-rw-r--r--  contrib/llvm/lib/Analysis/LazyCallGraph.cpp | 564
-rw-r--r--  contrib/llvm/lib/Analysis/LazyValueInfo.cpp | 594
-rw-r--r--  contrib/llvm/lib/Analysis/Lint.cpp | 27
-rw-r--r--  contrib/llvm/lib/Analysis/Loads.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 248
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp | 24
-rw-r--r--  contrib/llvm/lib/Analysis/LoopInfo.cpp | 142
-rw-r--r--  contrib/llvm/lib/Analysis/LoopPass.cpp | 29
-rw-r--r--  contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/MemDerefPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp | 33
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 90
-rw-r--r--  contrib/llvm/lib/Analysis/MemorySSA.cpp | 157
-rw-r--r--  contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 172
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp (renamed from contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp) | 104
-rw-r--r--  contrib/llvm/lib/Analysis/PostDominators.cpp | 11
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp | 55
-rw-r--r--  contrib/llvm/lib/Analysis/PtrUseVisitor.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPass.cpp | 3
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolution.cpp | 933
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 33
-rw-r--r--  contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp | 24
-rw-r--r--  contrib/llvm/lib/Analysis/SparsePropagation.cpp | 347
-rw-r--r--  contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp | 45
-rw-r--r--  contrib/llvm/lib/Analysis/TargetTransformInfo.cpp | 619
-rw-r--r--  contrib/llvm/lib/Analysis/Trace.cpp | 5
-rw-r--r--  contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 219
-rw-r--r--  contrib/llvm/lib/Analysis/ValueLattice.cpp | 26
-rw-r--r--  contrib/llvm/lib/Analysis/ValueLatticeUtils.cpp | 44
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp | 926
-rw-r--r--  contrib/llvm/lib/Analysis/VectorUtils.cpp | 3
62 files changed, 5226 insertions, 3644 deletions
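
Most of the alias-analysis hunks that follow perform one mechanical migration: the old MRI_* bitmask constants and hand-rolled bit operations (Result & MRI_Mod, Result | ArgMask, and so on) become the scoped ModRefInfo enum plus helpers such as isNoModRef, isModSet, isRefSet, intersectModRef, unionModRef and clearMod. For orientation, a minimal self-contained C++ sketch of that lattice and of the accumulate-and-early-exit pattern used in AAResults::getModRefInfo; only the names come from the patch, while the two-bit encoding and the helper bodies are illustrative assumptions:

// Toy model of the Mod/Ref lattice used throughout the hunks below.
// NoModRef is the bottom of the lattice, ModRef the top.
#include <cassert>

enum class ModRefInfo { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

ModRefInfo intersectModRef(ModRefInfo A, ModRefInfo B) {
  return ModRefInfo(int(A) & int(B)); // meet: keep only the common effects
}
ModRefInfo unionModRef(ModRefInfo A, ModRefInfo B) {
  return ModRefInfo(int(A) | int(B)); // join: accumulate effects
}
bool isNoModRef(ModRefInfo MRI) { return MRI == ModRefInfo::NoModRef; }
bool isModSet(ModRefInfo MRI) { return int(MRI) & int(ModRefInfo::Mod); }
bool isRefSet(ModRefInfo MRI) { return int(MRI) & int(ModRefInfo::Ref); }
ModRefInfo clearMod(ModRefInfo MRI) {
  return intersectModRef(MRI, ModRefInfo::Ref); // drop the Mod bit
}

int main() {
  // Pattern from AAResults::getModRefInfo: start at the top of the lattice,
  // intersect each AA implementation's answer, and stop early at the bottom.
  ModRefInfo Result = ModRefInfo::ModRef;
  const ModRefInfo Answers[] = {ModRefInfo::ModRef, ModRefInfo::Ref};
  for (ModRefInfo A : Answers) {
    Result = intersectModRef(Result, A);
    if (isNoModRef(Result))
      break;
  }
  assert(isRefSet(Result) && !isModSet(Result));
  assert(clearMod(ModRefInfo::ModRef) == ModRefInfo::Ref);
  assert(unionModRef(ModRefInfo::Mod, ModRefInfo::Ref) == ModRefInfo::ModRef);
  return 0;
}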
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 4c29aeaa622f..dd2db1e5b27b 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==//
+//==- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation --==//
//
// The LLVM Compiler Infrastructure
//
@@ -26,26 +26,35 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <iterator>
+
using namespace llvm;
/// Allow disabling BasicAA from the AA results. This is particularly useful
@@ -110,13 +119,13 @@ bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
}
ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
- ModRefInfo Result = MRI_ModRef;
+ ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx));
+ Result = intersectModRef(Result, AA->getArgModRefInfo(CS, ArgIdx));
// Early-exit the moment we reach the bottom of the lattice.
- if (Result == MRI_NoModRef)
+ if (isNoModRef(Result))
return Result;
}
@@ -129,29 +138,30 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// Check if the two calls modify the same memory
return getModRefInfo(CS, Call);
} else if (I->isFenceLike()) {
- // If this is a fence, just return MRI_ModRef.
- return MRI_ModRef;
+ // If this is a fence, just return ModRef.
+ return ModRefInfo::ModRef;
} else {
// Otherwise, check if the call modifies or references the
// location this memory access defines. The best we can say
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I);
- if (getModRefInfo(Call, DefLoc) != MRI_NoModRef)
- return MRI_ModRef;
+ ModRefInfo MR = getModRefInfo(Call, DefLoc);
+ if (isModOrRefSet(MR))
+ return setModAndRef(MR);
}
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
}
ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
const MemoryLocation &Loc) {
- ModRefInfo Result = MRI_ModRef;
+ ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc));
+ Result = intersectModRef(Result, AA->getModRefInfo(CS, Loc));
// Early-exit the moment we reach the bottom of the lattice.
- if (Result == MRI_NoModRef)
+ if (isNoModRef(Result))
return Result;
}
@@ -160,16 +170,16 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
auto MRB = getModRefBehavior(CS);
if (MRB == FMRB_DoesNotAccessMemory ||
MRB == FMRB_OnlyAccessesInaccessibleMem)
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
if (onlyReadsMemory(MRB))
- Result = ModRefInfo(Result & MRI_Ref);
+ Result = clearMod(Result);
else if (doesNotReadMemory(MRB))
- Result = ModRefInfo(Result & MRI_Mod);
+ Result = clearRef(Result);
if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
bool DoesAlias = false;
- ModRefInfo AllArgsMask = MRI_NoModRef;
+ ModRefInfo AllArgsMask = ModRefInfo::NoModRef;
if (doesAccessArgPointees(MRB)) {
for (auto AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) {
const Value *Arg = *AI;
@@ -181,33 +191,34 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
if (ArgAlias != NoAlias) {
ModRefInfo ArgMask = getArgModRefInfo(CS, ArgIdx);
DoesAlias = true;
- AllArgsMask = ModRefInfo(AllArgsMask | ArgMask);
+ AllArgsMask = unionModRef(AllArgsMask, ArgMask);
}
}
}
+ // Return NoModRef if no alias found with any argument.
if (!DoesAlias)
- return MRI_NoModRef;
- Result = ModRefInfo(Result & AllArgsMask);
+ return ModRefInfo::NoModRef;
+ // Logical & between other AA analyses and argument analysis.
+ Result = intersectModRef(Result, AllArgsMask);
}
// If Loc is a constant memory location, the call definitely could not
// modify the memory location.
- if ((Result & MRI_Mod) &&
- pointsToConstantMemory(Loc, /*OrLocal*/ false))
- Result = ModRefInfo(Result & ~MRI_Mod);
+ if (isModSet(Result) && pointsToConstantMemory(Loc, /*OrLocal*/ false))
+ Result = clearMod(Result);
return Result;
}
ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) {
- ModRefInfo Result = MRI_ModRef;
+ ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2));
+ Result = intersectModRef(Result, AA->getModRefInfo(CS1, CS2));
// Early-exit the moment we reach the bottom of the lattice.
- if (Result == MRI_NoModRef)
+ if (isNoModRef(Result))
return Result;
}
@@ -217,28 +228,28 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
// If CS1 or CS2 are readnone, they don't interact.
auto CS1B = getModRefBehavior(CS1);
if (CS1B == FMRB_DoesNotAccessMemory)
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
auto CS2B = getModRefBehavior(CS2);
if (CS2B == FMRB_DoesNotAccessMemory)
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
// If they both only read from memory, there is no dependence.
if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
// If CS1 only reads memory, the only dependence on CS2 can be
// from CS1 reading memory written by CS2.
if (onlyReadsMemory(CS1B))
- Result = ModRefInfo(Result & MRI_Ref);
+ Result = clearMod(Result);
else if (doesNotReadMemory(CS1B))
- Result = ModRefInfo(Result & MRI_Mod);
+ Result = clearRef(Result);
// If CS2 only access memory through arguments, accumulate the mod/ref
// information from CS1's references to the memory referenced by
// CS2's arguments.
if (onlyAccessesArgPointees(CS2B)) {
- ModRefInfo R = MRI_NoModRef;
+ ModRefInfo R = ModRefInfo::NoModRef;
if (doesAccessArgPointees(CS2B)) {
for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
const Value *Arg = *I;
@@ -247,17 +258,23 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI);
- // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence
- // of CS1 on that location is the inverse.
- ModRefInfo ArgMask = getArgModRefInfo(CS2, CS2ArgIdx);
- if (ArgMask == MRI_Mod)
- ArgMask = MRI_ModRef;
- else if (ArgMask == MRI_Ref)
- ArgMask = MRI_Mod;
-
- ArgMask = ModRefInfo(ArgMask & getModRefInfo(CS1, CS2ArgLoc));
-
- R = ModRefInfo((R | ArgMask) & Result);
+ // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the
+ // dependence of CS1 on that location is the inverse:
+ // - If CS2 modifies location, dependence exists if CS1 reads or writes.
+ // - If CS2 only reads location, dependence exists if CS1 writes.
+ ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx);
+ ModRefInfo ArgMask = ModRefInfo::NoModRef;
+ if (isModSet(ArgModRefCS2))
+ ArgMask = ModRefInfo::ModRef;
+ else if (isRefSet(ArgModRefCS2))
+ ArgMask = ModRefInfo::Mod;
+
+ // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use
+ // above ArgMask to update dependence info.
+ ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc);
+ ArgMask = intersectModRef(ArgMask, ModRefCS1);
+
+ R = intersectModRef(unionModRef(R, ArgMask), Result);
if (R == Result)
break;
}
@@ -268,7 +285,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
// If CS1 only accesses memory through arguments, check if CS2 references
// any of the memory referenced by CS1's arguments. If not, return NoModRef.
if (onlyAccessesArgPointees(CS1B)) {
- ModRefInfo R = MRI_NoModRef;
+ ModRefInfo R = ModRefInfo::NoModRef;
if (doesAccessArgPointees(CS1B)) {
for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
const Value *Arg = *I;
@@ -277,16 +294,14 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI);
- // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod
- // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1
- // might Ref, then we care only about a Mod by CS2.
- ModRefInfo ArgMask = getArgModRefInfo(CS1, CS1ArgIdx);
- ModRefInfo ArgR = getModRefInfo(CS2, CS1ArgLoc);
- if (((ArgMask & MRI_Mod) != MRI_NoModRef &&
- (ArgR & MRI_ModRef) != MRI_NoModRef) ||
- ((ArgMask & MRI_Ref) != MRI_NoModRef &&
- (ArgR & MRI_Mod) != MRI_NoModRef))
- R = ModRefInfo((R | ArgMask) & Result);
+ // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might
+ // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If
+ // CS1 might Ref, then we care only about a Mod by CS2.
+ ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx);
+ ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc);
+ if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) ||
+ (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2)))
+ R = intersectModRef(unionModRef(R, ArgModRefCS1), Result);
if (R == Result)
break;
@@ -334,64 +349,63 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
const MemoryLocation &Loc) {
// Be conservative in the face of atomic.
if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered))
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
// Otherwise, a load just reads.
- return MRI_Ref;
+ return ModRefInfo::Ref;
}
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
const MemoryLocation &Loc) {
// Be conservative in the face of atomic.
if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered))
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
if (Loc.Ptr) {
// If the store address cannot alias the pointer in question, then the
// specified memory cannot be modified by the store.
if (!alias(MemoryLocation::get(S), Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this store.
if (pointsToConstantMemory(Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
}
// Otherwise, a store just writes.
- return MRI_Mod;
+ return ModRefInfo::Mod;
}
ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
// If we know that the location is a constant memory location, the fence
// cannot modify this location.
if (Loc.Ptr && pointsToConstantMemory(Loc))
- return MRI_Ref;
- return MRI_ModRef;
+ return ModRefInfo::Ref;
+ return ModRefInfo::ModRef;
}
ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
const MemoryLocation &Loc) {
-
if (Loc.Ptr) {
// If the va_arg address cannot alias the pointer in question, then the
// specified memory cannot be accessed by the va_arg.
if (!alias(MemoryLocation::get(V), Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this va_arg.
if (pointsToConstantMemory(Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
}
// Otherwise, a va_arg reads and writes.
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
@@ -400,11 +414,11 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
// If the pointer is a pointer to constant memory,
// then it could not have been modified by this catchpad.
if (pointsToConstantMemory(Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
}
// Otherwise, a catchpad reads and writes.
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
@@ -413,42 +427,42 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
// If the pointer is a pointer to constant memory,
// then it could not have been modified by this catchpad.
if (pointsToConstantMemory(Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
}
// Otherwise, a catchret reads and writes.
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
const MemoryLocation &Loc) {
// Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
if (isStrongerThanMonotonic(CX->getSuccessOrdering()))
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
// If the cmpxchg address does not alias the location, it does not access it.
if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
const MemoryLocation &Loc) {
// Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
if (isStrongerThanMonotonic(RMW->getOrdering()))
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
// If the atomicrmw address does not alias the location, it does not access it.
if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
/// \brief Return information about whether a particular call site modifies
/// or reads the specified memory location \p MemLoc before instruction \p I
-/// in a BasicBlock. A ordered basic block \p OBB can be used to speed up
+/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
/// instruction-ordering queries inside the BasicBlock containing \p I.
/// FIXME: this is really just shoring-up a deficiency in alias analysis.
/// BasicAA isn't willing to spend linear time determining whether an alloca
@@ -459,26 +473,26 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
DominatorTree *DT,
OrderedBasicBlock *OBB) {
if (!DT)
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
const Value *Object =
GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout());
if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
isa<Constant>(Object))
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
ImmutableCallSite CS(I);
if (!CS.getInstruction() || CS.getInstruction() == Object)
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
- if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
- /* StoreCaptures */ true, I, DT,
- /* include Object */ true,
- /* OrderedBasicBlock */ OBB))
- return MRI_ModRef;
+ if (PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
+ /* StoreCaptures */ true, I, DT,
+ /* include Object */ true,
+ /* OrderedBasicBlock */ OBB))
+ return ModRefInfo::ModRef;
unsigned ArgNo = 0;
- ModRefInfo R = MRI_NoModRef;
+ ModRefInfo R = ModRefInfo::NoModRef;
for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
@@ -498,10 +512,10 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
if (CS.doesNotAccessMemory(ArgNo))
continue;
if (CS.onlyReadsMemory(ArgNo)) {
- R = MRI_Ref;
+ R = ModRefInfo::Ref;
continue;
}
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
return R;
}
@@ -511,7 +525,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
///
bool AAResults::canBasicBlockModify(const BasicBlock &BB,
const MemoryLocation &Loc) {
- return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod);
+ return canInstructionRangeModRef(BB.front(), BB.back(), Loc, ModRefInfo::Mod);
}
/// canInstructionRangeModRef - Return true if it is possible for the
@@ -530,22 +544,23 @@ bool AAResults::canInstructionRangeModRef(const Instruction &I1,
++E; // Convert from inclusive to exclusive range.
for (; I != E; ++I) // Check every instruction in range
- if (getModRefInfo(&*I, Loc) & Mode)
+ if (isModOrRefSet(intersectModRef(getModRefInfo(&*I, Loc), Mode)))
return true;
return false;
}
// Provide a definition for the root virtual destructor.
-AAResults::Concept::~Concept() {}
+AAResults::Concept::~Concept() = default;
// Provide a definition for the static object used to identify passes.
AnalysisKey AAManager::Key;
namespace {
+
/// A wrapper pass for external alias analyses. This just squirrels away the
/// callback used to run any analyses and register their results.
struct ExternalAAWrapperPass : ImmutablePass {
- typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT;
+ using CallbackT = std::function<void(Pass &, Function &, AAResults &)>;
CallbackT CB;
@@ -554,6 +569,7 @@ struct ExternalAAWrapperPass : ImmutablePass {
ExternalAAWrapperPass() : ImmutablePass(ID) {
initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
+
explicit ExternalAAWrapperPass(CallbackT CB)
: ImmutablePass(ID), CB(std::move(CB)) {
initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
@@ -563,9 +579,11 @@ struct ExternalAAWrapperPass : ImmutablePass {
AU.setPreservesAll();
}
};
-}
+
+} // end anonymous namespace
char ExternalAAWrapperPass::ID = 0;
+
INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis",
false, true)
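
The rewritten AAResults::getModRefInfo(CS1, CS2) hunk above documents the per-argument inversion it performs: if CS2 may write CS2ArgLoc, CS1 interferes whether it reads or writes that location (mask ModRef); if CS2 only reads it, only a write by CS1 matters (mask Mod); the mask is then intersected with what CS1 actually does to the location. A hedged standalone sketch of just that step, using a pair of booleans in place of ModRefInfo and a hypothetical helper name, argDependence:

// Per-argument dependence of call site CS1 on a location CS2ArgLoc accessed
// by call site CS2, mirroring the rewritten hunk above.
#include <cassert>

struct MR { bool Mod; bool Ref; }; // toy stand-in for ModRefInfo

MR argDependence(MR ArgModRefCS2, MR ModRefCS1) {
  // Invert what CS2 does into the kinds of access by CS1 that would matter.
  MR ArgMask = {false, false};
  if (ArgModRefCS2.Mod) {        // CS2 may write: any access by CS1 matters
    ArgMask.Mod = true;
    ArgMask.Ref = true;
  } else if (ArgModRefCS2.Ref) { // CS2 only reads: only a write by CS1 matters
    ArgMask.Mod = true;
  }
  // Intersect with what CS1 actually does to CS2ArgLoc.
  MR R = {ArgMask.Mod && ModRefCS1.Mod, ArgMask.Ref && ModRefCS1.Ref};
  return R;
}

int main() {
  // CS2 only reads the argument and CS1 also only reads it: no dependence.
  MR R = argDependence(MR{false, true}, MR{false, true});
  assert(!R.Mod && !R.Ref);
  // CS2 only reads the argument but CS1 may write it: the write matters.
  R = argDependence(MR{false, true}, MR{true, false});
  assert(R.Mod && !R.Ref);
  return 0;
}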
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 435c782d97a5..423acf739f58 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -244,20 +244,20 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
switch (AA.getModRefInfo(C, Pointer, Size)) {
- case MRI_NoModRef:
+ case ModRefInfo::NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, I, Pointer,
F.getParent());
++NoModRefCount;
break;
- case MRI_Mod:
+ case ModRefInfo::Mod:
PrintModRefResults("Just Mod", PrintMod, I, Pointer, F.getParent());
++ModCount;
break;
- case MRI_Ref:
+ case ModRefInfo::Ref:
PrintModRefResults("Just Ref", PrintRef, I, Pointer, F.getParent());
++RefCount;
break;
- case MRI_ModRef:
+ case ModRefInfo::ModRef:
PrintModRefResults("Both ModRef", PrintModRef, I, Pointer,
F.getParent());
++ModRefCount;
@@ -272,19 +272,19 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
if (D == C)
continue;
switch (AA.getModRefInfo(*C, *D)) {
- case MRI_NoModRef:
+ case ModRefInfo::NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
++NoModRefCount;
break;
- case MRI_Mod:
+ case ModRefInfo::Mod:
PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
++ModCount;
break;
- case MRI_Ref:
+ case ModRefInfo::Ref:
PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
++RefCount;
break;
- case MRI_ModRef:
+ case ModRefInfo::ModRef:
PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
++ModRefCount;
break;
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 4dfa25490d00..c88e0dd7dc44 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -13,17 +13,29 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
using namespace llvm;
static cl::opt<unsigned>
@@ -106,7 +118,6 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
TotalMayAliasSetSize -= AS->size();
AliasSets.erase(AS);
-
}
void AliasSet::removeFromTracker(AliasSetTracker &AST) {
@@ -200,8 +211,8 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
if (!UnknownInsts.empty()) {
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
if (auto *Inst = getUnknownInst(i))
- if (AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)) !=
- MRI_NoModRef)
+ if (isModOrRefSet(
+ AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo))))
return true;
}
@@ -220,15 +231,15 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (auto *UnknownInst = getUnknownInst(i)) {
ImmutableCallSite C1(UnknownInst), C2(Inst);
- if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
- AA.getModRefInfo(C2, C1) != MRI_NoModRef)
+ if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) ||
+ isModOrRefSet(AA.getModRefInfo(C2, C1)))
return true;
}
}
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(),
- I.getAAInfo())) != MRI_NoModRef)
+ if (isModOrRefSet(AA.getModRefInfo(
+ Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo()))))
return true;
return false;
@@ -425,6 +436,7 @@ void AliasSetTracker::addUnknown(Instruction *Inst) {
break;
// FIXME: Add lifetime/invariant intrinsics (See: PR30807).
case Intrinsic::assume:
+ case Intrinsic::sideeffect:
return;
}
}
@@ -560,12 +572,11 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() {
AliasAnyAS->AliasAny = true;
for (auto Cur : ASVector) {
-
// If Cur was already forwarding, just forward to the new AS instead.
AliasSet *FwdTo = Cur->Forward;
if (FwdTo) {
Cur->Forward = AliasAnyAS;
- AliasAnyAS->addRef();
+ AliasAnyAS->addRef();
FwdTo->dropRef(*this);
continue;
}
@@ -580,7 +591,6 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() {
AliasSet &AliasSetTracker::addPointer(Value *P, uint64_t Size,
const AAMDNodes &AAInfo,
AliasSet::AccessLattice E) {
-
AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo);
AS.Access |= E;
@@ -611,7 +621,6 @@ void AliasSet::print(raw_ostream &OS) const {
if (Forward)
OS << " forwarding to " << (void*)Forward;
-
if (!empty()) {
OS << "Pointers: ";
for (iterator I = begin(), E = end(); I != E; ++I) {
@@ -671,10 +680,13 @@ AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
//===----------------------------------------------------------------------===//
namespace {
+
class AliasSetPrinter : public FunctionPass {
AliasSetTracker *Tracker;
+
public:
static char ID; // Pass identification, replacement for typeid
+
AliasSetPrinter() : FunctionPass(ID) {
initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
}
@@ -695,9 +707,11 @@ namespace {
return false;
}
};
-}
+
+} // end anonymous namespace
char AliasSetPrinter::ID = 0;
+
INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
"Alias Set Printer", false, true)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
index 3ff27890dc38..8bfd24ccf77b 100644
--- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
@@ -13,14 +13,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <utility>
+
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -255,8 +267,9 @@ AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry());
}
-AssumptionCacheTracker::~AssumptionCacheTracker() {}
+AssumptionCacheTracker::~AssumptionCacheTracker() = default;
+
+char AssumptionCacheTracker::ID = 0;
INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker",
"Assumption Cache Tracker", false, true)
-char AssumptionCacheTracker::ID = 0;
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index e682a644ef2c..81b9f842249e 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -23,21 +25,40 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/KnownBits.h"
-#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <utility>
#define DEBUG_TYPE "basicaa"
@@ -223,7 +244,6 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
-
// If we've been called recursively, then Offset and Scale will be wider
// than the BOp operands. We'll always zext it here as we'll process sign
// extensions below (see the isa<SExtInst> / isa<ZExtInst> cases).
@@ -574,7 +594,6 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// Otherwise be conservative.
Visited.clear();
return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
-
} while (!Worklist.empty() && --MaxLookup);
Visited.clear();
@@ -598,6 +617,10 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
if (CS.onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
+ else if (CS.onlyAccessesInaccessibleMemory())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem);
+ else if (CS.onlyAccessesInaccessibleMemOrArgMem())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem);
// If CS has operand bundles then aliasing attributes from the function it
// calls do not directly apply to the CallSite. This can be made more
@@ -662,16 +685,15 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
unsigned ArgIdx) {
-
// Checking for known builtin intrinsics and target library functions.
if (isWriteOnlyParam(CS, ArgIdx, TLI))
- return MRI_Mod;
+ return ModRefInfo::Mod;
if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly))
- return MRI_Ref;
+ return ModRefInfo::Ref;
if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
return AAResultBase::getArgModRefInfo(CS, ArgIdx);
}
@@ -748,7 +770,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
if (isa<AllocaInst>(Object))
if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
@@ -758,7 +780,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// Optimistically assume that call doesn't touch Object and check this
// assumption in the following loop.
- ModRefInfo Result = MRI_NoModRef;
+ ModRefInfo Result = ModRefInfo::NoModRef;
unsigned OperandNo = 0;
for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
@@ -787,21 +809,21 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// Operand aliases 'Object', but call doesn't modify it. Strengthen
// initial assumption and keep looking in case if there are more aliases.
if (CS.onlyReadsMemory(OperandNo)) {
- Result = static_cast<ModRefInfo>(Result | MRI_Ref);
+ Result = setRef(Result);
continue;
}
// Operand aliases 'Object' but call only writes into it.
if (CS.doesNotReadMemory(OperandNo)) {
- Result = static_cast<ModRefInfo>(Result | MRI_Mod);
+ Result = setMod(Result);
continue;
}
// This operand aliases 'Object' and call reads and writes into it.
- Result = MRI_ModRef;
+ Result = ModRefInfo::ModRef;
break;
}
// Early return if we improved mod ref information
- if (Result != MRI_ModRef)
+ if (!isModAndRefSet(Result))
return Result;
}
@@ -810,13 +832,13 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// routines do not read values visible in the IR. TODO: Consider special
// casing realloc and strdup routines which access only their arguments as
// well. Or alternatively, replace all of this with inaccessiblememonly once
- // that's implemented fully.
+ // that's implemented fully.
auto *Inst = CS.getInstruction();
if (isMallocOrCallocLikeFn(Inst, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
}
// The semantics of memcpy intrinsics forbid overlap between their respective
@@ -829,18 +851,18 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
Loc)) == MustAlias)
// Loc is exactly the memcpy source thus disjoint from memcpy dest.
- return MRI_Ref;
+ return ModRefInfo::Ref;
if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
Loc)) == MustAlias)
// The converse case.
- return MRI_Mod;
+ return ModRefInfo::Mod;
// It's also possible for Loc to alias both src and dest, or neither.
- ModRefInfo rv = MRI_NoModRef;
+ ModRefInfo rv = ModRefInfo::NoModRef;
if (SrcAA != NoAlias)
- rv = static_cast<ModRefInfo>(rv | MRI_Ref);
+ rv = setRef(rv);
if (DestAA != NoAlias)
- rv = static_cast<ModRefInfo>(rv | MRI_Mod);
+ rv = setMod(rv);
return rv;
}
@@ -848,7 +870,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
if (isIntrinsicCall(CS, Intrinsic::assume))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
// Like assumes, guard intrinsics are also marked as arbitrarily writing so
// that proper control dependencies are maintained but they never mods any
@@ -858,7 +880,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// heap state at the point the guard is issued needs to be consistent in case
// the guard invokes the "deopt" continuation.
if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
- return MRI_Ref;
+ return ModRefInfo::Ref;
// Like assumes, invariant.start intrinsics were also marked as arbitrarily
// writing so that proper control dependencies are maintained but they never
@@ -884,7 +906,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// rules of invariant.start) and print 40, while the first program always
// prints 50.
if (isIntrinsicCall(CS, Intrinsic::invariant_start))
- return MRI_Ref;
+ return ModRefInfo::Ref;
// The AAResultBase base class has some smarts, lets use them.
return AAResultBase::getModRefInfo(CS, Loc);
@@ -897,7 +919,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
// particular memory location.
if (isIntrinsicCall(CS1, Intrinsic::assume) ||
isIntrinsicCall(CS2, Intrinsic::assume))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
// Like assumes, guard intrinsics are also marked as arbitrarily writing so
// that proper control dependencies are maintained but they never mod any
@@ -911,10 +933,14 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
// possibilities for guard intrinsics.
if (isIntrinsicCall(CS1, Intrinsic::experimental_guard))
- return getModRefBehavior(CS2) & MRI_Mod ? MRI_Ref : MRI_NoModRef;
+ return isModSet(createModRefInfo(getModRefBehavior(CS2)))
+ ? ModRefInfo::Ref
+ : ModRefInfo::NoModRef;
if (isIntrinsicCall(CS2, Intrinsic::experimental_guard))
- return getModRefBehavior(CS1) & MRI_Mod ? MRI_Mod : MRI_NoModRef;
+ return isModSet(createModRefInfo(getModRefBehavior(CS1)))
+ ? ModRefInfo::Mod
+ : ModRefInfo::NoModRef;
// The AAResultBase base class has some smarts, lets use them.
return AAResultBase::getModRefInfo(CS1, CS2);
@@ -927,7 +953,6 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
const GEPOperator *GEP2,
uint64_t V2Size,
const DataLayout &DL) {
-
assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
GEP2->getPointerOperand()->stripPointerCastsAndBarriers() &&
GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
@@ -1196,8 +1221,10 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
- if (BaseAlias != MustAlias)
+ if (BaseAlias != MustAlias) {
+ assert(BaseAlias == NoAlias || BaseAlias == MayAlias);
return BaseAlias;
+ }
// Otherwise, we have a MustAlias. Since the base pointers alias each other
// exactly, see if the computed offset from the common pointer tells us
@@ -1236,13 +1263,15 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
AAMDNodes(), V2, MemoryLocation::UnknownSize,
V2AAInfo, nullptr, UnderlyingV2);
- if (R != MustAlias)
+ if (R != MustAlias) {
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
// cannot alias per GEP semantics: "Any memory access must be done through
// a pointer value associated with an address range of the memory access,
// otherwise the behavior is undefined.".
+ assert(R == NoAlias || R == MayAlias);
return R;
+ }
// If the max search depth is reached the result is undefined
if (GEP1MaxLookupReached)
@@ -1569,11 +1598,6 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
(isa<Argument>(O2) && isIdentifiedFunctionLocal(O1)))
return NoAlias;
- // Most objects can't alias null.
- if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
- (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
- return NoAlias;
-
// If one pointer is the result of a call/invoke or load and the other is a
// non-escaping local object within the same function, then we know the
// object couldn't escape to a point where the call could return it.
@@ -1652,9 +1676,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
// If both pointers are pointing into the same object and one of them
// accesses the entire object, then the accesses must overlap in some way.
if (O1 == O2)
- if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSize(O1, V1Size, DL, TLI)) ||
- (V2Size != MemoryLocation::UnknownSize &&
+ if (V1Size != MemoryLocation::UnknownSize &&
+ V2Size != MemoryLocation::UnknownSize &&
+ (isObjectSize(O1, V1Size, DL, TLI) ||
isObjectSize(O2, V2Size, DL, TLI)))
return AliasCache[Locs] = PartialAlias;
@@ -1810,6 +1834,7 @@ BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
}
char BasicAAWrapperPass::ID = 0;
+
void BasicAAWrapperPass::anchor() {}
INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
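
One BasicAA hunk above recasts the memcpy reasoning with the new helpers: a location that can only overlap the source is only read (Ref), one that can only overlap the destination is only written (Mod), and a location that may overlap either side accumulates both bits via setRef/setMod starting from an optimistic NoModRef. A small self-contained sketch of that accumulation; the booleans stand in for the alias queries and memcpyModRef is a hypothetical name, not a BasicAA function:

// Mod/Ref of a memcpy-like call with respect to a location Loc, given
// whether Loc may overlap the source and/or destination operand.
#include <cassert>

enum class ModRefInfo { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

ModRefInfo setRef(ModRefInfo MRI) { return ModRefInfo(int(MRI) | 1); }
ModRefInfo setMod(ModRefInfo MRI) { return ModRefInfo(int(MRI) | 2); }

ModRefInfo memcpyModRef(bool LocMayAliasSrc, bool LocMayAliasDest) {
  ModRefInfo RV = ModRefInfo::NoModRef; // optimistic starting point
  if (LocMayAliasSrc)
    RV = setRef(RV);                    // the source operand is only read
  if (LocMayAliasDest)
    RV = setMod(RV);                    // the destination operand is only written
  return RV;
}

int main() {
  assert(memcpyModRef(true, false) == ModRefInfo::Ref);
  assert(memcpyModRef(false, true) == ModRefInfo::Mod);
  assert(memcpyModRef(true, true) == ModRefInfo::ModRef);
  assert(memcpyModRef(false, false) == ModRefInfo::NoModRef);
  return 0;
}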
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 07a2a9229fd5..41c295895213 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -12,15 +12,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/IR/CFG.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -54,52 +61,67 @@ cl::opt<unsigned>
"is no less than the max frequency of the "
"function multiplied by this percent."));
-// Command line option to turn on CFG dot dump after profile annotation.
-cl::opt<bool>
- PGOViewCounts("pgo-view-counts", cl::init(false), cl::Hidden,
- cl::desc("A boolean option to show CFG dag with "
- "block profile counts and branch probabilities "
- "right after PGO profile annotation step. The "
- "profile counts are computed using branch "
- "probabilities from the runtime profile data and "
- "block frequency propagation algorithm. To view "
- "the raw counts from the profile, use option "
- "-pgo-view-raw-counts instead. To limit graph "
- "display to only one function, use filtering option "
- "-view-bfi-func-name."));
+// Command line option to turn on CFG dot or text dump after profile annotation.
+cl::opt<PGOViewCountsType> PGOViewCounts(
+ "pgo-view-counts", cl::Hidden,
+ cl::desc("A boolean option to show CFG dag or text with "
+ "block profile counts and branch probabilities "
+ "right after PGO profile annotation step. The "
+ "profile counts are computed using branch "
+ "probabilities from the runtime profile data and "
+ "block frequency propagation algorithm. To view "
+ "the raw counts from the profile, use option "
+ "-pgo-view-raw-counts instead. To limit graph "
+ "display to only one function, use filtering option "
+ "-view-bfi-func-name."),
+ cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
+ clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
+ clEnumValN(PGOVCT_Text, "text", "show in text.")));
+
+static cl::opt<bool> PrintBlockFreq(
+ "print-bfi", cl::init(false), cl::Hidden,
+ cl::desc("Print the block frequency info."));
+
+cl::opt<std::string> PrintBlockFreqFuncName(
+ "print-bfi-func-name", cl::Hidden,
+ cl::desc("The option to specify the name of the function "
+ "whose block frequency info is printed."));
namespace llvm {
static GVDAGType getGVDT() {
-
- if (PGOViewCounts)
+ if (PGOViewCounts == PGOVCT_Graph)
return GVDT_Count;
return ViewBlockFreqPropagationDAG;
}
template <>
struct GraphTraits<BlockFrequencyInfo *> {
- typedef const BasicBlock *NodeRef;
- typedef succ_const_iterator ChildIteratorType;
- typedef pointer_iterator<Function::const_iterator> nodes_iterator;
+ using NodeRef = const BasicBlock *;
+ using ChildIteratorType = succ_const_iterator;
+ using nodes_iterator = pointer_iterator<Function::const_iterator>;
static NodeRef getEntryNode(const BlockFrequencyInfo *G) {
return &G->getFunction()->front();
}
+
static ChildIteratorType child_begin(const NodeRef N) {
return succ_begin(N);
}
+
static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
+
static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
return nodes_iterator(G->getFunction()->begin());
}
+
static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
return nodes_iterator(G->getFunction()->end());
}
};
-typedef BFIDOTGraphTraitsBase<BlockFrequencyInfo, BranchProbabilityInfo>
- BFIDOTGTraitsBase;
+using BFIDOTGTraitsBase =
+ BFIDOTGraphTraitsBase<BlockFrequencyInfo, BranchProbabilityInfo>;
template <>
struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase {
@@ -127,7 +149,7 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase {
} // end namespace llvm
-BlockFrequencyInfo::BlockFrequencyInfo() {}
+BlockFrequencyInfo::BlockFrequencyInfo() = default;
BlockFrequencyInfo::BlockFrequencyInfo(const Function &F,
const BranchProbabilityInfo &BPI,
@@ -148,7 +170,7 @@ BlockFrequencyInfo &BlockFrequencyInfo::operator=(BlockFrequencyInfo &&RHS) {
// defined at the first ODR-use which is the BFI member in the
// LazyBlockFrequencyInfo header. The dtor needs the BlockFrequencyInfoImpl
// template instantiated which is not available in the header.
-BlockFrequencyInfo::~BlockFrequencyInfo() {}
+BlockFrequencyInfo::~BlockFrequencyInfo() = default;
bool BlockFrequencyInfo::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
@@ -170,6 +192,11 @@ void BlockFrequencyInfo::calculate(const Function &F,
F.getName().equals(ViewBlockFreqFuncName))) {
view();
}
+ if (PrintBlockFreq &&
+ (PrintBlockFreqFuncName.empty() ||
+ F.getName().equals(PrintBlockFreqFuncName))) {
+ print(dbgs());
+ }
}
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
@@ -191,6 +218,11 @@ BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
return BFI->getProfileCountFromFreq(*getFunction(), Freq);
}
+bool BlockFrequencyInfo::isIrrLoopHeader(const BasicBlock *BB) {
+ assert(BFI && "Expected analysis to be available");
+ return BFI->isIrrLoopHeader(BB);
+}
+
void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
assert(BFI && "Expected analysis to be available");
BFI->setBlockFreq(BB, Freq);
@@ -254,7 +286,6 @@ void BlockFrequencyInfo::print(raw_ostream &OS) const {
BFI->print(OS);
}
-
INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq",
"Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
@@ -264,13 +295,12 @@ INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq",
char BlockFrequencyInfoWrapperPass::ID = 0;
-
BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass()
: FunctionPass(ID) {
initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {}
+BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() = default;
void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS,
const Module *) const {
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index e5d8c3347c16..7e323022d9ce 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -12,10 +12,28 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/IR/Function.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ScaledNumber.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <list>
#include <numeric>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::bfi_detail;
@@ -47,13 +65,13 @@ raw_ostream &BlockMass::print(raw_ostream &OS) const {
namespace {
-typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
-typedef BlockFrequencyInfoImplBase::Distribution Distribution;
-typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
-typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
-typedef BlockFrequencyInfoImplBase::LoopData LoopData;
-typedef BlockFrequencyInfoImplBase::Weight Weight;
-typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
+using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
+using Distribution = BlockFrequencyInfoImplBase::Distribution;
+using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList;
+using Scaled64 = BlockFrequencyInfoImplBase::Scaled64;
+using LoopData = BlockFrequencyInfoImplBase::LoopData;
+using Weight = BlockFrequencyInfoImplBase::Weight;
+using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
/// \brief Dithering mass distributer.
///
@@ -158,7 +176,8 @@ static void combineWeightsBySorting(WeightList &Weights) {
static void combineWeightsByHashing(WeightList &Weights) {
// Collect weights into a DenseMap.
- typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
+ using HashTable = DenseMap<BlockNode::IndexType, Weight>;
+
HashTable Combined(NextPowerOf2(2 * Weights.size()));
for (const Weight &W : Weights)
combineWeight(Combined[W.TargetNode.Index], W);
@@ -252,6 +271,7 @@ void BlockFrequencyInfoImplBase::clear() {
// Swap with a default-constructed std::vector, since std::vector<>::clear()
// does not actually clear heap storage.
std::vector<FrequencyData>().swap(Freqs);
+ IsIrrLoopHeader.clear();
std::vector<WorkingData>().swap(Working);
Loops.clear();
}
@@ -261,8 +281,10 @@ void BlockFrequencyInfoImplBase::clear() {
/// Releases all memory not used downstream. In particular, saves Freqs.
static void cleanup(BlockFrequencyInfoImplBase &BFI) {
std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
+ SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
BFI.clear();
BFI.Freqs = std::move(SavedFreqs);
+ BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
}
bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
@@ -553,6 +575,13 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
return BlockCount.getLimitedValue();
}
+bool
+BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
+ if (!Node.isValid())
+ return false;
+ return IsIrrLoopHeader.test(Node.Index);
+}
+
Scaled64
BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
if (!Node.isValid())
@@ -569,7 +598,7 @@ void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
std::string
BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
- return std::string();
+ return {};
}
std::string
@@ -627,16 +656,17 @@ void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
}
namespace llvm {
-template <> struct GraphTraits<IrreducibleGraph> {
- typedef bfi_detail::IrreducibleGraph GraphT;
- typedef const GraphT::IrrNode *NodeRef;
- typedef GraphT::IrrNode::iterator ChildIteratorType;
+template <> struct GraphTraits<IrreducibleGraph> {
+ using GraphT = bfi_detail::IrreducibleGraph;
+ using NodeRef = const GraphT::IrrNode *;
+ using ChildIteratorType = GraphT::IrrNode::iterator;
static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
};
+
} // end namespace llvm
/// \brief Find extra irreducible headers.
@@ -799,3 +829,14 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
}
}
+
+void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) {
+ BlockMass LoopMass = BlockMass::getFull();
+ DitheringDistributer D(Dist, LoopMass);
+ for (const Weight &W : Dist.Weights) {
+ BlockMass Taken = D.takeMass(W.Amount);
+ assert(W.Type == Weight::Local && "all weights should be local");
+ Working[W.TargetNode.Index].getMass() = Taken;
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
+ }
+}
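
The new BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass above hands the loop's full mass to a DitheringDistributer so each target receives its weighted share without mass being lost to rounding. A rough self-contained sketch of that dithering idea; the function name distribute and its integer arithmetic are illustrative assumptions, not the BlockMass implementation:

// Dithered distribution of a fixed amount of mass across weighted targets:
// each share is computed against the mass and weight still remaining, so the
// rounding error is carried forward and the full amount is handed out.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<uint64_t> distribute(uint64_t Mass,
                                 const std::vector<uint64_t> &Weights) {
  uint64_t RemWeight = 0;
  for (uint64_t W : Weights)
    RemWeight += W;
  std::vector<uint64_t> Taken;
  uint64_t RemMass = Mass;
  for (uint64_t W : Weights) {
    // Take a share proportional to what is still left to distribute.
    uint64_t T = RemWeight ? RemMass * W / RemWeight : 0;
    Taken.push_back(T);
    RemMass -= T;
    RemWeight -= W;
  }
  return Taken;
}

int main() {
  // 100 units over equal weights: dithering yields 33, 33, 34 and no loss.
  std::vector<uint64_t> T = distribute(100, {1, 1, 1});
  assert(T[0] == 33 && T[1] == 33 && T[2] == 34);
  assert(T[0] + T[1] + T[2] == 100);
  return 0;
}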
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index a329e5ad48c9..58ccad89d508 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1,4 +1,4 @@
-//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===//
+//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,21 +13,47 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "branch-prob"
+static cl::opt<bool> PrintBranchProb(
+ "print-bpi", cl::init(false), cl::Hidden,
+ cl::desc("Print the branch probability info."));
+
+cl::opt<std::string> PrintBranchProbFuncName(
+ "print-bpi-func-name", cl::Hidden,
+ cl::desc("The option to specify the name of the function "
+ "whose branch probability info is printed."));
+
INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
@@ -221,7 +247,7 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
- if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI)))
return false;
MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
@@ -399,25 +425,73 @@ bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
return true;
}
+static int getSCCNum(const BasicBlock *BB,
+ const BranchProbabilityInfo::SccInfo &SccI) {
+ auto SccIt = SccI.SccNums.find(BB);
+ if (SccIt == SccI.SccNums.end())
+ return -1;
+ return SccIt->second;
+}
+
+// Consider any block that is an entry point to the SCC as a header.
+static bool isSCCHeader(const BasicBlock *BB, int SccNum,
+ BranchProbabilityInfo::SccInfo &SccI) {
+ assert(getSCCNum(BB, SccI) == SccNum);
+
+ // Lazily compute the set of headers for a given SCC and cache the results
+ // in the SccHeaderMap.
+ if (SccI.SccHeaders.size() <= static_cast<unsigned>(SccNum))
+ SccI.SccHeaders.resize(SccNum + 1);
+ auto &HeaderMap = SccI.SccHeaders[SccNum];
+ bool Inserted;
+ BranchProbabilityInfo::SccHeaderMap::iterator HeaderMapIt;
+ std::tie(HeaderMapIt, Inserted) = HeaderMap.insert(std::make_pair(BB, false));
+ if (Inserted) {
+ bool IsHeader = llvm::any_of(make_range(pred_begin(BB), pred_end(BB)),
+ [&](const BasicBlock *Pred) {
+ return getSCCNum(Pred, SccI) != SccNum;
+ });
+ HeaderMapIt->second = IsHeader;
+ return IsHeader;
+ } else
+ return HeaderMapIt->second;
+}
+
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
- const LoopInfo &LI) {
+ const LoopInfo &LI,
+ SccInfo &SccI) {
+ int SccNum;
Loop *L = LI.getLoopFor(BB);
- if (!L)
- return false;
+ if (!L) {
+ SccNum = getSCCNum(BB, SccI);
+ if (SccNum < 0)
+ return false;
+ }
SmallVector<unsigned, 8> BackEdges;
SmallVector<unsigned, 8> ExitingEdges;
SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- if (!L->contains(*I))
- ExitingEdges.push_back(I.getSuccessorIndex());
- else if (L->getHeader() == *I)
- BackEdges.push_back(I.getSuccessorIndex());
- else
- InEdges.push_back(I.getSuccessorIndex());
+ // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch
+ // irreducible loops.
+ if (L) {
+ if (!L->contains(*I))
+ ExitingEdges.push_back(I.getSuccessorIndex());
+ else if (L->getHeader() == *I)
+ BackEdges.push_back(I.getSuccessorIndex());
+ else
+ InEdges.push_back(I.getSuccessorIndex());
+ } else {
+ if (getSCCNum(*I, SccI) != SccNum)
+ ExitingEdges.push_back(I.getSuccessorIndex());
+ else if (isSCCHeader(*I, SccNum, SccI))
+ BackEdges.push_back(I.getSuccessorIndex());
+ else
+ InEdges.push_back(I.getSuccessorIndex());
+ }
}
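// Illustrative sketch (not from the patch): when LoopInfo has no loop for the
// block, the loop above falls back to SCC numbers.  A successor in a different
// SCC is an exiting edge, a successor that is an SCC header (any block with a
// predecessor outside the SCC) is a back edge, and anything else is an
// in-edge.  A tiny stand-alone CFG showing that classification; all names are
// hypothetical and the SCC ids are precomputed by hand.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Region: A -> {B, C}, B <-> C, C -> D.  B and C form the non-trivial SCC.
  std::map<std::string, std::vector<std::string>> Succs = {
      {"A", {"B", "C"}}, {"B", {"C"}}, {"C", {"B", "D"}}, {"D", {}}};
  std::map<std::string, std::vector<std::string>> Preds = {
      {"A", {}}, {"B", {"A", "C"}}, {"C", {"A", "B"}}, {"D", {"C"}}};
  std::map<std::string, int> SccNum = {{"A", -1}, {"B", 0}, {"C", 0}, {"D", -1}};

  auto IsHeader = [&](const std::string &Block) {
    for (const std::string &P : Preds[Block])
      if (SccNum[P] != SccNum[Block])
        return true; // reached from outside the SCC -> header
    return false;
  };

  const std::string BB = "C"; // classify C's successor edges within SCC 0
  for (const std::string &S : Succs[BB]) {
    const char *Kind = SccNum[S] != SccNum[BB]
                           ? "exiting"
                           : (IsHeader(S) ? "back edge" : "in-edge");
    std::printf("%s -> %s : %s\n", BB.c_str(), S.c_str(), Kind);
  }
  return 0;
}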
if (BackEdges.empty() && ExitingEdges.empty())
@@ -480,7 +554,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
if (LHS->getOpcode() == Instruction::And)
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
- if (AndRHS->getUniqueInteger().isPowerOf2())
+ if (AndRHS->getValue().isPowerOf2())
return false;
// Check if the LHS is the return value of a library function
@@ -722,7 +796,6 @@ raw_ostream &
BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
const BasicBlock *Src,
const BasicBlock *Dst) const {
-
const BranchProbability Prob = getEdgeProbability(Src, Dst);
OS << "edge " << Src->getName() << " -> " << Dst->getName()
<< " probability is " << Prob
@@ -747,6 +820,27 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
assert(PostDominatedByUnreachable.empty());
assert(PostDominatedByColdCall.empty());
+ // Record SCC numbers of blocks in the CFG to identify irreducible loops.
+ // FIXME: We could only calculate this if the CFG is known to be irreducible
+ // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
+ int SccNum = 0;
+ SccInfo SccI;
+ for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
+ ++It, ++SccNum) {
+ // Ignore single-block SCCs since they either aren't loops or LoopInfo will
+ // catch them.
+ const std::vector<const BasicBlock *> &Scc = *It;
+ if (Scc.size() == 1)
+ continue;
+
+ DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
+ for (auto *BB : Scc) {
+ DEBUG(dbgs() << " " << BB->getName());
+ SccI.SccNums[BB] = SccNum;
+ }
+ DEBUG(dbgs() << "\n");
+ }
+
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
for (auto BB : post_order(&F.getEntryBlock())) {
@@ -762,7 +856,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
continue;
if (calcColdCallHeuristics(BB))
continue;
- if (calcLoopBranchHeuristics(BB, LI))
+ if (calcLoopBranchHeuristics(BB, LI, SccI))
continue;
if (calcPointerHeuristics(BB))
continue;
@@ -775,6 +869,12 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
PostDominatedByUnreachable.clear();
PostDominatedByColdCall.clear();
+
+ if (PrintBranchProb &&
+ (PrintBranchProbFuncName.empty() ||
+ F.getName().equals(PrintBranchProbFuncName))) {
+ print(dbgs());
+ }
}
void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 0de7ad98af46..076a2b205d00 100644
--- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -1,4 +1,4 @@
-//- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ---*- C++-*-//
+//===- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -54,9 +54,35 @@
// FunctionPasses to run concurrently.
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
+#include "AliasAnalysisSummary.h"
#include "CFLGraph.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <bitset>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::cflaa;
@@ -66,7 +92,7 @@ using namespace llvm::cflaa;
CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {}
CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
: AAResultBase(std::move(RHS)), TLI(RHS.TLI) {}
-CFLAndersAAResult::~CFLAndersAAResult() {}
+CFLAndersAAResult::~CFLAndersAAResult() = default;
namespace {
@@ -95,7 +121,8 @@ enum class MatchState : uint8_t {
FlowToMemAliasReadWrite,
};
-typedef std::bitset<7> StateSet;
+using StateSet = std::bitset<7>;
+
const unsigned ReadOnlyStateMask =
(1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) |
(1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly));
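// Illustrative sketch (not from the patch): StateSet above is a std::bitset
// with one bit per MatchState, while masks such as ReadOnlyStateMask are plain
// unsigned values built from shifts.  A stand-alone look at how such a set and
// mask can interact; DemoState is a hypothetical stand-in for MatchState.
#include <bitset>
#include <cstdint>
#include <cstdio>

enum class DemoState : uint8_t { FlowFrom, FlowFromReadOnly, FlowTo };

int main() {
  std::bitset<3> Seen; // one bit per DemoState value
  Seen.set(static_cast<uint8_t>(DemoState::FlowFromReadOnly));
  const unsigned ReadOnlyMask =
      1U << static_cast<uint8_t>(DemoState::FlowFromReadOnly);
  // to_ulong() lets the bitset be intersected with an ordinary unsigned mask.
  const bool SawReadOnly = (Seen.to_ulong() & ReadOnlyMask) != 0;
  std::printf("read-only state seen: %s\n", SawReadOnly ? "yes" : "no");
  return 0;
}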
@@ -130,13 +157,14 @@ bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
// We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
// the paper) during the analysis.
class ReachabilitySet {
- typedef DenseMap<InstantiatedValue, StateSet> ValueStateMap;
- typedef DenseMap<InstantiatedValue, ValueStateMap> ValueReachMap;
+ using ValueStateMap = DenseMap<InstantiatedValue, StateSet>;
+ using ValueReachMap = DenseMap<InstantiatedValue, ValueStateMap>;
+
ValueReachMap ReachMap;
public:
- typedef ValueStateMap::const_iterator const_valuestate_iterator;
- typedef ValueReachMap::const_iterator const_value_iterator;
+ using const_valuestate_iterator = ValueStateMap::const_iterator;
+ using const_value_iterator = ValueReachMap::const_iterator;
// Insert edge 'From->To' at state 'State'
bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) {
@@ -169,12 +197,13 @@ public:
// We use AliasMemSet to keep track of all memory aliases (the nonterminal "M"
// in the paper) during the analysis.
class AliasMemSet {
- typedef DenseSet<InstantiatedValue> MemSet;
- typedef DenseMap<InstantiatedValue, MemSet> MemMapType;
+ using MemSet = DenseSet<InstantiatedValue>;
+ using MemMapType = DenseMap<InstantiatedValue, MemSet>;
+
MemMapType MemMap;
public:
- typedef MemSet::const_iterator const_mem_iterator;
+ using const_mem_iterator = MemSet::const_iterator;
bool insert(InstantiatedValue LHS, InstantiatedValue RHS) {
// Top-level values can never be memory aliases because one cannot take the
@@ -193,11 +222,12 @@ public:
// We use AliasAttrMap to keep track of the AliasAttr of each node.
class AliasAttrMap {
- typedef DenseMap<InstantiatedValue, AliasAttrs> MapType;
+ using MapType = DenseMap<InstantiatedValue, AliasAttrs>;
+
MapType AttrMap;
public:
- typedef MapType::const_iterator const_iterator;
+ using const_iterator = MapType::const_iterator;
bool add(InstantiatedValue V, AliasAttrs Attr) {
auto &OldAttr = AttrMap[V];
@@ -234,23 +264,28 @@ struct ValueSummary {
};
SmallVector<Record, 4> FromRecords, ToRecords;
};
-}
+
+} // end anonymous namespace
namespace llvm {
+
// Specialize DenseMapInfo for OffsetValue.
template <> struct DenseMapInfo<OffsetValue> {
static OffsetValue getEmptyKey() {
return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(),
DenseMapInfo<int64_t>::getEmptyKey()};
}
+
static OffsetValue getTombstoneKey() {
return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(),
DenseMapInfo<int64_t>::getEmptyKey()};
}
+
static unsigned getHashValue(const OffsetValue &OVal) {
return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue(
std::make_pair(OVal.Val, OVal.Offset));
}
+
static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) {
return LHS == RHS;
}
@@ -263,21 +298,25 @@ template <> struct DenseMapInfo<OffsetInstantiatedValue> {
DenseMapInfo<InstantiatedValue>::getEmptyKey(),
DenseMapInfo<int64_t>::getEmptyKey()};
}
+
static OffsetInstantiatedValue getTombstoneKey() {
return OffsetInstantiatedValue{
DenseMapInfo<InstantiatedValue>::getTombstoneKey(),
DenseMapInfo<int64_t>::getEmptyKey()};
}
+
static unsigned getHashValue(const OffsetInstantiatedValue &OVal) {
return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue(
std::make_pair(OVal.IVal, OVal.Offset));
}
+
static bool isEqual(const OffsetInstantiatedValue &LHS,
const OffsetInstantiatedValue &RHS) {
return LHS == RHS;
}
};
-}
+
+} // end namespace llvm
class CFLAndersAAResult::FunctionInfo {
/// Map a value to other values that may alias it
@@ -654,41 +693,40 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
};
switch (Item.State) {
- case MatchState::FlowFromReadOnly: {
+ case MatchState::FlowFromReadOnly:
NextRevAssignState(MatchState::FlowFromReadOnly);
NextAssignState(MatchState::FlowToReadWrite);
NextMemState(MatchState::FlowFromMemAliasReadOnly);
break;
- }
- case MatchState::FlowFromMemAliasNoReadWrite: {
+
+ case MatchState::FlowFromMemAliasNoReadWrite:
NextRevAssignState(MatchState::FlowFromReadOnly);
NextAssignState(MatchState::FlowToWriteOnly);
break;
- }
- case MatchState::FlowFromMemAliasReadOnly: {
+
+ case MatchState::FlowFromMemAliasReadOnly:
NextRevAssignState(MatchState::FlowFromReadOnly);
NextAssignState(MatchState::FlowToReadWrite);
break;
- }
- case MatchState::FlowToWriteOnly: {
+
+ case MatchState::FlowToWriteOnly:
NextAssignState(MatchState::FlowToWriteOnly);
NextMemState(MatchState::FlowToMemAliasWriteOnly);
break;
- }
- case MatchState::FlowToReadWrite: {
+
+ case MatchState::FlowToReadWrite:
NextAssignState(MatchState::FlowToReadWrite);
NextMemState(MatchState::FlowToMemAliasReadWrite);
break;
- }
- case MatchState::FlowToMemAliasWriteOnly: {
+
+ case MatchState::FlowToMemAliasWriteOnly:
NextAssignState(MatchState::FlowToWriteOnly);
break;
- }
- case MatchState::FlowToMemAliasReadWrite: {
+
+ case MatchState::FlowToMemAliasReadWrite:
NextAssignState(MatchState::FlowToReadWrite);
break;
}
- }
}
static AliasAttrMap buildAttrMap(const CFLGraph &Graph,
@@ -837,7 +875,7 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB) {
if (LocA.Ptr == LocB.Ptr)
- return LocA.Size == LocB.Size ? MustAlias : PartialAlias;
+ return MustAlias;
// Comparisons between global variables and other constants should be
// handled by BasicAA.
diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h
index 95874b88244b..e4e92864061f 100644
--- a/contrib/llvm/lib/Analysis/CFLGraph.h
+++ b/contrib/llvm/lib/Analysis/CFLGraph.h
@@ -1,4 +1,4 @@
-//======- CFLGraph.h - Abstract stratified sets implementation. --------======//
+//===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,19 +6,42 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
/// \file
/// This file defines CFLGraph, an auxiliary data structure used by CFL-based
/// alias analysis.
-///
+//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_CFLGRAPH_H
-#define LLVM_ANALYSIS_CFLGRAPH_H
+#ifndef LLVM_LIB_ANALYSIS_CFLGRAPH_H
+#define LLVM_LIB_ANALYSIS_CFLGRAPH_H
#include "AliasAnalysisSummary.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+#include <vector>
namespace llvm {
namespace cflaa {
@@ -35,14 +58,14 @@ namespace cflaa {
/// I+1) and a reference edge to (X, I-1).
class CFLGraph {
public:
- typedef InstantiatedValue Node;
+ using Node = InstantiatedValue;
struct Edge {
Node Other;
int64_t Offset;
};
- typedef std::vector<Edge> EdgeList;
+ using EdgeList = std::vector<Edge>;
struct NodeInfo {
EdgeList Edges, ReverseEdges;
@@ -74,7 +97,8 @@ public:
};
private:
- typedef DenseMap<Value *, ValueInfo> ValueMap;
+ using ValueMap = DenseMap<Value *, ValueInfo>;
+
ValueMap ValueImpls;
NodeInfo *getNode(Node N) {
@@ -85,7 +109,7 @@ private:
}
public:
- typedef ValueMap::const_iterator const_value_iterator;
+ using const_value_iterator = ValueMap::const_iterator;
bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
assert(N.Val != nullptr);
@@ -496,10 +520,10 @@ template <typename CFLAA> class CFLGraphBuilder {
addNode(Ptr, getAttrEscaped());
break;
}
- case Instruction::IntToPtr: {
+ case Instruction::IntToPtr:
addNode(CE, getAttrUnknown());
break;
- }
+
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
case Instruction::Trunc:
@@ -571,11 +595,11 @@ template <typename CFLAA> class CFLGraphBuilder {
case Instruction::LShr:
case Instruction::AShr:
case Instruction::ICmp:
- case Instruction::FCmp: {
+ case Instruction::FCmp:
addAssignEdge(CE->getOperand(0), CE);
addAssignEdge(CE->getOperand(1), CE);
break;
- }
+
default:
llvm_unreachable("Unknown instruction type encountered!");
}
@@ -640,7 +664,8 @@ public:
return ReturnedValues;
}
};
-}
-}
-#endif
+} // end namespace cflaa
+} // end namespace llvm
+
+#endif // LLVM_LIB_ANALYSIS_CFLGRAPH_H
diff --git a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index adbdd82012a3..eee6d26ba787 100644
--- a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -1,4 +1,4 @@
-//- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ---*- C++-*-//
+//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -36,23 +36,25 @@
// FunctionPasses to run concurrently.
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
+#include "AliasAnalysisSummary.h"
#include "CFLGraph.h"
#include "StratifiedSets.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
+#include <limits>
#include <memory>
-#include <tuple>
+#include <utility>
using namespace llvm;
using namespace llvm::cflaa;
@@ -63,7 +65,7 @@ CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI)
: AAResultBase(), TLI(TLI) {}
CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
: AAResultBase(std::move(Arg)), TLI(Arg.TLI) {}
-CFLSteensAAResult::~CFLSteensAAResult() {}
+CFLSteensAAResult::~CFLSteensAAResult() = default;
/// Information we have about a function and would like to keep around.
class CFLSteensAAResult::FunctionInfo {
@@ -77,6 +79,7 @@ public:
const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
return Sets;
}
+
const AliasSummary &getAliasSummary() const { return Summary; }
};
diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
index 74b5d79ebac5..ceff94756fe3 100644
--- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -8,8 +8,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+
+#define DEBUG_TYPE "cgscc"
using namespace llvm;
@@ -53,8 +72,13 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// Update the SCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
+ // If the CGSCC pass wasn't able to provide a valid updated SCC, the
+ // current SCC may simply need to be skipped if invalid.
+ if (UR.InvalidatedSCCs.count(C)) {
+ DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
+ break;
+ }
// Check that we didn't miss any update scenario.
- assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
assert(C->begin() != C->end() && "Cannot have an empty SCC!");
// Update the analysis manager as each pass runs and potentially
@@ -211,7 +235,7 @@ bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
auto PAC = PA.getChecker<FunctionAnalysisManagerCGSCCProxy>();
if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<LazyCallGraph::SCC>>()) {
for (LazyCallGraph::Node &N : C)
- FAM->clear(N.getFunction());
+ FAM->clear(N.getFunction(), N.getFunction().getName());
return true;
}
@@ -260,7 +284,7 @@ bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
return false;
}
-} // End llvm namespace
+} // end namespace llvm
/// When a new SCC is created for the graph and there might be function
/// analysis results cached for the functions now in that SCC two forms of
@@ -307,7 +331,6 @@ static void updateNewSCCFunctionAnalyses(LazyCallGraph::SCC &C,
}
}
-namespace {
/// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c
/// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly
/// added SCCs.
@@ -319,20 +342,18 @@ namespace {
/// This function returns the SCC containing \p N. This will be either \p C if
/// no new SCCs have been split out, or it will be the new SCC containing \p N.
template <typename SCCRangeT>
-LazyCallGraph::SCC *
+static LazyCallGraph::SCC *
incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
LazyCallGraph::Node &N, LazyCallGraph::SCC *C,
- CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
- bool DebugLogging = false) {
- typedef LazyCallGraph::SCC SCC;
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) {
+ using SCC = LazyCallGraph::SCC;
if (NewSCCRange.begin() == NewSCCRange.end())
return C;
// Add the current SCC to the worklist as its shape has changed.
UR.CWorklist.insert(C);
- if (DebugLogging)
- dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n";
+ DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n");
SCC *OldC = C;
@@ -363,13 +384,12 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
if (NeedFAMProxy)
updateNewSCCFunctionAnalyses(*C, G, AM);
- for (SCC &NewC :
- reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) {
+ for (SCC &NewC : llvm::reverse(make_range(std::next(NewSCCRange.begin()),
+ NewSCCRange.end()))) {
assert(C != &NewC && "No need to re-visit the current SCC!");
assert(OldC != &NewC && "Already handled the original SCC!");
UR.CWorklist.insert(&NewC);
- if (DebugLogging)
- dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n";
+ DEBUG(dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n");
// Ensure new SCCs' function analyses are updated.
if (NeedFAMProxy)
@@ -381,15 +401,14 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
}
return C;
}
-}
LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N,
- CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) {
- typedef LazyCallGraph::Node Node;
- typedef LazyCallGraph::Edge Edge;
- typedef LazyCallGraph::SCC SCC;
- typedef LazyCallGraph::RefSCC RefSCC;
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) {
+ using Node = LazyCallGraph::Node;
+ using Edge = LazyCallGraph::Edge;
+ using SCC = LazyCallGraph::SCC;
+ using RefSCC = LazyCallGraph::RefSCC;
RefSCC &InitialRC = InitialC.getOuterRefSCC();
SCC *C = &InitialC;
@@ -421,7 +440,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(E && "No function transformations should introduce *new* "
"call edges! Any new calls should be modeled as "
"promoted existing ref edges!");
- RetainedEdges.insert(&CalleeN);
+ bool Inserted = RetainedEdges.insert(&CalleeN).second;
+ (void)Inserted;
+ assert(Inserted && "We should never visit a function twice.");
if (!E->isCall())
PromotedRefTargets.insert(&CalleeN);
}
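// Illustrative sketch (not from the patch): the (void)Inserted pattern added
// above.  insert() on set-like containers (the SmallPtrSet used here, std::set
// in this sketch) returns a pair whose .second says whether insertion actually
// happened.  Keeping that bool only for an assert would trigger an
// unused-variable warning in release builds, where assert compiles away, so it
// is explicitly voided.
#include <cassert>
#include <set>

int main() {
  std::set<int> Visited;
  bool Inserted = Visited.insert(42).second;
  (void)Inserted; // silence "unused variable" when NDEBUG strips the assert
  assert(Inserted && "should not visit the same value twice");
  return 0;
}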
@@ -429,7 +450,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// Now walk all references.
for (Instruction &I : instructions(F))
for (Value *Op : I.operand_values())
- if (Constant *C = dyn_cast<Constant>(Op))
+ if (auto *C = dyn_cast<Constant>(Op))
if (Visited.insert(C).second)
Worklist.push_back(C);
@@ -441,7 +462,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(E && "No function transformations should introduce *new* ref "
"edges! Any new ref edges would require IPO which "
"function passes aren't allowed to do!");
- RetainedEdges.insert(&RefereeN);
+ bool Inserted = RetainedEdges.insert(&RefereeN).second;
+ (void)Inserted;
+ assert(Inserted && "We should never visit a function twice.");
if (E->isCall())
DemotedCallTargets.insert(&RefereeN);
};
@@ -449,74 +472,82 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// Include synthetic reference edges to known, defined lib functions.
for (auto *F : G.getLibFunctions())
- VisitRef(*F);
+ // While the list of lib functions doesn't have repeats, don't re-visit
+ // anything handled above.
+ if (!Visited.count(F))
+ VisitRef(*F);
// First remove all of the edges that are no longer present in this function.
- // We have to build a list of dead targets first and then remove them as the
- // data structures will all be invalidated by removing them.
- SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets;
- for (Edge &E : *N)
- if (!RetainedEdges.count(&E.getNode()))
- DeadTargets.push_back({&E.getNode(), E.getKind()});
- for (auto DeadTarget : DeadTargets) {
- Node &TargetN = *DeadTarget.getPointer();
- bool IsCall = DeadTarget.getInt() == Edge::Call;
- SCC &TargetC = *G.lookupSCC(TargetN);
- RefSCC &TargetRC = TargetC.getOuterRefSCC();
-
- if (&TargetRC != RC) {
- RC->removeOutgoingEdge(N, TargetN);
- if (DebugLogging)
- dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN
- << "'\n";
+ // The first step makes these edges uniformly ref edges and accumulates them
+ // into a separate data structure so removal doesn't invalidate anything.
+ SmallVector<Node *, 4> DeadTargets;
+ for (Edge &E : *N) {
+ if (RetainedEdges.count(&E.getNode()))
continue;
- }
- if (DebugLogging)
- dbgs() << "Deleting internal " << (IsCall ? "call" : "ref")
- << " edge from '" << N << "' to '" << TargetN << "'\n";
- if (IsCall) {
+ SCC &TargetC = *G.lookupSCC(E.getNode());
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+ if (&TargetRC == RC && E.isCall()) {
if (C != &TargetC) {
// For separate SCCs this is trivial.
- RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ RC->switchTrivialInternalEdgeToRef(N, E.getNode());
} else {
// Now update the call graph.
- C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
- N, C, AM, UR, DebugLogging);
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, E.getNode()),
+ G, N, C, AM, UR);
}
}
- auto NewRefSCCs = RC->removeInternalRefEdge(N, TargetN);
- if (!NewRefSCCs.empty()) {
- // Note that we don't bother to invalidate analyses as ref-edge
- // connectivity is not really observable in any way and is intended
- // exclusively to be used for ordering of transforms rather than for
- // analysis conclusions.
-
- // The RC worklist is in reverse postorder, so we first enqueue the
- // current RefSCC as it will remain the parent of all split RefSCCs, then
- // we enqueue the new ones in RPO except for the one which contains the
- // source node as that is the "bottom" we will continue processing in the
- // bottom-up walk.
- UR.RCWorklist.insert(RC);
- if (DebugLogging)
- dbgs() << "Enqueuing the existing RefSCC in the update worklist: "
- << *RC << "\n";
- // Update the RC to the "bottom".
- assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
- RC = &C->getOuterRefSCC();
- assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");
- assert(NewRefSCCs.front() == RC &&
- "New current RefSCC not first in the returned list!");
- for (RefSCC *NewRC : reverse(
- make_range(std::next(NewRefSCCs.begin()), NewRefSCCs.end()))) {
- assert(NewRC != RC && "Should not encounter the current RefSCC further "
- "in the postorder list of new RefSCCs.");
- UR.RCWorklist.insert(NewRC);
- if (DebugLogging)
- dbgs() << "Enqueuing a new RefSCC in the update worklist: " << *NewRC
- << "\n";
- }
+ // Now that this is ready for actual removal, put it into our list.
+ DeadTargets.push_back(&E.getNode());
+ }
+ // Remove the easy cases quickly and actually pull them out of our list.
+ DeadTargets.erase(
+ llvm::remove_if(DeadTargets,
+ [&](Node *TargetN) {
+ SCC &TargetC = *G.lookupSCC(*TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ // We can't trivially remove internal targets, so skip
+ // those.
+ if (&TargetRC == RC)
+ return false;
+
+ RC->removeOutgoingEdge(N, *TargetN);
+ DEBUG(dbgs() << "Deleting outgoing edge from '" << N
+ << "' to '" << TargetN << "'\n");
+ return true;
+ }),
+ DeadTargets.end());
+
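// Illustrative sketch (not from the patch): the erase/remove_if idiom used
// just above to drop the "easy" dead targets in place while keeping the rest.
// This sketch uses the standard-library iterator form; llvm::remove_if is the
// range-based equivalent.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Targets = {1, 2, 3, 4, 5, 6};
  // remove_if compacts the survivors (odd values) to the front and returns the
  // new logical end; erase then trims the leftover tail.
  Targets.erase(std::remove_if(Targets.begin(), Targets.end(),
                               [](int V) { return V % 2 == 0; }),
                Targets.end());
  for (int V : Targets)
    std::printf("%d ", V); // prints: 1 3 5
  std::printf("\n");
  return 0;
}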
+ // Now do a batch removal of the internal ref edges left.
+ auto NewRefSCCs = RC->removeInternalRefEdge(N, DeadTargets);
+ if (!NewRefSCCs.empty()) {
+ // The old RefSCC is dead, mark it as such.
+ UR.InvalidatedRefSCCs.insert(RC);
+
+ // Note that we don't bother to invalidate analyses as ref-edge
+ // connectivity is not really observable in any way and is intended
+ // exclusively to be used for ordering of transforms rather than for
+ // analysis conclusions.
+
+ // Update RC to the "bottom".
+ assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
+ RC = &C->getOuterRefSCC();
+ assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");
+
+ // The RC worklist is in reverse postorder, so we enqueue the new ones in
+ // RPO except for the one which contains the source node as that is the
+ // "bottom" we will continue processing in the bottom-up walk.
+ assert(NewRefSCCs.front() == RC &&
+ "New current RefSCC not first in the returned list!");
+ for (RefSCC *NewRC : llvm::reverse(make_range(std::next(NewRefSCCs.begin()),
+ NewRefSCCs.end()))) {
+ assert(NewRC != RC && "Should not encounter the current RefSCC further "
+ "in the postorder list of new RefSCCs.");
+ UR.RCWorklist.insert(NewRC);
+ DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: "
+ << *NewRC << "\n");
}
}
@@ -533,9 +564,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(RC->isAncestorOf(TargetRC) &&
"Cannot potentially form RefSCC cycles here!");
RC->switchOutgoingEdgeToRef(N, *RefTarget);
- if (DebugLogging)
- dbgs() << "Switch outgoing call edge to a ref edge from '" << N
- << "' to '" << *RefTarget << "'\n";
+ DEBUG(dbgs() << "Switch outgoing call edge to a ref edge from '" << N
+ << "' to '" << *RefTarget << "'\n");
continue;
}
@@ -549,7 +579,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// Now update the call graph.
C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N,
- C, AM, UR, DebugLogging);
+ C, AM, UR);
}
// Now promote ref edges into call edges.
@@ -563,14 +593,12 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(RC->isAncestorOf(TargetRC) &&
"Cannot potentially form RefSCC cycles here!");
RC->switchOutgoingEdgeToCall(N, *CallTarget);
- if (DebugLogging)
- dbgs() << "Switch outgoing ref edge to a call edge from '" << N
- << "' to '" << *CallTarget << "'\n";
+ DEBUG(dbgs() << "Switch outgoing ref edge to a call edge from '" << N
+ << "' to '" << *CallTarget << "'\n");
continue;
}
- if (DebugLogging)
- dbgs() << "Switch an internal ref edge to a call edge from '" << N
- << "' to '" << *CallTarget << "'\n";
+ DEBUG(dbgs() << "Switch an internal ref edge to a call edge from '" << N
+ << "' to '" << *CallTarget << "'\n");
// Otherwise we are switching an internal ref edge to a call edge. This
// may merge away some SCCs, and we add those to the UpdateResult. We also
@@ -619,21 +647,28 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
AM.invalidate(*C, PA);
}
auto NewSCCIndex = RC->find(*C) - RC->begin();
+ // If we have actually moved an SCC to be topologically "below" the current
+ // one due to merging, we will need to revisit the current SCC after
+ // visiting those moved SCCs.
+ //
+ // It is critical that we *do not* revisit the current SCC unless we
+ // actually move SCCs in the process of merging because otherwise we may
+ // form a cycle where an SCC is split apart, merged, split, merged and so
+ // on infinitely.
if (InitialSCCIndex < NewSCCIndex) {
// Put our current SCC back onto the worklist as we'll visit other SCCs
// that are now definitively ordered prior to the current one in the
// post-order sequence, and may end up observing more precise context to
// optimize the current SCC.
UR.CWorklist.insert(C);
- if (DebugLogging)
- dbgs() << "Enqueuing the existing SCC in the worklist: " << *C << "\n";
+ DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist: " << *C
+ << "\n");
// Enqueue in reverse order as we pop off the back of the worklist.
- for (SCC &MovedC : reverse(make_range(RC->begin() + InitialSCCIndex,
- RC->begin() + NewSCCIndex))) {
+ for (SCC &MovedC : llvm::reverse(make_range(RC->begin() + InitialSCCIndex,
+ RC->begin() + NewSCCIndex))) {
UR.CWorklist.insert(&MovedC);
- if (DebugLogging)
- dbgs() << "Enqueuing a newly earlier in post-order SCC: " << MovedC
- << "\n";
+ DEBUG(dbgs() << "Enqueuing a newly earlier in post-order SCC: "
+ << MovedC << "\n");
}
}
}
diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
index ff5242f69a1b..ac3ea2b73fed 100644
--- a/contrib/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -8,12 +8,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -125,7 +133,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
/// This does not rescan the body of the function, so it is suitable when
/// splicing the body of the old function to the new while also updating all
/// callers from old to new.
-///
void CallGraph::spliceFunction(const Function *From, const Function *To) {
assert(FunctionMap.count(From) && "No CallGraphNode for function!");
assert(!FunctionMap.count(To) &&
@@ -256,7 +263,7 @@ CallGraphWrapperPass::CallGraphWrapperPass() : ModulePass(ID) {
initializeCallGraphWrapperPassPass(*PassRegistry::getPassRegistry());
}
-CallGraphWrapperPass::~CallGraphWrapperPass() {}
+CallGraphWrapperPass::~CallGraphWrapperPass() = default;
void CallGraphWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -291,8 +298,10 @@ void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
#endif
namespace {
+
struct CallGraphPrinterLegacyPass : public ModulePass {
static char ID; // Pass ID, replacement for typeid
+
CallGraphPrinterLegacyPass() : ModulePass(ID) {
initializeCallGraphPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -301,12 +310,14 @@ struct CallGraphPrinterLegacyPass : public ModulePass {
AU.setPreservesAll();
AU.addRequiredTransitive<CallGraphWrapperPass>();
}
+
bool runOnModule(Module &M) override {
getAnalysis<CallGraphWrapperPass>().print(errs(), &M);
return false;
}
};
-}
+
+} // end anonymous namespace
char CallGraphPrinterLegacyPass::ID = 0;
diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index facda246936d..a2dda58a6a2f 100644
--- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -16,18 +16,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <string>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "cgscc-passmgr"
@@ -47,8 +56,8 @@ namespace {
class CGPassManager : public ModulePass, public PMDataManager {
public:
static char ID;
- explicit CGPassManager()
- : ModulePass(ID), PMDataManager() { }
+
+ explicit CGPassManager() : ModulePass(ID), PMDataManager() {}
/// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
@@ -106,7 +115,6 @@ private:
char CGPassManager::ID = 0;
-
bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
CallGraph &CG, bool &CallGraphUpToDate,
bool &DevirtualizedCall) {
@@ -135,7 +143,6 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
return Changed;
}
-
assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
"Invalid CGPassManager member");
FPPassManager *FPP = (FPPassManager*)P;
@@ -162,7 +169,6 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
return Changed;
}
-
/// Scan the functions in the specified CFG and resync the
/// callgraph with the call sites found in it. This is used after
/// FunctionPasses have potentially munged the callgraph, and can be used after
@@ -172,7 +178,6 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
/// meaning it turned an indirect call into a direct call. This happens when
/// a function pass like GVN optimizes away stuff feeding the indirect call.
/// This never happens in checking mode.
-///
bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool CheckingMode) {
DenseMap<Value*, CallGraphNode*> CallSites;
@@ -484,7 +489,6 @@ bool CGPassManager::runOnModule(Module &M) {
return Changed;
}
-
/// Initialize CG
bool CGPassManager::doInitialization(CallGraph &CG) {
bool Changed = false;
@@ -536,7 +540,6 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
CGI->ReplaceNode(Old, New);
}
-
//===----------------------------------------------------------------------===//
// CallGraphSCCPass Implementation
//===----------------------------------------------------------------------===//
@@ -586,22 +589,23 @@ void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<CallGraphWrapperPass>();
}
-
//===----------------------------------------------------------------------===//
// PrintCallGraphPass Implementation
//===----------------------------------------------------------------------===//
namespace {
+
/// PrintCallGraphPass - Print a Module corresponding to a call graph.
///
class PrintCallGraphPass : public CallGraphSCCPass {
std::string Banner;
- raw_ostream &Out; // raw_ostream to print on.
+ raw_ostream &OS; // raw_ostream to print on.
public:
static char ID;
- PrintCallGraphPass(const std::string &B, raw_ostream &o)
- : CallGraphSCCPass(ID), Banner(B), Out(o) {}
+
+ PrintCallGraphPass(const std::string &B, raw_ostream &OS)
+ : CallGraphSCCPass(ID), Banner(B), OS(OS) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -612,18 +616,18 @@ namespace {
auto PrintBannerOnce = [&] () {
if (BannerPrinted)
return;
- Out << Banner;
+ OS << Banner;
BannerPrinted = true;
};
for (CallGraphNode *CGN : SCC) {
if (Function *F = CGN->getFunction()) {
if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) {
PrintBannerOnce();
- F->print(Out);
+ F->print(OS);
}
- } else if (llvm::isFunctionInPrintList("*")) {
+ } else if (isFunctionInPrintList("*")) {
PrintBannerOnce();
- Out << "\nPrinting <null> Function\n";
+ OS << "\nPrinting <null> Function\n";
}
}
return false;
@@ -636,9 +640,9 @@ namespace {
char PrintCallGraphPass::ID = 0;
-Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
+Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &OS,
const std::string &Banner) const {
- return new PrintCallGraphPass(Banner, O);
+ return new PrintCallGraphPass(Banner, OS);
}
bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const {
@@ -649,5 +653,6 @@ bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const {
}
char DummyCGSCCPass::ID = 0;
+
INITIALIZE_PASS(DummyCGSCCPass, "DummyCGSCCPass", "DummyCGSCCPass", false,
false)
diff --git a/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp b/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp
new file mode 100644
index 000000000000..159c1a2d135a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -0,0 +1,144 @@
+//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file holds routines to help analyse compare instructions
+// and fold them into constants or other compare instructions
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CmpInstAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
+
+using namespace llvm;
+
+unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
+ ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
+ : ICI->getPredicate();
+ switch (Pred) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ }
+}
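// Illustrative sketch (not from the patch): the 3-bit codes returned by
// getICmpCode above let predicates be combined with plain bit operations.
// Bit 0 means "greater", bit 1 means "equal", bit 2 means "less", so for
// example UGE (011) is exactly UGT (001) | EQ (010).  A quick stand-alone
// check of that reading of the table:
#include <cstdio>

int main() {
  const unsigned UGT = 1 /*001*/, EQ = 2 /*010*/, UGE = 3 /*011*/,
                 ULT = 4 /*100*/, NE = 5 /*101*/, ULE = 6 /*110*/;
  std::printf("UGT|EQ == UGE: %d\n", (UGT | EQ) == UGE);   // 1
  std::printf("ULT|EQ == ULE: %d\n", (ULT | EQ) == ULE);   // 1
  std::printf("ULT|UGT == NE: %d\n", (ULT | UGT) == NE);   // 1
  return 0;
}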
+
+Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ CmpInst::Predicate &NewICmpPred) {
+ switch (Code) {
+ default: llvm_unreachable("Illegal ICmp code!");
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
+ case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
+ case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ }
+ return nullptr;
+}
+
+bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
+
+bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
+ CmpInst::Predicate &Pred,
+ Value *&X, APInt &Mask, bool LookThruTrunc) {
+ using namespace PatternMatch;
+
+ const APInt *C;
+ if (!match(RHS, m_APInt(C)))
+ return false;
+
+ switch (Pred) {
+ default:
+ return false;
+ case ICmpInst::ICMP_SLT:
+ // X < 0 is equivalent to (X & SignMask) != 0.
+ if (!C->isNullValue())
+ return false;
+ Mask = APInt::getSignMask(C->getBitWidth());
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <= -1 is equivalent to (X & SignMask) != 0.
+ if (!C->isAllOnesValue())
+ return false;
+ Mask = APInt::getSignMask(C->getBitWidth());
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_SGT:
+ // X > -1 is equivalent to (X & SignMask) == 0.
+ if (!C->isAllOnesValue())
+ return false;
+ Mask = APInt::getSignMask(C->getBitWidth());
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // X >= 0 is equivalent to (X & SignMask) == 0.
+ if (!C->isNullValue())
+ return false;
+ Mask = APInt::getSignMask(C->getBitWidth());
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
+ if (!C->isPowerOf2())
+ return false;
+ Mask = -*C;
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_ULE:
+ // X <=u 2^n-1 is equivalent to (X & ~(2^n-1)) == 0.
+ if (!(*C + 1).isPowerOf2())
+ return false;
+ Mask = ~*C;
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
+ if (!(*C + 1).isPowerOf2())
+ return false;
+ Mask = ~*C;
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 2^n is equivalent to (X & ~(2^n-1)) != 0.
+ if (!C->isPowerOf2())
+ return false;
+ Mask = -*C;
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
+
+ if (LookThruTrunc && match(LHS, m_Trunc(m_Value(X)))) {
+ Mask = Mask.zext(X->getType()->getScalarSizeInBits());
+ } else {
+ X = LHS;
+ }
+
+ return true;
+}
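// Illustrative sketch (not from the patch): exhaustively checking, over all
// 8-bit values, two of the equivalences that decomposeBitTestICmp above relies
// on:
//   X <u 8   <=>  (X & -8)   == 0   (ICMP_ULT against a power of two, Mask = -C)
//   X >s -1  <=>  (X & 0x80) == 0   (ICMP_SGT against -1, sign-bit test)
#include <cstdint>
#include <cstdio>

int main() {
  bool AllOk = true;
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    bool ULT = X < 8;
    bool MaskedULT = (X & static_cast<uint8_t>(-8)) == 0;
    bool SGT = static_cast<int8_t>(X) > -1;
    bool MaskedSGT = (X & 0x80) == 0;
    AllOk = AllOk && (ULT == MaskedULT) && (SGT == MaskedSGT);
  }
  std::printf("equivalences hold: %s\n", AllOk ? "yes" : "no");
  return 0;
}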
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index e4d9292db92d..ac7d14ebdaea 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -19,7 +19,6 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index 0f5ec3f5626e..e88b8f14d54e 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1359,7 +1359,7 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
//
bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
- if (CS.isNoBuiltin())
+ if (CS.isNoBuiltin() || CS.isStrictFP())
return false;
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
@@ -2066,7 +2066,7 @@ Constant *
llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
- if (CS.isNoBuiltin())
+ if (CS.isNoBuiltin() || CS.isStrictFP())
return nullptr;
if (!F->hasName())
return nullptr;
@@ -2084,7 +2084,7 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
// FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
// (and to some extent ConstantFoldScalarCall).
- if (CS.isNoBuiltin())
+ if (CS.isNoBuiltin() || CS.isStrictFP())
return false;
Function *F = CS.getCalledFunction();
if (!F)
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
index 32bfea58bf9d..3d55bf20bb40 100644
--- a/contrib/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -20,24 +20,26 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
+ "cost-kind", cl::desc("Target cost kind"),
+ cl::init(TargetTransformInfo::TCK_RecipThroughput),
+ cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput,
+ "throughput", "Reciprocal throughput"),
+ clEnumValN(TargetTransformInfo::TCK_Latency,
+ "latency", "Instruction latency"),
+ clEnumValN(TargetTransformInfo::TCK_CodeSize,
+ "code-size", "Code size")));
+
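// Illustrative sketch (not from the patch): how a clEnumValN-based option like
// the -cost-kind flag added above maps a command-line string onto an enum
// value.  The enum, option name, and program below are hypothetical stand-ins
// and assume an LLVM build providing the Support headers; only the cl:: calls
// mirror the ones the patch uses.
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
enum DemoCostKind { DCK_Throughput, DCK_Latency, DCK_CodeSize };

cl::opt<DemoCostKind> DemoKind(
    "demo-cost-kind", cl::desc("Demo cost kind"), cl::init(DCK_Throughput),
    cl::values(clEnumValN(DCK_Throughput, "throughput", "Reciprocal throughput"),
               clEnumValN(DCK_Latency, "latency", "Instruction latency"),
               clEnumValN(DCK_CodeSize, "code-size", "Code size")));
} // end anonymous namespace

int main(int argc, char **argv) {
  // e.g. ./demo -demo-cost-kind=latency
  cl::ParseCommandLineOptions(argc, argv, "cl::opt enum demo\n");
  outs() << "selected kind: " << static_cast<int>(DemoKind.getValue()) << "\n";
  return 0;
}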
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
-static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
- cl::Hidden,
- cl::desc("Recognize reduction patterns."));
-
namespace {
class CostModelAnalysis : public FunctionPass {
@@ -52,7 +54,9 @@ namespace {
/// Returns -1 if the cost is unknown.
/// Note, this method does not cache the cost calculation and it
/// can be expensive in some cases.
- unsigned getInstructionCost(const Instruction *I) const;
+ unsigned getInstructionCost(const Instruction *I) const {
+ return TTI->getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);
+ }
private:
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -90,481 +94,13 @@ CostModelAnalysis::runOnFunction(Function &F) {
return false;
}
-static bool isReverseVectorMask(ArrayRef<int> Mask) {
- for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
- if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
- return false;
- return true;
-}
-
-static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
- bool Vec0 = false;
- bool Vec1 = false;
- for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
- if (Mask[i] >= 0) {
- if ((unsigned)Mask[i] >= NumVecElts)
- Vec1 = true;
- else
- Vec0 = true;
- }
- }
- return !(Vec0 && Vec1);
-}
-
-static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
- for (unsigned i = 0; i < Mask.size(); ++i)
- if (Mask[i] > 0)
- return false;
- return true;
-}
-
-static bool isAlternateVectorMask(ArrayRef<int> Mask) {
- bool isAlternate = true;
- unsigned MaskSize = Mask.size();
-
- // Example: shufflevector A, B, <0,5,2,7>
- for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
- if (Mask[i] < 0)
- continue;
- isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
- }
-
- if (isAlternate)
- return true;
-
- isAlternate = true;
- // Example: shufflevector A, B, <4,1,6,3>
- for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
- if (Mask[i] < 0)
- continue;
- isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
- }
-
- return isAlternate;
-}
-
-static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
- TargetTransformInfo::OperandValueKind OpInfo =
- TargetTransformInfo::OK_AnyValue;
-
- // Check for a splat of a constant or for a non uniform vector of constants.
- if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
- OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
- if (cast<Constant>(V)->getSplatValue() != nullptr)
- OpInfo = TargetTransformInfo::OK_UniformConstantValue;
- }
-
- // Check for a splat of a uniform value. This is not loop aware, so return
- // true only for the obviously uniform cases (argument, globalvalue)
- const Value *Splat = getSplatValue(V);
- if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
- OpInfo = TargetTransformInfo::OK_UniformValue;
-
- return OpInfo;
-}
-
-static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
- unsigned Level) {
- // We don't need a shuffle if we just want to have element 0 in position 0 of
- // the vector.
- if (!SI && Level == 0 && IsLeft)
- return true;
- else if (!SI)
- return false;
-
- SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
-
- // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
- // we look at the left or right side.
- for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
- Mask[i] = val;
-
- SmallVector<int, 16> ActualMask = SI->getShuffleMask();
- return Mask == ActualMask;
-}
-
-static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
- unsigned Level, unsigned NumLevels) {
- // Match one level of pairwise operations.
- // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
- // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
- // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
- // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
- // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
- if (BinOp == nullptr)
- return false;
-
- assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
-
- unsigned Opcode = BinOp->getOpcode();
- Value *L = BinOp->getOperand(0);
- Value *R = BinOp->getOperand(1);
-
- ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
- if (!LS && Level)
- return false;
- ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
- if (!RS && Level)
- return false;
-
- // On level 0 we can omit one shufflevector instruction.
- if (!Level && !RS && !LS)
- return false;
-
- // Shuffle inputs must match.
- Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
- Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
- Value *NextLevelOp = nullptr;
- if (NextLevelOpR && NextLevelOpL) {
- // If we have two shuffles their operands must match.
- if (NextLevelOpL != NextLevelOpR)
- return false;
-
- NextLevelOp = NextLevelOpL;
- } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
- // On the first level we can omit the shufflevector <0, undef,...>. So the
- // input to the other shufflevector <1, undef> must match with one of the
- // inputs to the current binary operation.
- // Example:
- // %NextLevelOpL = shufflevector %R, <1, undef ...>
- // %BinOp = fadd %NextLevelOpL, %R
- if (NextLevelOpL && NextLevelOpL != R)
- return false;
- else if (NextLevelOpR && NextLevelOpR != L)
- return false;
-
- NextLevelOp = NextLevelOpL ? R : L;
- } else
- return false;
-
- // Check that the next levels binary operation exists and matches with the
- // current one.
- BinaryOperator *NextLevelBinOp = nullptr;
- if (Level + 1 != NumLevels) {
- if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
- return false;
- else if (NextLevelBinOp->getOpcode() != Opcode)
- return false;
- }
-
- // Shuffle mask for pairwise operation must match.
- if (matchPairwiseShuffleMask(LS, true, Level)) {
- if (!matchPairwiseShuffleMask(RS, false, Level))
- return false;
- } else if (matchPairwiseShuffleMask(RS, true, Level)) {
- if (!matchPairwiseShuffleMask(LS, false, Level))
- return false;
- } else
- return false;
-
- if (++Level == NumLevels)
- return true;
-
- // Match next level.
- return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
-}
-
-static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
- unsigned &Opcode, Type *&Ty) {
- if (!EnableReduxCost)
- return false;
-
- // Need to extract the first element.
- ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
- unsigned Idx = ~0u;
- if (CI)
- Idx = CI->getZExtValue();
- if (Idx != 0)
- return false;
-
- BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
- if (!RdxStart)
- return false;
-
- Type *VecTy = ReduxRoot->getOperand(0)->getType();
- unsigned NumVecElems = VecTy->getVectorNumElements();
- if (!isPowerOf2_32(NumVecElems))
- return false;
-
- // We look for a sequence of shuffle,shuffle,add triples like the following
- // that builds a pairwise reduction tree.
- //
- // (X0, X1, X2, X3)
- // (X0 + X1, X2 + X3, undef, undef)
- // ((X0 + X1) + (X2 + X3), undef, undef, undef)
- //
- // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
- // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
- // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
- // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
- // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
- // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
- // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
- // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
- // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
- // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
- // %r = extractelement <4 x float> %bin.rdx8, i32 0
- if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)))
- return false;
-
- Opcode = RdxStart->getOpcode();
- Ty = VecTy;
-
- return true;
-}
-
-static std::pair<Value *, ShuffleVectorInst *>
-getShuffleAndOtherOprd(BinaryOperator *B) {
-
- Value *L = B->getOperand(0);
- Value *R = B->getOperand(1);
- ShuffleVectorInst *S = nullptr;
-
- if ((S = dyn_cast<ShuffleVectorInst>(L)))
- return std::make_pair(R, S);
-
- S = dyn_cast<ShuffleVectorInst>(R);
- return std::make_pair(L, S);
-}
-
-static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
- unsigned &Opcode, Type *&Ty) {
- if (!EnableReduxCost)
- return false;
-
- // Need to extract the first element.
- ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
- unsigned Idx = ~0u;
- if (CI)
- Idx = CI->getZExtValue();
- if (Idx != 0)
- return false;
-
- BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
- if (!RdxStart)
- return false;
- unsigned RdxOpcode = RdxStart->getOpcode();
-
- Type *VecTy = ReduxRoot->getOperand(0)->getType();
- unsigned NumVecElems = VecTy->getVectorNumElements();
- if (!isPowerOf2_32(NumVecElems))
- return false;
-
- // We look for a sequence of shuffles and adds like the following matching one
- // fadd, shuffle vector pair at a time.
- //
- // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
- // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
- // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
- // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
- // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
- // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
- // %r = extractelement <4 x float> %bin.rdx8, i32 0
-
- unsigned MaskStart = 1;
- Value *RdxOp = RdxStart;
- SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
- unsigned NumVecElemsRemain = NumVecElems;
- while (NumVecElemsRemain - 1) {
- // Check for the right reduction operation.
- BinaryOperator *BinOp;
- if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
- return false;
- if (BinOp->getOpcode() != RdxOpcode)
- return false;
-
- Value *NextRdxOp;
- ShuffleVectorInst *Shuffle;
- std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
-
- // Check the current reduction operation and the shuffle use the same value.
- if (Shuffle == nullptr)
- return false;
- if (Shuffle->getOperand(0) != NextRdxOp)
- return false;
-
-    // Check that the shuffle masks match.
- for (unsigned j = 0; j != MaskStart; ++j)
- ShuffleMask[j] = MaskStart + j;
- // Fill the rest of the mask with -1 for undef.
- std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
-
- SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
- if (ShuffleMask != Mask)
- return false;
-
- RdxOp = NextRdxOp;
- NumVecElemsRemain /= 2;
- MaskStart *= 2;
- }
-
- Opcode = RdxOpcode;
- Ty = VecTy;
- return true;
-}
-
-unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
- if (!TTI)
- return -1;
-
- switch (I->getOpcode()) {
- case Instruction::GetElementPtr:
- return TTI->getUserCost(I);
-
- case Instruction::Ret:
- case Instruction::PHI:
- case Instruction::Br: {
- return TTI->getCFInstrCost(I->getOpcode());
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- TargetTransformInfo::OperandValueKind Op1VK =
- getOperandInfo(I->getOperand(0));
- TargetTransformInfo::OperandValueKind Op2VK =
- getOperandInfo(I->getOperand(1));
- SmallVector<const Value*, 2> Operands(I->operand_values());
- return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
- Op2VK, TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None,
- Operands);
- }
- case Instruction::Select: {
- const SelectInst *SI = cast<SelectInst>(I);
- Type *CondTy = SI->getCondition()->getType();
- return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
- }
- case Instruction::ICmp:
- case Instruction::FCmp: {
- Type *ValTy = I->getOperand(0)->getType();
- return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
- }
- case Instruction::Store: {
- const StoreInst *SI = cast<StoreInst>(I);
- Type *ValTy = SI->getValueOperand()->getType();
- return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
- SI->getAlignment(),
- SI->getPointerAddressSpace(), I);
- }
- case Instruction::Load: {
- const LoadInst *LI = cast<LoadInst>(I);
- return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
- LI->getAlignment(),
- LI->getPointerAddressSpace(), I);
- }
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast: {
- Type *SrcTy = I->getOperand(0)->getType();
- return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
- }
- case Instruction::ExtractElement: {
- const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
- ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
- unsigned Idx = -1;
- if (CI)
- Idx = CI->getZExtValue();
-
- // Try to match a reduction sequence (series of shufflevector and vector
-    // adds followed by an extractelement).
- unsigned ReduxOpCode;
- Type *ReduxType;
-
- if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
- return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
- else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
- return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
-
- return TTI->getVectorInstrCost(I->getOpcode(),
- EEI->getOperand(0)->getType(), Idx);
- }
- case Instruction::InsertElement: {
- const InsertElementInst * IE = cast<InsertElementInst>(I);
- ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
- unsigned Idx = -1;
- if (CI)
- Idx = CI->getZExtValue();
- return TTI->getVectorInstrCost(I->getOpcode(),
- IE->getType(), Idx);
- }
- case Instruction::ShuffleVector: {
- const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
- unsigned NumVecElems = VecTypOp0->getVectorNumElements();
- SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
-
- if (NumVecElems == Mask.size()) {
- if (isReverseVectorMask(Mask))
- return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
- 0, nullptr);
- if (isAlternateVectorMask(Mask))
- return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
- VecTypOp0, 0, nullptr);
-
- if (isZeroEltBroadcastVectorMask(Mask))
- return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
- VecTypOp0, 0, nullptr);
-
- if (isSingleSourceVectorMask(Mask))
- return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
- VecTypOp0, 0, nullptr);
-
- return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- VecTypOp0, 0, nullptr);
- }
-
- return -1;
- }
- case Instruction::Call:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- SmallVector<Value *, 4> Args(II->arg_operands());
-
- FastMathFlags FMF;
- if (auto *FPMO = dyn_cast<FPMathOperator>(II))
- FMF = FPMO->getFastMathFlags();
-
- return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
- Args, FMF);
- }
- return -1;
- default:
- // We don't have any information on this instruction.
- return -1;
- }
-}
-
void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
if (!F)
return;
for (BasicBlock &B : *F) {
for (Instruction &Inst : B) {
- unsigned Cost = getInstructionCost(&Inst);
+ unsigned Cost = TTI->getInstructionCost(&Inst, CostKind);
if (Cost != (unsigned)-1)
OS << "Cost Model: Found an estimated cost of " << Cost;
else
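The print() hunk above is all that remains of this file's cost logic: per-instruction costing now goes through TargetTransformInfo::getInstructionCost. A minimal sketch of querying costs the same way from outside this pass, assuming the getInstructionCost(Instruction *, TargetCostKind) overload this revision targets and the TCK_RecipThroughput cost kind (an assumption; the CostKind value used above is defined outside the shown context):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Sketch only: print a reciprocal-throughput cost estimate for each
// instruction in F, mirroring what CostModelAnalysis::print now does.
static void printCosts(Function &F, const TargetTransformInfo &TTI,
                       raw_ostream &OS) {
  for (Instruction &I : instructions(F)) {
    int Cost =
        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
    if (Cost != -1)
      OS << "estimated cost " << Cost << " for: " << I << "\n";
    else
      OS << "unknown cost for: " << I << "\n";
  }
}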
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
index 9c53f9140ca3..7276f2524fed 100644
--- a/contrib/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -1,4 +1,4 @@
-//===---- DemandedBits.cpp - Determine demanded bits ----------------------===//
+//===- DemandedBits.cpp - Determine demanded bits -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,30 +20,41 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DemandedBits.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "demanded-bits"
char DemandedBitsWrapperPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(DemandedBitsWrapperPass, "demanded-bits",
"Demanded bits analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
@@ -357,7 +368,7 @@ void DemandedBits::performAnalysis() {
APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
- const DataLayout &DL = I->getParent()->getModule()->getDataLayout();
+ const DataLayout &DL = I->getModule()->getDataLayout();
auto Found = AliveBits.find(I);
if (Found != AliveBits.end())
return Found->second;
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 2d39a0b02150..ac684ec18466 100644
--- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -71,7 +71,6 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
index c08c6cfe0c3b..bb8caf4a5174 100644
--- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -9,15 +9,23 @@
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/DominanceFrontierImpl.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace llvm {
+
template class DominanceFrontierBase<BasicBlock, false>;
template class DominanceFrontierBase<BasicBlock, true>;
template class ForwardDominanceFrontierBase<BasicBlock>;
-}
+
+} // end namespace llvm
char DominanceFrontierWrapperPass::ID = 0;
@@ -27,7 +35,7 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(DominanceFrontierWrapperPass, "domfrontier",
"Dominance Frontier Construction", true, true)
- DominanceFrontierWrapperPass::DominanceFrontierWrapperPass()
+DominanceFrontierWrapperPass::DominanceFrontierWrapperPass()
: FunctionPass(ID), DF() {
initializeDominanceFrontierWrapperPassPass(*PassRegistry::getPassRegistry());
}
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
index 4ef023379bb6..23109c67e5c3 100644
--- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -84,12 +84,13 @@ class GlobalsAAResult::FunctionInfo {
/// The bit that flags that this function may read any global. This is
/// chosen to mix together with ModRefInfo bits.
+ /// FIXME: This assumes ModRefInfo lattice will remain 4 bits!
enum { MayReadAnyGlobal = 4 };
/// Checks to document the invariants of the bit packing here.
- static_assert((MayReadAnyGlobal & MRI_ModRef) == 0,
+ static_assert((MayReadAnyGlobal & static_cast<int>(ModRefInfo::ModRef)) == 0,
"ModRef and the MayReadAnyGlobal flag bits overlap.");
- static_assert(((MayReadAnyGlobal | MRI_ModRef) >>
+ static_assert(((MayReadAnyGlobal | static_cast<int>(ModRefInfo::ModRef)) >>
AlignedMapPointerTraits::NumLowBitsAvailable) == 0,
"Insufficient low bits to store our flag and ModRef info.");
@@ -126,12 +127,12 @@ public:
/// Returns the \c ModRefInfo info for this function.
ModRefInfo getModRefInfo() const {
- return ModRefInfo(Info.getInt() & MRI_ModRef);
+ return ModRefInfo(Info.getInt() & static_cast<int>(ModRefInfo::ModRef));
}
/// Adds new \c ModRefInfo for this function to its state.
void addModRefInfo(ModRefInfo NewMRI) {
- Info.setInt(Info.getInt() | NewMRI);
+ Info.setInt(Info.getInt() | static_cast<int>(NewMRI));
}
/// Returns whether this function may read any global variable, and we don't
@@ -144,17 +145,18 @@ public:
/// Returns the \c ModRefInfo info for this function w.r.t. a particular
/// global, which may be more precise than the general information above.
ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const {
- ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef;
+ ModRefInfo GlobalMRI =
+ mayReadAnyGlobal() ? ModRefInfo::Ref : ModRefInfo::NoModRef;
if (AlignedMap *P = Info.getPointer()) {
auto I = P->Map.find(&GV);
if (I != P->Map.end())
- GlobalMRI = ModRefInfo(GlobalMRI | I->second);
+ GlobalMRI = unionModRef(GlobalMRI, I->second);
}
return GlobalMRI;
}
/// Add mod/ref info from another function into ours, saturating towards
- /// MRI_ModRef.
+ /// ModRef.
void addFunctionInfo(const FunctionInfo &FI) {
addModRefInfo(FI.getModRefInfo());
@@ -173,7 +175,7 @@ public:
Info.setPointer(P);
}
auto &GlobalMRI = P->Map[&GV];
- GlobalMRI = ModRefInfo(GlobalMRI | NewMRI);
+ GlobalMRI = unionModRef(GlobalMRI, NewMRI);
}
/// Clear a global's ModRef info. Should be used when a global is being
@@ -230,9 +232,9 @@ FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
if (FunctionInfo *FI = getFunctionInfo(F)) {
- if (FI->getModRefInfo() == MRI_NoModRef)
+ if (!isModOrRefSet(FI->getModRefInfo()))
Min = FMRB_DoesNotAccessMemory;
- else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+ else if (!isModSet(FI->getModRefInfo()))
Min = FMRB_OnlyReadsMemory;
}
@@ -246,9 +248,9 @@ GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
if (!CS.hasOperandBundles())
if (const Function *F = CS.getCalledFunction())
if (FunctionInfo *FI = getFunctionInfo(F)) {
- if (FI->getModRefInfo() == MRI_NoModRef)
+ if (!isModOrRefSet(FI->getModRefInfo()))
Min = FMRB_DoesNotAccessMemory;
- else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+ else if (!isModSet(FI->getModRefInfo()))
Min = FMRB_OnlyReadsMemory;
}
@@ -297,7 +299,7 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) {
Handles.emplace_front(*this, Reader);
Handles.front().I = Handles.begin();
}
- FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref);
+ FunctionInfos[Reader].addModRefInfoForGlobal(GV, ModRefInfo::Ref);
}
if (!GV.isConstant()) // No need to keep track of writers to constants
@@ -306,7 +308,7 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) {
Handles.emplace_front(*this, Writer);
Handles.front().I = Handles.begin();
}
- FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod);
+ FunctionInfos[Writer].addModRefInfoForGlobal(GV, ModRefInfo::Mod);
}
++NumNonAddrTakenGlobalVars;
@@ -502,13 +504,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
if (F->doesNotAccessMemory()) {
// Can't do better than that!
} else if (F->onlyReadsMemory()) {
- FI.addModRefInfo(MRI_Ref);
+ FI.addModRefInfo(ModRefInfo::Ref);
if (!F->isIntrinsic() && !F->onlyAccessesArgMemory())
// This function might call back into the module and read a global -
// consider every global as possibly being read by this function.
FI.setMayReadAnyGlobal();
} else {
- FI.addModRefInfo(MRI_ModRef);
+ FI.addModRefInfo(ModRefInfo::ModRef);
// Can't say anything useful unless it's an intrinsic - they don't
// read or write global variables of the kind considered here.
KnowNothing = !F->isIntrinsic();
@@ -544,7 +546,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Scan the function bodies for explicit loads or stores.
for (auto *Node : SCC) {
- if (FI.getModRefInfo() == MRI_ModRef)
+ if (isModAndRefSet(FI.getModRefInfo()))
break; // The mod/ref lattice saturates here.
// Don't prove any properties based on the implementation of an optnone
@@ -554,7 +556,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
continue;
for (Instruction &I : instructions(Node->getFunction())) {
- if (FI.getModRefInfo() == MRI_ModRef)
+ if (isModAndRefSet(FI.getModRefInfo()))
break; // The mod/ref lattice saturates here.
// We handle calls specially because the graph-relevant aspects are
@@ -563,13 +565,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
// FIXME: It is completely unclear why this is necessary and not
// handled by the above graph code.
- FI.addModRefInfo(MRI_ModRef);
+ FI.addModRefInfo(ModRefInfo::ModRef);
} else if (Function *Callee = CS.getCalledFunction()) {
// The callgraph doesn't include intrinsic calls.
if (Callee->isIntrinsic()) {
FunctionModRefBehavior Behaviour =
AAResultBase::getModRefBehavior(Callee);
- FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef));
+ FI.addModRefInfo(createModRefInfo(Behaviour));
}
}
continue;
@@ -578,15 +580,15 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
      // For all non-call instructions, we use the primary predicates for whether
      // they read or write memory.
if (I.mayReadFromMemory())
- FI.addModRefInfo(MRI_Ref);
+ FI.addModRefInfo(ModRefInfo::Ref);
if (I.mayWriteToMemory())
- FI.addModRefInfo(MRI_Mod);
+ FI.addModRefInfo(ModRefInfo::Mod);
}
}
- if ((FI.getModRefInfo() & MRI_Mod) == 0)
+ if (!isModSet(FI.getModRefInfo()))
++NumReadMemFunctions;
- if (FI.getModRefInfo() == MRI_NoModRef)
+ if (!isModOrRefSet(FI.getModRefInfo()))
++NumNoMemFunctions;
// Finally, now that we know the full effect on this SCC, clone the
@@ -867,8 +869,9 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
const GlobalValue *GV) {
if (CS.doesNotAccessMemory())
- return MRI_NoModRef;
- ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
+ return ModRefInfo::NoModRef;
+ ModRefInfo ConservativeResult =
+ CS.onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef;
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
@@ -889,12 +892,12 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
}
// We identified all objects in the argument list, and none of them were GV.
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
}
ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
const MemoryLocation &Loc) {
- unsigned Known = MRI_ModRef;
+ ModRefInfo Known = ModRefInfo::ModRef;
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
@@ -904,12 +907,12 @@ ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
if (const FunctionInfo *FI = getFunctionInfo(F))
- Known = FI->getModRefInfoForGlobal(*GV) |
- getModRefInfoForArgument(CS, GV);
+ Known = unionModRef(FI->getModRefInfoForGlobal(*GV),
+ getModRefInfoForArgument(CS, GV));
- if (Known == MRI_NoModRef)
- return MRI_NoModRef; // No need to query other mod/ref analyses
- return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc));
+ if (!isModOrRefSet(Known))
+ return ModRefInfo::NoModRef; // No need to query other mod/ref analyses
+ return intersectModRef(Known, AAResultBase::getModRefInfo(CS, Loc));
}
GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
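The hunks above trade the old MRI_* integer flags for the scoped ModRefInfo enum and its helper predicates (unionModRef, intersectModRef, isModSet, isModOrRefSet, isModAndRefSet, createModRefInfo). A rough self-contained sketch of that pattern, as a simplified stand-in for illustration rather than the real definitions in llvm/Analysis/AliasAnalysis.h:

#include <cstdint>

// Simplified stand-in for the scoped enum and helpers used in the hunks above.
enum class ModRefInfo : uint8_t {
  NoModRef = 0,
  Ref = 1,
  Mod = 2,
  ModRef = 3, // Ref | Mod
};

static inline ModRefInfo unionModRef(ModRefInfo A, ModRefInfo B) {
  return ModRefInfo(static_cast<uint8_t>(A) | static_cast<uint8_t>(B));
}
static inline ModRefInfo intersectModRef(ModRefInfo A, ModRefInfo B) {
  return ModRefInfo(static_cast<uint8_t>(A) & static_cast<uint8_t>(B));
}
static inline bool isModSet(ModRefInfo MRI) {
  return (static_cast<uint8_t>(MRI) & static_cast<uint8_t>(ModRefInfo::Mod)) != 0;
}
static inline bool isModOrRefSet(ModRefInfo MRI) {
  return static_cast<uint8_t>(MRI) != 0;
}
static inline bool isModAndRefSet(ModRefInfo MRI) {
  return MRI == ModRefInfo::ModRef;
}

// e.g. isModSet(unionModRef(ModRefInfo::Ref, ModRefInfo::Mod)) is true, while
// isModOrRefSet(ModRefInfo::NoModRef) is false.

Because the enum is scoped, raw bitwise mixing such as Info.getInt() | NewMRI no longer compiles, which is why the PointerIntPair and static_assert code above now needs explicit casts or the helper functions.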
diff --git a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index ed233d201537..c11176bbb9c8 100644
--- a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/IndirectCallSiteVisitor.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -32,25 +31,25 @@ using namespace llvm;
#define DEBUG_TYPE "pgo-icall-prom-analysis"
-// The minimum call count for the direct-call target to be considered as the
-// promotion candidate.
-static cl::opt<unsigned>
- ICPCountThreshold("icp-count-threshold", cl::Hidden, cl::ZeroOrMore,
- cl::init(1000),
- cl::desc("The minimum count to the direct call target "
- "for the promotion"));
+// The percent threshold for the direct-call target (this call site vs the
+// remaining call count) for it to be considered as the promotion target.
+static cl::opt<unsigned> ICPRemainingPercentThreshold(
+ "icp-remaining-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("The percentage threshold against remaining unpromoted indirect "
+ "call count for the promotion"));
// The percent threshold for the direct-call target (this call site vs the
// total call count) for it to be considered as the promotion target.
static cl::opt<unsigned>
- ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden,
- cl::ZeroOrMore,
- cl::desc("The percentage threshold for the promotion"));
+ ICPTotalPercentThreshold("icp-total-percent-threshold", cl::init(5),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("The percentage threshold against total "
+ "count for the promotion"));
// Set the maximum number of targets to promote for a single indirect-call
// callsite.
static cl::opt<unsigned>
- MaxNumPromotions("icp-max-prom", cl::init(2), cl::Hidden, cl::ZeroOrMore,
+ MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore,
cl::desc("Max number of promotions for a single indirect "
"call callsite"));
@@ -59,12 +58,10 @@ ICallPromotionAnalysis::ICallPromotionAnalysis() {
}
bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count,
- uint64_t TotalCount) {
- if (Count < ICPCountThreshold)
- return false;
-
- unsigned Percentage = (Count * 100) / TotalCount;
- return (Percentage >= ICPPercentThreshold);
+ uint64_t TotalCount,
+ uint64_t RemainingCount) {
+ return Count * 100 >= ICPRemainingPercentThreshold * RemainingCount &&
+ Count * 100 >= ICPTotalPercentThreshold * TotalCount;
}
// Indirect-call promotion heuristic. The direct targets are sorted based on
@@ -78,17 +75,18 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
<< "\n");
uint32_t I = 0;
+ uint64_t RemainingCount = TotalCount;
for (; I < MaxNumPromotions && I < NumVals; I++) {
uint64_t Count = ValueDataRef[I].Count;
- assert(Count <= TotalCount);
+ assert(Count <= RemainingCount);
DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
<< " Target_func: " << ValueDataRef[I].Value << "\n");
- if (!isPromotionProfitable(Count, TotalCount)) {
+ if (!isPromotionProfitable(Count, TotalCount, RemainingCount)) {
DEBUG(dbgs() << " Not promote: Cold target.\n");
return I;
}
- TotalCount -= Count;
+ RemainingCount -= Count;
}
return I;
}
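Worked through with hypothetical counts and the new defaults of 30 and 5 percent shown above: if TotalCount is 10000 and the sorted targets have counts 6000, 2500 and 300, the first candidate passes both tests (600000 >= 30 * 10000 and 600000 >= 5 * 10000). RemainingCount then drops to 4000 and the second candidate still passes (250000 >= 30 * 4000 and >= 5 * 10000), but the third fails both (30000 < 30 * 1500 and 30000 < 5 * 10000), so two profitable candidates are returned.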
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index 35693666aa03..fba96c8976a6 100644
--- a/contrib/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -21,9 +21,11 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -66,12 +68,27 @@ static cl::opt<int>
cl::ZeroOrMore,
cl::desc("Threshold for hot callsites "));
+static cl::opt<int> LocallyHotCallSiteThreshold(
+ "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
+ cl::desc("Threshold for locally hot callsites "));
+
static cl::opt<int> ColdCallSiteRelFreq(
"cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
"entry frequency, for a callsite to be cold in the absence of "
"profile information."));
+static cl::opt<int> HotCallSiteRelFreq(
+ "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
+ cl::desc("Minimum block frequency, expressed as a multiple of caller's "
+ "entry frequency, for a callsite to be hot in the absence of "
+ "profile information."));
+
+static cl::opt<bool> OptComputeFullInlineCost(
+ "inline-cost-full", cl::Hidden, cl::init(false),
+ cl::desc("Compute the full inline cost of a call site even when the cost "
+ "exceeds the threshold."));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -96,6 +113,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
// Cache the DataLayout since we use it a lot.
const DataLayout &DL;
+ /// The OptimizationRemarkEmitter available for this compilation.
+ OptimizationRemarkEmitter *ORE;
+
/// The candidate callsite being analyzed. Please do not use this to do
/// analysis in the caller function; we want the inline cost query to be
/// easily cacheable. Instead, use the cover function paramHasAttr.
@@ -106,6 +126,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
int Threshold;
int Cost;
+ bool ComputeFullInlineCost;
bool IsCallerRecursive;
bool IsRecursiveCall;
@@ -119,8 +140,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
unsigned NumInstructions, NumVectorInstructions;
- int FiftyPercentVectorBonus, TenPercentVectorBonus;
- int VectorBonus;
+ int VectorBonus, TenPercentVectorBonus;
+ // Bonus to be applied when the callee has only one reachable basic block.
+ int SingleBBBonus;
/// While we walk the potentially-inlined instructions, we build up and
/// maintain a mapping of simplified values specific to this callsite. The
@@ -143,15 +165,32 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Keep track of values which map to a pointer base and constant offset.
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
+ /// Keep track of dead blocks due to the constant arguments.
+ SetVector<BasicBlock *> DeadBlocks;
+
+ /// The mapping of the blocks to their known unique successors due to the
+ /// constant arguments.
+ DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
+
+ /// Model the elimination of repeated loads that is expected to happen
+ /// whenever we simplify away the stores that would otherwise cause them to be
+ /// loads.
+ bool EnableLoadElimination;
+ SmallPtrSet<Value *, 16> LoadAddrSet;
+ int LoadEliminationCost;
+
// Custom simplification helper routines.
bool isAllocaDerivedArg(Value *V);
bool lookupSROAArgAndCost(Value *V, Value *&Arg,
DenseMap<Value *, int>::iterator &CostIt);
void disableSROA(DenseMap<Value *, int>::iterator CostIt);
void disableSROA(Value *V);
+ void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost);
+ void disableLoadElimination();
bool isGEPFree(GetElementPtrInst &GEP);
+ bool canFoldInboundsGEP(GetElementPtrInst &I);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
bool simplifyCallSite(Function *F, CallSite CS);
template <typename Callable>
@@ -181,6 +220,10 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Return true if \p CS is a cold callsite.
bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+ /// Return a higher threshold if \p CS is a hot callsite.
+ Optional<int> getHotCallSiteThreshold(CallSite CS,
+ BlockFrequencyInfo *CallerBFI);
+
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
@@ -206,6 +249,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitCastInst(CastInst &I);
bool visitUnaryInstruction(UnaryInstruction &I);
bool visitCmpInst(CmpInst &I);
+ bool visitAnd(BinaryOperator &I);
+ bool visitOr(BinaryOperator &I);
bool visitSub(BinaryOperator &I);
bool visitBinaryOperator(BinaryOperator &I);
bool visitLoad(LoadInst &I);
@@ -215,6 +260,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitCallSite(CallSite CS);
bool visitReturnInst(ReturnInst &RI);
bool visitBranchInst(BranchInst &BI);
+ bool visitSelectInst(SelectInst &SI);
bool visitSwitchInst(SwitchInst &SI);
bool visitIndirectBrInst(IndirectBrInst &IBI);
bool visitResumeInst(ResumeInst &RI);
@@ -226,17 +272,19 @@ public:
CallAnalyzer(const TargetTransformInfo &TTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
- ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
- const InlineParams &Params)
+ ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
+ Function &Callee, CallSite CSArg, const InlineParams &Params)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
- PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
+ PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
- Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
+ Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
+ Params.ComputeFullInlineCost || ORE),
+ IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0),
+ EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0),
NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
SROACostSavings(0), SROACostSavingsLost(0) {}
@@ -294,6 +342,7 @@ void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
SROACostSavings -= CostIt->second;
SROACostSavingsLost += CostIt->second;
SROAArgCosts.erase(CostIt);
+ disableLoadElimination();
}
/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
@@ -311,6 +360,13 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
SROACostSavings += InstructionCost;
}
+void CallAnalyzer::disableLoadElimination() {
+ if (EnableLoadElimination) {
+ Cost += LoadEliminationCost;
+ EnableLoadElimination = false;
+ }
+}
+
/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
@@ -348,15 +404,14 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
- SmallVector<Value *, 4> Indices;
+ SmallVector<Value *, 4> Operands;
+ Operands.push_back(GEP.getOperand(0));
for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
- Indices.push_back(SimpleOp);
+ Operands.push_back(SimpleOp);
else
- Indices.push_back(*I);
- return TargetTransformInfo::TCC_Free ==
- TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
- Indices);
+ Operands.push_back(*I);
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
@@ -391,52 +446,125 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
}
bool CallAnalyzer::visitPHI(PHINode &I) {
- // FIXME: We should potentially be tracking values through phi nodes,
- // especially when they collapse to a single value due to deleted CFG edges
- // during inlining.
-
// FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
// SROA if it *might* be used in an inappropriate manner.
// Phi nodes are always zero-cost.
- return true;
-}
-bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- bool SROACandidate =
- lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
+ APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits());
+ bool CheckSROA = I.getType()->isPointerTy();
- // Try to fold GEPs of constant-offset call site argument pointers. This
- // requires target data and inbounds GEPs.
- if (I.isInBounds()) {
- // Check if we have a base + offset for the pointer.
- Value *Ptr = I.getPointerOperand();
- std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
- if (BaseAndOffset.first) {
- // Check if the offset of this GEP is constant, and if so accumulate it
- // into Offset.
- if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
- // Non-constant GEPs aren't folded, and disable SROA.
- if (SROACandidate)
- disableSROA(CostIt);
- return isGEPFree(I);
- }
+ // Track the constant or pointer with constant offset we've seen so far.
+ Constant *FirstC = nullptr;
+ std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
+ Value *FirstV = nullptr;
- // Add the result as a new mapping to Base + Offset.
- ConstantOffsetPtrs[&I] = BaseAndOffset;
+ for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = I.getIncomingBlock(i);
+ // If the incoming block is dead, skip the incoming block.
+ if (DeadBlocks.count(Pred))
+ continue;
+ // If the parent block of phi is not the known successor of the incoming
+ // block, skip the incoming block.
+ BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
+ if (KnownSuccessor && KnownSuccessor != I.getParent())
+ continue;
+
+ Value *V = I.getIncomingValue(i);
+ // If the incoming value is this phi itself, skip the incoming value.
+ if (&I == V)
+ continue;
+
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ C = SimplifiedValues.lookup(V);
- // Also handle SROA candidates here, we already know that the GEP is
- // all-constant indexed.
- if (SROACandidate)
- SROAArgValues[&I] = SROAArg;
+ std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
+ if (!C && CheckSROA)
+ BaseAndOffset = ConstantOffsetPtrs.lookup(V);
+ if (!C && !BaseAndOffset.first)
+ // The incoming value is neither a constant nor a pointer with constant
+ // offset, exit early.
+ return true;
+
+ if (FirstC) {
+ if (FirstC == C)
+ // If we've seen a constant incoming value before and it is the same
+ // constant we see this time, continue checking the next incoming value.
+ continue;
+ // Otherwise early exit because we either see a different constant or saw
+ // a constant before but we have a pointer with constant offset this time.
+ return true;
+ }
+
+ if (FirstV) {
+ // The same logic as above, but check pointer with constant offset here.
+ if (FirstBaseAndOffset == BaseAndOffset)
+ continue;
return true;
}
+
+ if (C) {
+ // This is the 1st time we've seen a constant, record it.
+ FirstC = C;
+ continue;
+ }
+
+ // The remaining case is that this is the 1st time we've seen a pointer with
+ // constant offset, record it.
+ FirstV = V;
+ FirstBaseAndOffset = BaseAndOffset;
}
+ // Check if we can map phi to a constant.
+ if (FirstC) {
+ SimplifiedValues[&I] = FirstC;
+ return true;
+ }
+
+ // Check if we can map phi to a pointer with constant offset.
+ if (FirstBaseAndOffset.first) {
+ ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
+
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+ }
+
+ return true;
+}
+
+/// \brief Check we can fold GEPs of constant-offset call site argument pointers.
+/// This requires target data and inbounds GEPs.
+///
+/// \return true if the specified GEP can be folded.
+bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
+ // Check if we have a base + offset for the pointer.
+ std::pair<Value *, APInt> BaseAndOffset =
+ ConstantOffsetPtrs.lookup(I.getPointerOperand());
+ if (!BaseAndOffset.first)
+ return false;
+
+ // Check if the offset of this GEP is constant, and if so accumulate it
+ // into Offset.
+ if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
+ return false;
+
+ // Add the result as a new mapping to Base + Offset.
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ return true;
+}
+
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ bool SROACandidate =
+ lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
+
// Lambda to check whether a GEP's indices are all constant.
auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
@@ -445,7 +573,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
return true;
};
- if (IsGEPOffsetConstant(I)) {
+ if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
if (SROACandidate)
SROAArgValues[&I] = SROAArg;
@@ -643,15 +771,17 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
// If global profile summary is available, then callsite's coldness is
// determined based on that.
- if (PSI->hasProfileSummary())
+ if (PSI && PSI->hasProfileSummary())
return PSI->isColdCallSite(CS, CallerBFI);
+
+ // Otherwise we need BFI to be available.
if (!CallerBFI)
return false;
- // In the absence of global profile summary, determine if the callsite is cold
- // relative to caller's entry. We could potentially cache the computation of
- // scaled entry frequency, but the added complexity is not worth it unless
- // this scaling shows up high in the profiles.
+ // Determine if the callsite is cold relative to caller's entry. We could
+ // potentially cache the computation of scaled entry frequency, but the added
+ // complexity is not worth it unless this scaling shows up high in the
+ // profiles.
const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
auto CallSiteBB = CS.getInstruction()->getParent();
auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
@@ -660,6 +790,34 @@ bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
return CallSiteFreq < CallerEntryFreq * ColdProb;
}
+Optional<int>
+CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
+ BlockFrequencyInfo *CallerBFI) {
+
+ // If global profile summary is available, then callsite's hotness is
+ // determined based on that.
+ if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
+ return Params.HotCallSiteThreshold;
+
+ // Otherwise we need BFI to be available and to have a locally hot callsite
+ // threshold.
+ if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
+ return None;
+
+ // Determine if the callsite is hot relative to caller's entry. We could
+ // potentially cache the computation of scaled entry frequency, but the added
+ // complexity is not worth it unless this scaling shows up high in the
+ // profiles.
+ auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
+ auto CallerEntryFreq = CallerBFI->getEntryFreq();
+ if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
+ return Params.LocallyHotCallSiteThreshold;
+
+ // Otherwise treat it normally.
+ return None;
+}
+
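As a hypothetical illustration of the locally-hot path: with the defaults above (hot-callsite-rel-freq of 60 and locally-hot-callsite-threshold of 525), a callsite sitting in a block whose frequency is at least 60 times the caller's entry frequency (say entry frequency 8 and callsite block frequency 500, since 500 >= 480) is given the 525 threshold even without a profile summary; below that ratio the function returns None and the normal threshold logic applies.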
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
if (!allowSizeGrowth(CS)) {
@@ -679,11 +837,49 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
return B ? std::max(A, B.getValue()) : A;
};
+ // Various bonus percentages. These are multiplied by Threshold to get the
+ // bonus values.
+ // SingleBBBonus: This bonus is applied if the callee has a single reachable
+ // basic block at the given callsite context. This is speculatively applied
+ // and withdrawn if more than one basic block is seen.
+ //
+ // Vector bonuses: We want to more aggressively inline vector-dense kernels
+ // and apply this bonus based on the percentage of vector instructions. A
+ // bonus is applied if the vector instructions exceed 50% and half that amount
+  // is applied if it exceeds 10%. Note that these bonuses are somewhat
+ // arbitrary and evolved over time by accident as much as because they are
+ // principled bonuses.
+ // FIXME: It would be nice to base the bonus values on something more
+ // scientific.
+ //
+  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
+ // of the last call to a static function as inlining such functions is
+ // guaranteed to reduce code size.
+ //
+ // These bonus percentages may be set to 0 based on properties of the caller
+ // and the callsite.
+ int SingleBBBonusPercent = 50;
+ int VectorBonusPercent = 150;
+ int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
+
+ // Lambda to set all the above bonus and bonus percentages to 0.
+ auto DisallowAllBonuses = [&]() {
+ SingleBBBonusPercent = 0;
+ VectorBonusPercent = 0;
+ LastCallToStaticBonus = 0;
+ };
+
// Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
// and reduce the threshold if the caller has the necessary attribute.
- if (Caller->optForMinSize())
+ if (Caller->optForMinSize()) {
Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
- else if (Caller->optForSize())
+ // For minsize, we want to disable the single BB bonus and the vector
+ // bonuses, but not the last-call-to-static bonus. Inlining the last call to
+ // a static function will, at the minimum, eliminate the parameter setup and
+ // call/return instructions.
+ SingleBBBonusPercent = 0;
+ VectorBonusPercent = 0;
+ } else if (Caller->optForSize())
Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
// Adjust the threshold based on inlinehint attribute and profile based
@@ -691,35 +887,48 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
if (!Caller->optForMinSize()) {
if (Callee.hasFnAttribute(Attribute::InlineHint))
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
- if (PSI) {
- BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
- // FIXME: After switching to the new passmanager, simplify the logic below
- // by checking only the callsite hotness/coldness. The check for CallerBFI
- // exists only because we do not have BFI available with the old PM.
- //
- // Use callee's hotness information only if we have no way of determining
- // callsite's hotness information. Callsite hotness can be determined if
- // sample profile is used (which adds hotness metadata to calls) or if
- // caller's BlockFrequencyInfo is available.
- if (CallerBFI || PSI->hasSampleProfile()) {
- if (PSI->isHotCallSite(CS, CallerBFI)) {
- DEBUG(dbgs() << "Hot callsite.\n");
- Threshold = Params.HotCallSiteThreshold.getValue();
- } else if (isColdCallSite(CS, CallerBFI)) {
- DEBUG(dbgs() << "Cold callsite.\n");
- Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
- }
- } else {
- if (PSI->isFunctionEntryHot(&Callee)) {
- DEBUG(dbgs() << "Hot callee.\n");
- // If callsite hotness can not be determined, we may still know
- // that the callee is hot and treat it as a weaker hint for threshold
- // increase.
- Threshold = MaxIfValid(Threshold, Params.HintThreshold);
- } else if (PSI->isFunctionEntryCold(&Callee)) {
- DEBUG(dbgs() << "Cold callee.\n");
- Threshold = MinIfValid(Threshold, Params.ColdThreshold);
- }
+
+ // FIXME: After switching to the new passmanager, simplify the logic below
+ // by checking only the callsite hotness/coldness as we will reliably
+ // have local profile information.
+ //
+ // Callsite hotness and coldness can be determined if sample profile is
+ // used (which adds hotness metadata to calls) or if caller's
+ // BlockFrequencyInfo is available.
+ BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
+ auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
+ if (!Caller->optForSize() && HotCallSiteThreshold) {
+ DEBUG(dbgs() << "Hot callsite.\n");
+ // FIXME: This should update the threshold only if it exceeds the
+ // current threshold, but AutoFDO + ThinLTO currently relies on this
+ // behavior to prevent inlining of hot callsites during ThinLTO
+ // compile phase.
+ Threshold = HotCallSiteThreshold.getValue();
+ } else if (isColdCallSite(CS, CallerBFI)) {
+ DEBUG(dbgs() << "Cold callsite.\n");
+ // Do not apply bonuses for a cold callsite including the
+ // LastCallToStatic bonus. While this bonus might result in code size
+ // reduction, it can cause the size of a non-cold caller to increase
+ // preventing it from being inlined.
+ DisallowAllBonuses();
+ Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
+ } else if (PSI) {
+ // Use callee's global profile information only if we have no way of
+ // determining this via callsite information.
+ if (PSI->isFunctionEntryHot(&Callee)) {
+ DEBUG(dbgs() << "Hot callee.\n");
+ // If callsite hotness can not be determined, we may still know
+ // that the callee is hot and treat it as a weaker hint for threshold
+ // increase.
+ Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+ } else if (PSI->isFunctionEntryCold(&Callee)) {
+ DEBUG(dbgs() << "Cold callee.\n");
+ // Do not apply bonuses for a cold callee including the
+ // LastCallToStatic bonus. While this bonus might result in code size
+ // reduction, it can cause the size of a non-cold caller to increase
+ // preventing it from being inlined.
+ DisallowAllBonuses();
+ Threshold = MinIfValid(Threshold, Params.ColdThreshold);
}
}
}
@@ -727,6 +936,17 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// Finally, take the target-specific inlining threshold multiplier into
// account.
Threshold *= TTI.getInliningThresholdMultiplier();
+
+ SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
+ VectorBonus = Threshold * VectorBonusPercent / 100;
+
+ bool OnlyOneCallAndLocalLinkage =
+ F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
+ // If there is only one call of the function, and it has internal linkage,
+ // the cost of inlining it drops dramatically. It may seem odd to update
+ // Cost in updateThreshold, but the bonus depends on the logic in this method.
+ if (OnlyOneCallAndLocalLinkage)
+ Cost -= LastCallToStaticBonus;
}
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
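Worked through with a hypothetical base threshold of 225: the bonus setup in updateThreshold above yields SingleBBBonus = 225 * 50 / 100 = 112 and VectorBonus = 225 * 150 / 100 = 337, so analyzeCall below speculatively works against 225 + 112 + 337 = 674 and (outside the context shown here) withdraws each bonus once the callee turns out to have more than one reachable block or too few vector instructions. Note also that the last-call-to-static adjustment is now subtracted from Cost in updateThreshold itself, and that DisallowAllBonuses() zeroes all three knobs for cold callsites and callees.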
@@ -784,6 +1004,34 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) {
return false;
}
+bool CallAnalyzer::visitOr(BinaryOperator &I) {
+ // This is necessary because the generic simplify instruction only works if
+ // both operands are constants.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (ConstantInt *C = dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(I.getOperand(i))))
+ if (C->isAllOnesValue()) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+ }
+ return Base::visitOr(I);
+}
+
+bool CallAnalyzer::visitAnd(BinaryOperator &I) {
+ // This is necessary because the generic simplify instruction only works if
+ // both operands are constants.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (ConstantInt *C = dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(I.getOperand(i))))
+ if (C->isZero()) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+ }
+ return Base::visitAnd(I);
+}
+
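A hypothetical illustration of the two visitors above: if inlining has already simplified one operand of an or to an all-ones constant, the result is recorded as that constant without waiting for the other operand, and an and with a known zero operand is likewise recorded as zero; only when both operands are constants does the generic Base path do the folding.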
bool CallAnalyzer::visitSub(BinaryOperator &I) {
// Try to handle a special case: we can fold computing the difference of two
// constant-related pointers.
@@ -845,6 +1093,15 @@ bool CallAnalyzer::visitLoad(LoadInst &I) {
disableSROA(CostIt);
}
+ // If the data is already loaded from this address and hasn't been clobbered
+ // by any stores or calls, this load is likely to be redundant and can be
+ // eliminated.
+ if (EnableLoadElimination &&
+ !LoadAddrSet.insert(I.getPointerOperand()).second) {
+ LoadEliminationCost += InlineConstants::InstrCost;
+ return true;
+ }
+
return false;
}
@@ -860,6 +1117,15 @@ bool CallAnalyzer::visitStore(StoreInst &I) {
disableSROA(CostIt);
}
+ // The store can potentially clobber loads and prevent repeated loads from
+ // being eliminated.
+ // FIXME:
+  // 1. We can probably keep an initial set of eliminable loads subtracted
+ // from the cost even when we finally see a store. We just need to disable
+ // *further* accumulation of elimination savings.
+ // 2. We should probably at some point thread MemorySSA for the callee into
+ // this and then use that to actually compute *really* precise savings.
+ disableLoadElimination();
return false;
}
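A hypothetical callee showing the savings being modeled: the second load of P->X below revisits an address already in LoadAddrSet with no clobber in between, so its cost is credited to LoadEliminationCost, while any store (or call that may write memory, per the visitCallSite hunks below) calls disableLoadElimination() and adds the accumulated credit back to Cost.

// Illustrative only; not taken from the patch or its tests.
struct Pair { int X; int Y; };

static int readTwice(const Pair *P) {
  int A = P->X; // first load: its address is recorded
  int B = P->X; // repeated load with no intervening clobber: modeled as free
  return A + B;
}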
@@ -942,6 +1208,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
switch (II->getIntrinsicID()) {
default:
+ if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+ disableLoadElimination();
return Base::visitCallSite(CS);
case Intrinsic::load_relative:
@@ -952,6 +1220,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:
+ disableLoadElimination();
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
case Intrinsic::localescape:
@@ -960,7 +1229,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
}
}
- if (F == CS.getInstruction()->getParent()->getParent()) {
+ if (F == CS.getInstruction()->getFunction()) {
// This flag will fully abort the analysis, so don't bother with anything
// else.
IsRecursiveCall = true;
@@ -978,6 +1247,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
Cost += InlineConstants::CallPenalty;
}
+ if (!CS.onlyReadsMemory())
+ disableLoadElimination();
return Base::visitCallSite(CS);
}
@@ -992,8 +1263,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// Next, check if this happens to be an indirect function call to a known
// function in this inline context. If not, we've done all we can.
Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
- if (!F)
+ if (!F) {
+ if (!CS.onlyReadsMemory())
+ disableLoadElimination();
return Base::visitCallSite(CS);
+ }
// If we have a constant that we are calling as a function, we can peer
// through it and see the function target. This happens not infrequently
@@ -1002,7 +1276,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// out. Pretend to inline the function, with a custom threshold.
auto IndirectCallParams = Params;
IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
- CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
+ CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
IndirectCallParams);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
@@ -1010,6 +1284,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
Cost -= std::max(0, CA.getThreshold() - CA.getCost());
}
+ if (!F->onlyReadsMemory())
+ disableLoadElimination();
return Base::visitCallSite(CS);
}
@@ -1030,6 +1306,87 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
SimplifiedValues.lookup(BI.getCondition()));
}
+bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
+ bool CheckSROA = SI.getType()->isPointerTy();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ Constant *TrueC = dyn_cast<Constant>(TrueVal);
+ if (!TrueC)
+ TrueC = SimplifiedValues.lookup(TrueVal);
+ Constant *FalseC = dyn_cast<Constant>(FalseVal);
+ if (!FalseC)
+ FalseC = SimplifiedValues.lookup(FalseVal);
+ Constant *CondC =
+ dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
+
+ if (!CondC) {
+ // Select C, X, X => X
+ if (TrueC == FalseC && TrueC) {
+ SimplifiedValues[&SI] = TrueC;
+ return true;
+ }
+
+ if (!CheckSROA)
+ return Base::visitSelectInst(SI);
+
+ std::pair<Value *, APInt> TrueBaseAndOffset =
+ ConstantOffsetPtrs.lookup(TrueVal);
+ std::pair<Value *, APInt> FalseBaseAndOffset =
+ ConstantOffsetPtrs.lookup(FalseVal);
+ if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
+ ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
+
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
+ SROAArgValues[&SI] = SROAArg;
+ return true;
+ }
+
+ return Base::visitSelectInst(SI);
+ }
+
+ // Select condition is a constant.
+ Value *SelectedV = CondC->isAllOnesValue()
+ ? TrueVal
+ : (CondC->isNullValue()) ? FalseVal : nullptr;
+ if (!SelectedV) {
+ // Condition is a vector constant that is not all 1s or all 0s. If all
+ // operands are constants, ConstantExpr::getSelect() can handle the cases
+ // such as select vectors.
+ if (TrueC && FalseC) {
+ if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
+ SimplifiedValues[&SI] = C;
+ return true;
+ }
+ }
+ return Base::visitSelectInst(SI);
+ }
+
+ // Condition is either all 1s or all 0s. SI can be simplified.
+ if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
+ SimplifiedValues[&SI] = SelectedC;
+ return true;
+ }
+
+ if (!CheckSROA)
+ return true;
+
+ std::pair<Value *, APInt> BaseAndOffset =
+ ConstantOffsetPtrs.lookup(SelectedV);
+ if (BaseAndOffset.first) {
+ ConstantOffsetPtrs[&SI] = BaseAndOffset;
+
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
+ SROAArgValues[&SI] = SROAArg;
+ }
+
+ return true;
+}
+
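For example, once inlining simplifies the condition to i1 true, the select is recorded as its true operand (a constant lands in SimplifiedValues, a pointer with a known base and constant offset lands in ConstantOffsetPtrs and keeps its SROA candidacy); a non-uniform constant vector condition with two constant operands is instead folded through ConstantExpr::getSelect.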
bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// We model unconditional switches as free, see the comments on handling
// branches.
@@ -1062,7 +1419,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
std::min((int64_t)CostUpperBound,
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
- if (CostLowerBound > Threshold) {
+ if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
Cost = CostLowerBound;
return false;
}
@@ -1211,21 +1568,39 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
else
Cost += InlineConstants::InstrCost;
+ using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr || HasFrameEscape)
+ HasIndirectBr || HasFrameEscape) {
+ if (ORE)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
+ CandidateCS.getInstruction())
+ << NV("Callee", &F)
+ << " has uninlinable pattern and cost is not fully computed";
+ });
return false;
+ }
// If the caller is a recursive function then we don't want to inline
// functions which allocate a lot of stack space because it would increase
// the caller stack usage dramatically.
if (IsCallerRecursive &&
- AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
+ if (ORE)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
+ CandidateCS.getInstruction())
+ << NV("Callee", &F)
+ << " is recursive and allocates too much stack space. Cost is "
+ "not fully computed";
+ });
return false;
+ }
// Check if we've past the maximum possible threshold so we don't spin in
// huge basic blocks that will never inline.
- if (Cost > Threshold)
+ if (Cost >= Threshold && !ComputeFullInlineCost)
return false;
}
@@ -1270,6 +1645,44 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
+/// \brief Find dead blocks due to deleted CFG edges during inlining.
+///
+/// If we know the successor of the current block, \p CurrBB, has to be \p
+/// NextBB, the other successors of \p CurrBB are dead if these successors have
+/// no live incoming CFG edges. If one block is found to be dead, we can
+/// continue growing the dead block list by checking the successors of the dead
+/// blocks to see if all their incoming edges are dead or not.
+void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
+ auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
+    // A CFG edge is dead if the predecessor is dead or the predecessor has a
+ // known successor which is not the one under exam.
+ return (DeadBlocks.count(Pred) ||
+ (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
+ };
+
+ auto IsNewlyDead = [&](BasicBlock *BB) {
+ // If all the edges to a block are dead, the block is also dead.
+ return (!DeadBlocks.count(BB) &&
+ llvm::all_of(predecessors(BB),
+ [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
+ };
+
+ for (BasicBlock *Succ : successors(CurrBB)) {
+ if (Succ == NextBB || !IsNewlyDead(Succ))
+ continue;
+ SmallVector<BasicBlock *, 4> NewDead;
+ NewDead.push_back(Succ);
+ while (!NewDead.empty()) {
+ BasicBlock *Dead = NewDead.pop_back_val();
+ if (DeadBlocks.insert(Dead))
+ // Continue growing the dead block list.
+ for (BasicBlock *S : successors(Dead))
+ if (IsNewlyDead(S))
+ NewDead.push_back(S);
+ }
+ }
+}
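
For readers unfamiliar with the propagation above, here is a standalone sketch of the same worklist algorithm on a toy CFG keyed by integer block ids; the names and the adjacency-list representation are illustrative stand-ins for LLVM's BasicBlock/successors machinery, not part of the patch.

#include <algorithm>
#include <map>
#include <set>
#include <vector>

// Toy CFG: block id -> successor ids. Predecessors are derived on the fly.
using CFG = std::map<int, std::vector<int>>;

static std::vector<int> predecessors(const CFG &G, int BB) {
  std::vector<int> Preds;
  for (const auto &KV : G)
    if (std::find(KV.second.begin(), KV.second.end(), BB) != KV.second.end())
      Preds.push_back(KV.first);
  return Preds;
}

// Mirrors findDeadBlocks: once CurrBB is known to branch only to NextBB, any
// other successor whose incoming edges are all dead becomes dead, and the
// property is propagated through a worklist.
static std::set<int> findDeadBlocks(const CFG &G, std::map<int, int> &KnownSucc,
                                    int CurrBB, int NextBB) {
  std::set<int> Dead;
  auto IsEdgeDead = [&](int Pred, int Succ) {
    auto It = KnownSucc.find(Pred);
    return Dead.count(Pred) || (It != KnownSucc.end() && It->second != Succ);
  };
  auto IsNewlyDead = [&](int BB) {
    if (Dead.count(BB))
      return false;
    auto Preds = predecessors(G, BB);
    return std::all_of(Preds.begin(), Preds.end(),
                       [&](int P) { return IsEdgeDead(P, BB); });
  };

  KnownSucc[CurrBB] = NextBB;
  for (int Succ : G.at(CurrBB)) {
    if (Succ == NextBB || !IsNewlyDead(Succ))
      continue;
    std::vector<int> Worklist{Succ};
    while (!Worklist.empty()) {
      int D = Worklist.back();
      Worklist.pop_back();
      if (Dead.insert(D).second)
        for (int S : G.at(D))
          if (IsNewlyDead(S))
            Worklist.push_back(S);
    }
  }
  return Dead;
}

int main() {
  // 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}
  CFG G = {{0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};
  std::map<int, int> KnownSucc;
  // If block 0 is known to branch to 1, then 2 is dead; 3 stays live via 1.
  auto Dead = findDeadBlocks(G, KnownSucc, /*CurrBB=*/0, /*NextBB=*/1);
  return Dead == std::set<int>{2} ? 0 : 1;
}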
+
/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
@@ -1296,51 +1709,35 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Update the threshold based on callsite properties
updateThreshold(CS, F);
- FiftyPercentVectorBonus = 3 * Threshold / 2;
- TenPercentVectorBonus = 3 * Threshold / 4;
-
- // Track whether the post-inlining function would have more than one basic
- // block. A single basic block is often intended for inlining. Balloon the
- // threshold by 50% until we pass the single-BB phase.
- bool SingleBB = true;
- int SingleBBBonus = Threshold / 2;
-
// Speculatively apply all possible bonuses to Threshold. If cost exceeds
// this Threshold any time, and cost cannot decrease, we can stop processing
// the rest of the function body.
- Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
+ Threshold += (SingleBBBonus + VectorBonus);
// Give out bonuses for the callsite, as the instructions setting them up
// will be gone after inlining.
Cost -= getCallsiteCost(CS, DL);
- // If there is only one call of the function, and it has internal linkage,
- // the cost of inlining it drops dramatically.
- bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
- if (OnlyOneCallAndLocalLinkage)
- Cost -= InlineConstants::LastCallToStaticBonus;
-
// If this function uses the coldcc calling convention, prefer not to inline
// it.
if (F.getCallingConv() == CallingConv::Cold)
Cost += InlineConstants::ColdccPenalty;
// Check if we're done. This can happen due to bonuses and penalties.
- if (Cost > Threshold)
+ if (Cost >= Threshold && !ComputeFullInlineCost)
return false;
if (F.empty())
return true;
- Function *Caller = CS.getInstruction()->getParent()->getParent();
+ Function *Caller = CS.getInstruction()->getFunction();
// Check if the caller function is recursive itself.
for (User *U : Caller->users()) {
CallSite Site(U);
if (!Site)
continue;
Instruction *I = Site.getInstruction();
- if (I->getParent()->getParent() == Caller) {
+ if (I->getFunction() == Caller) {
IsCallerRecursive = true;
break;
}
@@ -1388,11 +1785,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
BBSetVector;
BBSetVector BBWorklist;
BBWorklist.insert(&F.getEntryBlock());
+ bool SingleBB = true;
  // Note that we *must not* cache the size; this loop grows the worklist.
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
- if (Cost > Threshold)
+ if (Cost >= Threshold && !ComputeFullInlineCost)
break;
BasicBlock *BB = BBWorklist[Idx];
@@ -1422,7 +1820,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Value *Cond = BI->getCondition();
if (ConstantInt *SimpleCond =
dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
- BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+ BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
+ BBWorklist.insert(NextBB);
+ KnownSuccessors[BB] = NextBB;
+ findDeadBlocks(BB, NextBB);
continue;
}
}
@@ -1430,7 +1831,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Value *Cond = SI->getCondition();
if (ConstantInt *SimpleCond =
dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
- BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor());
+ BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
+ BBWorklist.insert(NextBB);
+ KnownSuccessors[BB] = NextBB;
+ findDeadBlocks(BB, NextBB);
continue;
}
}
@@ -1452,6 +1856,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
+ bool OnlyOneCallAndLocalLinkage =
+ F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
// If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
@@ -1462,9 +1868,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// subtract the excess bonus, if any, from the Threshold before
// comparing against Cost.
if (NumVectorInstructions <= NumInstructions / 10)
- Threshold -= FiftyPercentVectorBonus;
+ Threshold -= VectorBonus;
else if (NumVectorInstructions <= NumInstructions / 2)
- Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
+ Threshold -= VectorBonus/2;
return Cost < std::max(1, Threshold);
}
@@ -1482,6 +1888,7 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump() {
DEBUG_PRINT_STAT(NumInstructions);
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
+ DEBUG_PRINT_STAT(LoadEliminationCost);
DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
DEBUG_PRINT_STAT(Cost);
DEBUG_PRINT_STAT(Threshold);
@@ -1534,9 +1941,9 @@ InlineCost llvm::getInlineCost(
CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
- ProfileSummaryInfo *PSI) {
+ ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
- GetAssumptionCache, GetBFI, PSI);
+ GetAssumptionCache, GetBFI, PSI, ORE);
}
InlineCost llvm::getInlineCost(
@@ -1544,7 +1951,7 @@ InlineCost llvm::getInlineCost(
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
- ProfileSummaryInfo *PSI) {
+ ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
// Cannot inline indirect calls.
if (!Callee)
@@ -1560,11 +1967,12 @@ InlineCost llvm::getInlineCost(
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
- if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
+ Function *Caller = CS.getCaller();
+ if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
return llvm::InlineCost::getNever();
// Don't inline this call if the caller has the optnone attribute.
- if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
+ if (Caller->hasFnAttribute(Attribute::OptimizeNone))
return llvm::InlineCost::getNever();
// Don't inline functions which can be interposed at link-time. Don't inline
@@ -1576,9 +1984,9 @@ InlineCost llvm::getInlineCost(
return llvm::InlineCost::getNever();
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
- << "...\n");
+ << "... (caller:" << Caller->getName() << ")\n");
- CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
Params);
bool ShouldInline = CA.analyzeCall(CS);
@@ -1652,6 +2060,16 @@ InlineParams llvm::getInlineParams(int Threshold) {
// Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
Params.HotCallSiteThreshold = HotCallSiteThreshold;
+ // If the -locally-hot-callsite-threshold is explicitly specified, use it to
+ // populate LocallyHotCallSiteThreshold. Later, we populate
+ // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
+ // we know that optimization level is O3 (in the getInlineParams variant that
+ // takes the opt and size levels).
+ // FIXME: Remove this check (and make the assignment unconditional) after
+ // addressing size regression issues at O2.
+ if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
+ Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
+
// Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
@@ -1691,5 +2109,12 @@ static int computeThresholdFromOptLevels(unsigned OptLevel,
}
InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
- return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+ auto Params =
+ getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+ // At O3, use the value of -locally-hot-callsite-threshold option to populate
+ // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
+ // when it is specified explicitly.
+ if (OptLevel > 2)
+ Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
+ return Params;
}
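
The combined effect of the explicit flag and the O3 default amounts to a small defaulting rule; the sketch below models it with a plain struct in place of cl::opt and InlineParams (the names and the default value shown are illustrative assumptions, not the actual knob definitions).

#include <optional>

// Stand-ins for the cl::opt flag and the InlineParams field.
struct Flag {
  int Value = 525;             // illustrative default; the real knob has its own cl::init value
  unsigned NumOccurrences = 0; // how many times it was given on the command line
};

struct Params {
  std::optional<int> LocallyHotCallSiteThreshold;
};

// Mirrors the rule in the patch: always honor an explicitly specified flag;
// otherwise only enable the locally-hot threshold at -O3.
static Params makeParams(const Flag &LocallyHot, unsigned OptLevel) {
  Params P;
  if (LocallyHot.NumOccurrences > 0 || OptLevel > 2)
    P.LocallyHotCallSiteThreshold = LocallyHot.Value;
  return P;
}

int main() {
  Flag F; // not specified explicitly
  bool AtO2 = makeParams(F, 2).LocallyHotCallSiteThreshold.has_value(); // false
  bool AtO3 = makeParams(F, 3).LocallyHotCallSiteThreshold.has_value(); // true
  return (!AtO2 && AtO3) ? 0 : 1;
}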
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index b4f3b87e1846..3ce1281743c3 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -23,10 +23,10 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
@@ -327,7 +327,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
// Check that the simplified value has the form "X op Y" where "op" is the
// same as the original operation.
Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
- if (Simplified && Simplified->getOpcode() == Opcode) {
+ if (Simplified && Simplified->getOpcode() == unsigned(Opcode)) {
// The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
// We already know that "op" is the same as for the simplified value. See
// if the operands match too. If so, return the simplified value.
@@ -791,90 +791,6 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
}
-/// Given operands for an FAdd, see if we can fold the result. If not, this
-/// returns null.
-static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
- if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
- return C;
-
- // fadd X, -0 ==> X
- if (match(Op1, m_NegZero()))
- return Op0;
-
- // fadd X, 0 ==> X, when we know X is not -0
- if (match(Op1, m_Zero()) &&
- (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
- return Op0;
-
- // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
- // where nnan and ninf have to occur at least once somewhere in this
- // expression
- Value *SubOp = nullptr;
- if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
- SubOp = Op1;
- else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
- SubOp = Op0;
- if (SubOp) {
- Instruction *FSub = cast<Instruction>(SubOp);
- if ((FMF.noNaNs() || FSub->hasNoNaNs()) &&
- (FMF.noInfs() || FSub->hasNoInfs()))
- return Constant::getNullValue(Op0->getType());
- }
-
- return nullptr;
-}
-
-/// Given operands for an FSub, see if we can fold the result. If not, this
-/// returns null.
-static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
- if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
- return C;
-
- // fsub X, 0 ==> X
- if (match(Op1, m_Zero()))
- return Op0;
-
- // fsub X, -0 ==> X, when we know X is not -0
- if (match(Op1, m_NegZero()) &&
- (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
- return Op0;
-
- // fsub -0.0, (fsub -0.0, X) ==> X
- Value *X;
- if (match(Op0, m_NegZero()) && match(Op1, m_FSub(m_NegZero(), m_Value(X))))
- return X;
-
- // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
- if (FMF.noSignedZeros() && match(Op0, m_AnyZero()) &&
- match(Op1, m_FSub(m_AnyZero(), m_Value(X))))
- return X;
-
- // fsub nnan x, x ==> 0.0
- if (FMF.noNaNs() && Op0 == Op1)
- return Constant::getNullValue(Op0->getType());
-
- return nullptr;
-}
-
-/// Given the operands for an FMul, see if we can fold the result
-static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
- if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
- return C;
-
- // fmul X, 1.0 ==> X
- if (match(Op1, m_FPOne()))
- return Op0;
-
- // fmul nnan nsz X, 0 ==> 0
- if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
- return Op1;
-
- return nullptr;
-}
-
/// Given operands for a Mul, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -932,27 +848,12 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return nullptr;
}
-Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q) {
- return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit);
-}
-
-
-Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q) {
- return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit);
-}
-
-Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q) {
- return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit);
-}
-
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit);
}
/// Check for common or similar folds of integer division or integer remainder.
+/// This applies to all 4 opcodes (sdiv/udiv/srem/urem).
static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
Type *Ty = Op0->getType();
@@ -1003,9 +904,70 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
return nullptr;
}
-/// Given operands for an SDiv or UDiv, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+/// Given a predicate and two operands, return true if the comparison is true.
+/// This is a helper for div/rem simplification where we return some other value
+/// when we can prove a relationship between the operands.
+static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse);
+ Constant *C = dyn_cast_or_null<Constant>(V);
+ return (C && C->isAllOnesValue());
+}
+
+/// Return true if we can simplify X / Y to 0. Remainder can adapt that answer
+/// to simplify X % Y to X.
+static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q,
+ unsigned MaxRecurse, bool IsSigned) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return false;
+
+ if (IsSigned) {
+ // |X| / |Y| --> 0
+ //
+ // We require that 1 operand is a simple constant. That could be extended to
+ // 2 variables if we computed the sign bit for each.
+ //
+ // Make sure that a constant is not the minimum signed value because taking
+ // the abs() of that is undefined.
+ Type *Ty = X->getType();
+ const APInt *C;
+ if (match(X, m_APInt(C)) && !C->isMinSignedValue()) {
+ // Is the variable divisor magnitude always greater than the constant
+ // dividend magnitude?
+ // |Y| > |C| --> Y < -abs(C) or Y > abs(C)
+ Constant *PosDividendC = ConstantInt::get(Ty, C->abs());
+ Constant *NegDividendC = ConstantInt::get(Ty, -C->abs());
+ if (isICmpTrue(CmpInst::ICMP_SLT, Y, NegDividendC, Q, MaxRecurse) ||
+ isICmpTrue(CmpInst::ICMP_SGT, Y, PosDividendC, Q, MaxRecurse))
+ return true;
+ }
+ if (match(Y, m_APInt(C))) {
+ // Special-case: we can't take the abs() of a minimum signed value. If
+ // that's the divisor, then all we have to do is prove that the dividend
+ // is also not the minimum signed value.
+ if (C->isMinSignedValue())
+ return isICmpTrue(CmpInst::ICMP_NE, X, Y, Q, MaxRecurse);
+
+ // Is the variable dividend magnitude always less than the constant
+ // divisor magnitude?
+ // |X| < |C| --> X > -abs(C) and X < abs(C)
+ Constant *PosDivisorC = ConstantInt::get(Ty, C->abs());
+ Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs());
+ if (isICmpTrue(CmpInst::ICMP_SGT, X, NegDivisorC, Q, MaxRecurse) &&
+ isICmpTrue(CmpInst::ICMP_SLT, X, PosDivisorC, Q, MaxRecurse))
+ return true;
+ }
+ return false;
+ }
+
+ // IsSigned == false.
+ // Is the dividend unsigned less than the divisor?
+ return isICmpTrue(ICmpInst::ICMP_ULT, X, Y, Q, MaxRecurse);
+}
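
The magnitude reasoning in isDivZero boils down to ordinary integer arithmetic; the standalone check below exercises the unsigned case and both signed cases on concrete values (it illustrates the arithmetic fact, not the LLVM query machinery).

#include <cstdint>

int main() {
  // Unsigned: X < Y implies X / Y == 0 and X % Y == X.
  uint32_t X = 7, Y = 100;
  if (!(X / Y == 0 && X % Y == X))
    return 1;

  // Signed, constant dividend: |C| < |Y| implies C / Y == 0.
  int32_t C = 5, Ybig = -9; // |5| < |-9|
  if (!(C / Ybig == 0 && C % Ybig == C))
    return 1;

  // Signed, constant divisor: |X| < |C2| implies X / C2 == 0.
  int32_t Xs = -3, C2 = 8; // |-3| < |8|
  if (!(Xs / C2 == 0 && Xs % C2 == Xs))
    return 1;

  return 0;
}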
+
+/// These are simplifications common to SDiv and UDiv.
+static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
@@ -1013,7 +975,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Value *V = simplifyDivRem(Op0, Op1, true))
return V;
- bool isSigned = Opcode == Instruction::SDiv;
+ bool IsSigned = Opcode == Instruction::SDiv;
// (X * Y) / Y -> X if the multiplication does not overflow.
Value *X = nullptr, *Y = nullptr;
@@ -1021,8 +983,8 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0);
// If the Mul knows it does not overflow, then we are good to go.
- if ((isSigned && Mul->hasNoSignedWrap()) ||
- (!isSigned && Mul->hasNoUnsignedWrap()))
+ if ((IsSigned && Mul->hasNoSignedWrap()) ||
+ (!IsSigned && Mul->hasNoUnsignedWrap()))
return X;
// If X has the form X = A / Y then X * Y cannot overflow.
if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
@@ -1031,13 +993,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
}
// (X rem Y) / Y -> 0
- if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
- (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ if ((IsSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (!IsSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
return Constant::getNullValue(Op0->getType());
// (X /u C1) /u C2 -> 0 if C1 * C2 overflow
ConstantInt *C1, *C2;
- if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) &&
+ if (!IsSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) &&
match(Op1, m_ConstantInt(C2))) {
bool Overflow;
(void)C1->getValue().umul_ov(C2->getValue(), Overflow);
@@ -1057,96 +1019,14 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
- return nullptr;
-}
-
-/// Given operands for an SDiv, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
- return V;
-
- return nullptr;
-}
-
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
-}
-
-/// Given operands for a UDiv, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
- return V;
-
- // udiv %V, C -> 0 if %V < C
- if (MaxRecurse) {
- if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst(
- ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) {
- if (C->isAllOnesValue()) {
- return Constant::getNullValue(Op0->getType());
- }
- }
- }
-
- return nullptr;
-}
-
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit);
-}
-
-static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q, unsigned) {
- if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
- return C;
-
- // undef / X -> undef (the undef could be a snan).
- if (match(Op0, m_Undef()))
- return Op0;
-
- // X / undef -> undef
- if (match(Op1, m_Undef()))
- return Op1;
-
- // X / 1.0 -> X
- if (match(Op1, m_FPOne()))
- return Op0;
-
- // 0 / X -> 0
- // Requires that NaNs are off (X could be zero) and signed zeroes are
- // ignored (X could be positive or negative, so the output sign is unknown).
- if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
- return Op0;
-
- if (FMF.noNaNs()) {
- // X / X -> 1.0 is legal when NaNs are ignored.
- if (Op0 == Op1)
- return ConstantFP::get(Op0->getType(), 1.0);
-
- // -X / X -> -1.0 and
- // X / -X -> -1.0 are legal when NaNs are ignored.
- // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
- if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) &&
- BinaryOperator::getFNegArgument(Op0) == Op1) ||
- (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) &&
- BinaryOperator::getFNegArgument(Op1) == Op0))
- return ConstantFP::get(Op0->getType(), -1.0);
- }
+ if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned))
+ return Constant::getNullValue(Op0->getType());
return nullptr;
}
-Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q) {
- return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit);
-}
-
-/// Given operands for an SRem or URem, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+/// These are simplifications common to SRem and URem.
+static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
@@ -1173,17 +1053,40 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
+ // If X / Y == 0, then X % Y == X.
+ if (isDivZero(Op0, Op1, Q, MaxRecurse, Opcode == Instruction::SRem))
+ return Op0;
+
return nullptr;
}
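
The new fold at the end of simplifyRem follows from the identity X == (X / Y) * Y + (X % Y): whenever the quotient is zero, the remainder must equal X. A brute-force confirmation over a small signed range, as a plain C++ sketch:

int main() {
  // Exhaustively confirm: for non-zero Y, (X / Y == 0) implies (X % Y == X).
  for (int X = -50; X <= 50; ++X)
    for (int Y = -50; Y <= 50; ++Y) {
      if (Y == 0)
        continue;
      if (X / Y == 0 && X % Y != X)
        return 1; // never reached
    }
  return 0;
}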
+/// Given operands for an SDiv, see if we can fold the result.
+/// If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse);
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
+}
+
+/// Given operands for a UDiv, see if we can fold the result.
+/// If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ return simplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse);
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit);
+}
+
/// Given operands for an SRem, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
- return V;
-
- return nullptr;
+ return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse);
}
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
@@ -1194,53 +1097,13 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
/// If not, this returns null.
static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
- return V;
-
- // urem %V, C -> %V if %V < C
- if (MaxRecurse) {
- if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst(
- ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) {
- if (C->isAllOnesValue()) {
- return Op0;
- }
- }
- }
-
- return nullptr;
+ return simplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse);
}
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit);
}
-static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q, unsigned) {
- if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
- return C;
-
- // undef % X -> undef (the undef could be a snan).
- if (match(Op0, m_Undef()))
- return Op0;
-
- // X % undef -> undef
- if (match(Op1, m_Undef()))
- return Op1;
-
- // 0 % X -> 0
- // Requires that NaNs are off (X could be zero) and signed zeroes are
- // ignored (X could be positive or negative, so the output sign is unknown).
- if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
- return Op0;
-
- return nullptr;
-}
-
-Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q) {
- return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit);
-}
-
/// Returns true if a shift by \c Amount always yields undef.
static bool isUndefShift(Value *Amount) {
Constant *C = dyn_cast<Constant>(Amount);
@@ -1686,7 +1549,44 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) {
+static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
+ Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
+ Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
+ if (LHS0->getType() != RHS0->getType())
+ return nullptr;
+
+ FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
+ if ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
+ (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && !IsAnd)) {
+ // (fcmp ord NNAN, X) & (fcmp ord X, Y) --> fcmp ord X, Y
+ // (fcmp ord NNAN, X) & (fcmp ord Y, X) --> fcmp ord Y, X
+ // (fcmp ord X, NNAN) & (fcmp ord X, Y) --> fcmp ord X, Y
+ // (fcmp ord X, NNAN) & (fcmp ord Y, X) --> fcmp ord Y, X
+ // (fcmp uno NNAN, X) | (fcmp uno X, Y) --> fcmp uno X, Y
+ // (fcmp uno NNAN, X) | (fcmp uno Y, X) --> fcmp uno Y, X
+ // (fcmp uno X, NNAN) | (fcmp uno X, Y) --> fcmp uno X, Y
+ // (fcmp uno X, NNAN) | (fcmp uno Y, X) --> fcmp uno Y, X
+ if ((isKnownNeverNaN(LHS0) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
+ (isKnownNeverNaN(LHS1) && (LHS0 == RHS0 || LHS0 == RHS1)))
+ return RHS;
+
+ // (fcmp ord X, Y) & (fcmp ord NNAN, X) --> fcmp ord X, Y
+ // (fcmp ord Y, X) & (fcmp ord NNAN, X) --> fcmp ord Y, X
+ // (fcmp ord X, Y) & (fcmp ord X, NNAN) --> fcmp ord X, Y
+ // (fcmp ord Y, X) & (fcmp ord X, NNAN) --> fcmp ord Y, X
+ // (fcmp uno X, Y) | (fcmp uno NNAN, X) --> fcmp uno X, Y
+ // (fcmp uno Y, X) | (fcmp uno NNAN, X) --> fcmp uno Y, X
+ // (fcmp uno X, Y) | (fcmp uno X, NNAN) --> fcmp uno X, Y
+ // (fcmp uno Y, X) | (fcmp uno X, NNAN) --> fcmp uno Y, X
+ if ((isKnownNeverNaN(RHS0) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
+ (isKnownNeverNaN(RHS1) && (RHS0 == LHS0 || RHS0 == LHS1)))
+ return LHS;
+ }
+
+ return nullptr;
+}
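
The fcmp fold rests on the predicate expansions fcmp ord A, B == !isnan(A) && !isnan(B) and fcmp uno A, B == isnan(A) || isnan(B); once one operand of the left compare is known never to be NaN, that compare tests nothing the right compare does not already test. A standalone check of the equivalence (illustrative only):

#include <cmath>
#include <limits>

static bool ord(double A, double B) { return !std::isnan(A) && !std::isnan(B); }
static bool uno(double A, double B) { return std::isnan(A) || std::isnan(B); }

int main() {
  const double NNAN = 1.0; // a value known not to be NaN
  const double Vals[] = {0.0, -2.5, std::numeric_limits<double>::quiet_NaN(),
                         std::numeric_limits<double>::infinity()};
  for (double X : Vals)
    for (double Y : Vals) {
      // (fcmp ord NNAN, X) & (fcmp ord X, Y) --> fcmp ord X, Y
      if ((ord(NNAN, X) && ord(X, Y)) != ord(X, Y))
        return 1;
      // (fcmp uno NNAN, X) | (fcmp uno X, Y) --> fcmp uno X, Y
      if ((uno(NNAN, X) || uno(X, Y)) != uno(X, Y))
        return 1;
    }
  return 0;
}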
+
+static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) {
// Look through casts of the 'and' operands to find compares.
auto *Cast0 = dyn_cast<CastInst>(Op0);
auto *Cast1 = dyn_cast<CastInst>(Op1);
@@ -1696,13 +1596,18 @@ static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) {
Op1 = Cast1->getOperand(0);
}
- auto *Cmp0 = dyn_cast<ICmpInst>(Op0);
- auto *Cmp1 = dyn_cast<ICmpInst>(Op1);
- if (!Cmp0 || !Cmp1)
- return nullptr;
+ Value *V = nullptr;
+ auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
+ auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
+ if (ICmp0 && ICmp1)
+ V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1) :
+ simplifyOrOfICmps(ICmp0, ICmp1);
+
+ auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
+ auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
+ if (FCmp0 && FCmp1)
+ V = simplifyAndOrOfFCmps(FCmp0, FCmp1, IsAnd);
- Value *V =
- IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1);
if (!V)
return nullptr;
if (!Cast0)
@@ -1781,7 +1686,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op1;
}
- if (Value *V = simplifyAndOrOfICmps(Op0, Op1, true))
+ if (Value *V = simplifyAndOrOfCmps(Op0, Op1, true))
return V;
// Try some generic simplifications for associative operations.
@@ -1902,7 +1807,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
return Op0;
- if (Value *V = simplifyAndOrOfICmps(Op0, Op1, false))
+ if (Value *V = simplifyAndOrOfCmps(Op0, Op1, false))
return V;
// Try some generic simplifications for associative operations.
@@ -2062,13 +1967,14 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
static Constant *
computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, CmpInst::Predicate Pred,
- const Instruction *CxtI, Value *LHS, Value *RHS) {
+ AssumptionCache *AC, const Instruction *CxtI,
+ Value *LHS, Value *RHS) {
// First, skip past any trivial no-ops.
LHS = LHS->stripPointerCasts();
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
- if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) &&
+ if (llvm::isKnownNonZero(LHS, DL) && isa<ConstantPointerNull>(RHS) &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
@@ -2223,9 +2129,11 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// cannot be elided. We cannot fold malloc comparison to null. Also, the
// dynamic allocation call could be either of the operands.
Value *MI = nullptr;
- if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonNullAt(RHS, CxtI, DT))
+ if (isAllocLikeFn(LHS, TLI) &&
+ llvm::isKnownNonZero(RHS, DL, 0, nullptr, CxtI, DT))
MI = LHS;
- else if (isAllocLikeFn(RHS, TLI) && llvm::isKnownNonNullAt(LHS, CxtI, DT))
+ else if (isAllocLikeFn(RHS, TLI) &&
+ llvm::isKnownNonZero(LHS, DL, 0, nullptr, CxtI, DT))
MI = RHS;
// FIXME: We should also fold the compare when the pointer escapes, but the
// compare dominates the pointer escape
@@ -3312,7 +3220,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
- if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS))
+ if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, LHS,
+ RHS))
return C;
if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
@@ -3320,7 +3229,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.DL.getTypeSizeInBits(CLHS->getType()) &&
Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
Q.DL.getTypeSizeInBits(CRHS->getType()))
- if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI,
+ if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI,
CLHS->getPointerOperand(),
CRHS->getPointerOperand()))
return C;
@@ -3416,17 +3325,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(RetTy);
}
- // Handle fcmp with constant RHS
- const ConstantFP *CFP = nullptr;
- if (const auto *RHSC = dyn_cast<Constant>(RHS)) {
- if (RHS->getType()->isVectorTy())
- CFP = dyn_cast_or_null<ConstantFP>(RHSC->getSplatValue());
- else
- CFP = dyn_cast<ConstantFP>(RHSC);
- }
- if (CFP) {
+ // Handle fcmp with constant RHS.
+ const APFloat *C;
+ if (match(RHS, m_APFloat(C))) {
// If the constant is a nan, see if we can fold the comparison based on it.
- if (CFP->getValueAPF().isNaN()) {
+ if (C->isNaN()) {
if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
return getFalse(RetTy);
assert(FCmpInst::isUnordered(Pred) &&
@@ -3435,8 +3338,8 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(RetTy);
}
// Check whether the constant is an infinity.
- if (CFP->getValueAPF().isInfinity()) {
- if (CFP->getValueAPF().isNegative()) {
+ if (C->isInfinity()) {
+ if (C->isNegative()) {
switch (Pred) {
case FCmpInst::FCMP_OLT:
// No value is ordered and less than negative infinity.
@@ -3460,7 +3363,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
}
- if (CFP->getValueAPF().isZero()) {
+ if (C->isZero()) {
switch (Pred) {
case FCmpInst::FCMP_UGE:
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
@@ -3474,6 +3377,28 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
default:
break;
}
+ } else if (C->isNegative()) {
+ assert(!C->isNaN() && "Unexpected NaN constant!");
+ // TODO: We can catch more cases by using a range check rather than
+ // relying on CannotBeOrderedLessThanZero.
+ switch (Pred) {
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_UNE:
+ // (X >= 0) implies (X > C) when (C < 0)
+ if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return getTrue(RetTy);
+ break;
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_OLT:
+ // (X >= 0) implies !(X < C) when (C < 0)
+ if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return getFalse(RetTy);
+ break;
+ default:
+ break;
+ }
}
}
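
For the new negative-constant cases: a value that cannot be ordered-less-than-zero (it is either NaN or non-negative) can never be ordered and below a negative constant, so OEQ/OLE/OLT fold to false and their unordered complements UNE/UGE/UGT fold to true. A standalone spot check of that reasoning (not the LLVM code path):

#include <cmath>
#include <limits>

int main() {
  const double C = -4.0; // a negative constant RHS
  // Values CannotBeOrderedLessThanZero would accept: NaN or >= 0.
  const double LHS[] = {0.0, 3.5, std::numeric_limits<double>::infinity(),
                        std::numeric_limits<double>::quiet_NaN()};
  for (double X : LHS) {
    bool OLT = !std::isnan(X) && X < C; // ordered and less than
    bool UGE = std::isnan(X) || X >= C; // unordered or greater-or-equal
    if (OLT || !UGE)
      return 1; // OLT must be false and UGE must be true for every such X
  }
  return 0;
}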
@@ -3620,32 +3545,16 @@ static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X,
/// An alternative way to test if a bit is set or not uses sgt/slt instead of
/// eq/ne.
-static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal,
- Value *FalseVal,
- bool TrueWhenUnset) {
- unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
- if (!BitWidth)
- return nullptr;
-
- APInt MinSignedValue;
+static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS,
+ ICmpInst::Predicate Pred,
+ Value *TrueVal, Value *FalseVal) {
Value *X;
- if (match(CmpLHS, m_Trunc(m_Value(X))) && (X == TrueVal || X == FalseVal)) {
- // icmp slt (trunc X), 0 <--> icmp ne (and X, C), 0
- // icmp sgt (trunc X), -1 <--> icmp eq (and X, C), 0
- unsigned DestSize = CmpLHS->getType()->getScalarSizeInBits();
- MinSignedValue = APInt::getSignedMinValue(DestSize).zext(BitWidth);
- } else {
- // icmp slt X, 0 <--> icmp ne (and X, C), 0
- // icmp sgt X, -1 <--> icmp eq (and X, C), 0
- X = CmpLHS;
- MinSignedValue = APInt::getSignedMinValue(BitWidth);
- }
-
- if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, &MinSignedValue,
- TrueWhenUnset))
- return V;
+ APInt Mask;
+ if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask))
+ return nullptr;
- return nullptr;
+ return simplifySelectBitTest(TrueVal, FalseVal, X, &Mask,
+ Pred == ICmpInst::ICMP_EQ);
}
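
decomposeBitTestICmp generalizes the removed special cases by turning sign-bit style compares into explicit mask tests, e.g. icmp slt X, 0 is (X & sign-bit) != 0 and icmp sgt X, -1 is (X & sign-bit) == 0. A standalone verification of those two equivalences over a sample range (sketch only):

#include <cstdint>

int main() {
  const uint32_t SignBit = 0x80000000u;
  for (int64_t V = -100000; V <= 100000; V += 37) {
    int32_t X = static_cast<int32_t>(V);
    bool SltZero = X < 0;
    bool SgtMinusOne = X > -1;
    bool SignSet = (static_cast<uint32_t>(X) & SignBit) != 0;
    // icmp slt X, 0  <->  (X & 0x80000000) != 0
    if (SltZero != SignSet)
      return 1;
    // icmp sgt X, -1 <->  (X & 0x80000000) == 0
    if (SgtMinusOne != !SignSet)
      return 1;
  }
  return 0;
}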
/// Try to simplify a select instruction when its condition operand is an
@@ -3658,8 +3567,6 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS))))
return nullptr;
- // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
- // decomposeBitTestICmp() might help.
if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) {
Value *X;
const APInt *Y;
@@ -3667,18 +3574,13 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
Pred == ICmpInst::ICMP_EQ))
return V;
- } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
- // Comparing signed-less-than 0 checks if the sign bit is set.
- if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal,
- false))
- return V;
- } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
- // Comparing signed-greater-than -1 checks if the sign bit is not set.
- if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal,
- true))
- return V;
}
+ // Check for other compares that behave like bit test.
+ if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred,
+ TrueVal, FalseVal))
+ return V;
+
if (CondVal->hasOneUse()) {
const APInt *C;
if (match(CmpRHS, m_APInt(C))) {
@@ -3735,6 +3637,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
// select true, X, Y -> X
// select false, X, Y -> Y
if (Constant *CB = dyn_cast<Constant>(CondVal)) {
+ if (Constant *CT = dyn_cast<Constant>(TrueVal))
+ if (Constant *CF = dyn_cast<Constant>(FalseVal))
+ return ConstantFoldSelectInstruction(CB, CT, CF);
if (CB->isAllOnesValue())
return TrueVal;
if (CB->isNullValue())
@@ -3921,6 +3826,28 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
}
+Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
+ const SimplifyQuery &Q) {
+ // Try to constant fold.
+ auto *VecC = dyn_cast<Constant>(Vec);
+ auto *ValC = dyn_cast<Constant>(Val);
+ auto *IdxC = dyn_cast<Constant>(Idx);
+ if (VecC && ValC && IdxC)
+ return ConstantFoldInsertElementInstruction(VecC, ValC, IdxC);
+
+ // Fold into undef if index is out of bounds.
+ if (auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ uint64_t NumElements = cast<VectorType>(Vec->getType())->getNumElements();
+
+ if (CI->uge(NumElements))
+ return UndefValue::get(Vec->getType());
+ }
+
+ // TODO: We should also fold if the index is itself an undef.
+
+ return nullptr;
+}
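
The order of checks in SimplifyInsertElementInst (constant fold first, then fold an out-of-range constant index to undef) can be modelled without the LLVM types; the toy classifier below is an illustrative sketch, not the real API.

#include <cstdint>
#include <optional>

enum class Fold { None, ConstantFolded, Undef };

// Toy stand-in for the decision order: constant-fold when all operands are
// constants, otherwise fold to undef when a constant index is known to be out
// of range for the vector type.
static Fold classifyInsertElement(bool VecIsConst, bool ValIsConst,
                                  std::optional<uint64_t> Idx,
                                  uint64_t NumElements) {
  if (VecIsConst && ValIsConst && Idx)
    return Fold::ConstantFolded;
  if (Idx && *Idx >= NumElements)
    return Fold::Undef;
  return Fold::None;
}

int main() {
  // <4 x i32> with index 7: out of range, so the result is undef.
  if (classifyInsertElement(false, false, 7, 4) != Fold::Undef)
    return 1;
  // All-constant operands constant-fold.
  if (classifyInsertElement(true, true, 2, 4) != Fold::ConstantFolded)
    return 1;
  // Non-constant index: no simplification.
  if (classifyInsertElement(false, true, std::nullopt, 4) != Fold::None)
    return 1;
  return 0;
}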
+
/// Given operands for an ExtractValueInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
@@ -3973,6 +3900,11 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
return Elt;
+ // An undef extract index can be arbitrarily chosen to be an out-of-range
+ // index value, which would result in the instruction being undef.
+ if (isa<UndefValue>(Idx))
+ return UndefValue::get(Vec->getType()->getVectorElementType());
+
return nullptr;
}
@@ -4186,6 +4118,179 @@ Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
}
+/// Given operands for an FAdd, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
+ return C;
+
+ // fadd X, -0 ==> X
+ if (match(Op1, m_NegZero()))
+ return Op0;
+
+ // fadd X, 0 ==> X, when we know X is not -0
+ if (match(Op1, m_Zero()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
+ return Op0;
+
+ // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+ // where nnan and ninf have to occur at least once somewhere in this
+ // expression
+ Value *SubOp = nullptr;
+ if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
+ SubOp = Op1;
+ else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
+ SubOp = Op0;
+ if (SubOp) {
+ Instruction *FSub = cast<Instruction>(SubOp);
+ if ((FMF.noNaNs() || FSub->hasNoNaNs()) &&
+ (FMF.noInfs() || FSub->hasNoInfs()))
+ return Constant::getNullValue(Op0->getType());
+ }
+
+ return nullptr;
+}
+
+/// Given operands for an FSub, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
+ return C;
+
+ // fsub X, 0 ==> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // fsub X, -0 ==> X, when we know X is not -0
+ if (match(Op1, m_NegZero()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
+ return Op0;
+
+ // fsub -0.0, (fsub -0.0, X) ==> X
+ Value *X;
+ if (match(Op0, m_NegZero()) && match(Op1, m_FSub(m_NegZero(), m_Value(X))))
+ return X;
+
+ // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
+ if (FMF.noSignedZeros() && match(Op0, m_AnyZero()) &&
+ match(Op1, m_FSub(m_AnyZero(), m_Value(X))))
+ return X;
+
+ // fsub nnan x, x ==> 0.0
+ if (FMF.noNaNs() && Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ return nullptr;
+}
+
+/// Given the operands for an FMul, see if we can fold the result
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
+ return C;
+
+ // fmul X, 1.0 ==> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
+
+ // fmul nnan nsz X, 0 ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
+ return Op1;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
+
+Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
+ return C;
+
+ // undef / X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // X / 1.0 -> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
+
+ // 0 / X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
+ if (FMF.noNaNs()) {
+ // X / X -> 1.0 is legal when NaNs are ignored.
+ if (Op0 == Op1)
+ return ConstantFP::get(Op0->getType(), 1.0);
+
+ // -X / X -> -1.0 and
+ // X / -X -> -1.0 are legal when NaNs are ignored.
+ // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
+ if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) &&
+ BinaryOperator::getFNegArgument(Op0) == Op1) ||
+ (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) &&
+ BinaryOperator::getFNegArgument(Op1) == Op0))
+ return ConstantFP::get(Op0->getType(), -1.0);
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
+ return C;
+
+ // undef % X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // 0 % X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
//=== Helper functions for higher up the class hierarchy.
/// Given operands for a BinaryOperator, see if we can fold the result.
@@ -4195,28 +4300,18 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
switch (Opcode) {
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
- case Instruction::FAdd:
- return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::Sub:
return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse);
- case Instruction::FSub:
- return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::Mul:
return SimplifyMulInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FMul:
- return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SDiv:
return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::UDiv:
return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FDiv:
- return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SRem:
return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::URem:
return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FRem:
- return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::LShr:
@@ -4229,6 +4324,16 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return SimplifyOrInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Xor:
return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FAdd:
+ return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::FSub:
+ return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::FMul:
+ return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::FDiv:
+ return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::FRem:
+ return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
default:
llvm_unreachable("Unexpected opcode");
}
@@ -4290,6 +4395,7 @@ static bool IsIdempotent(Intrinsic::ID ID) {
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
+ case Intrinsic::canonicalize:
return true;
}
}
@@ -4615,6 +4721,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
IV->getIndices(), Q);
break;
}
+ case Instruction::InsertElement: {
+ auto *IE = cast<InsertElementInst>(I);
+ Result = SimplifyInsertElementInst(IE->getOperand(0), IE->getOperand(1),
+ IE->getOperand(2), Q);
+ break;
+ }
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(I);
Result = SimplifyExtractValueInst(EVI->getAggregateOperand(),
diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp
index 6c10d73bcb44..6d5de22cb93f 100644
--- a/contrib/llvm/lib/Analysis/Interval.cpp
+++ b/contrib/llvm/lib/Analysis/Interval.cpp
@@ -16,7 +16,6 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
using namespace llvm;
@@ -25,7 +24,6 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// isLoop - Find out if there is a back edge in this interval...
-//
bool Interval::isLoop() const {
// There is a loop in this interval iff one of the predecessors of the header
// node lives in the interval.
@@ -36,7 +34,6 @@ bool Interval::isLoop() const {
return false;
}
-
void Interval::print(raw_ostream &OS) const {
OS << "-------------------------------------------------------------\n"
<< "Interval Contents:\n";
diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
index a4e56e0694bc..c777d91b67c6 100644
--- a/contrib/llvm/lib/Analysis/IntervalPartition.cpp
+++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
@@ -12,10 +12,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/IntervalPartition.h"
+#include "llvm/Analysis/Interval.h"
#include "llvm/Analysis/IntervalIterator.h"
+#include "llvm/Pass.h"
+#include <cassert>
+#include <utility>
+
using namespace llvm;
char IntervalPartition::ID = 0;
+
INITIALIZE_PASS(IntervalPartition, "intervals",
"Interval Partition Construction", true, true)
@@ -40,7 +47,6 @@ void IntervalPartition::print(raw_ostream &O, const Module*) const {
// addIntervalToPartition - Add an interval to the internal list of intervals,
// and then add mappings from all of the basic blocks in the interval to the
// interval itself (in the IntervalMap).
-//
void IntervalPartition::addIntervalToPartition(Interval *I) {
Intervals.push_back(I);
@@ -54,7 +60,6 @@ void IntervalPartition::addIntervalToPartition(Interval *I) {
// the interval data structures. After interval generation is complete,
// run through all of the intervals and propagate successor info as
// predecessor info.
-//
void IntervalPartition::updatePredecessors(Interval *Int) {
BasicBlock *Header = Int->getHeaderNode();
for (BasicBlock *Successor : Int->Successors)
@@ -63,7 +68,6 @@ void IntervalPartition::updatePredecessors(Interval *Int) {
// IntervalPartition ctor - Build the first level interval partition for the
// specified function...
-//
bool IntervalPartition::runOnFunction(Function &F) {
  // Pass false to intervals_begin because we take ownership of its memory
function_interval_iterator I = intervals_begin(&F, false);
@@ -84,11 +88,9 @@ bool IntervalPartition::runOnFunction(Function &F) {
return false;
}
-
// IntervalPartition ctor - Build a reduced interval partition from an
// existing interval graph. This takes an additional boolean parameter to
// distinguish it from a copy constructor. Always pass in false for now.
-//
IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
: FunctionPass(ID) {
assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
@@ -110,4 +112,3 @@ IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
updatePredecessors(Intervals[i]);
}
-
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
index d287f81985fd..54299d078be5 100644
--- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -8,15 +8,31 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <string>
+#include <tuple>
#include <utility>
using namespace llvm;
@@ -175,7 +191,7 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
LazyCallGraph::LazyCallGraph(LazyCallGraph &&G)
: BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)),
EntryEdges(std::move(G.EntryEdges)), SCCBPA(std::move(G.SCCBPA)),
- SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)),
+ SCCMap(std::move(G.SCCMap)),
LibFunctions(std::move(G.LibFunctions)) {
updateGraphPtrs();
}
@@ -186,7 +202,6 @@ LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
EntryEdges = std::move(G.EntryEdges);
SCCBPA = std::move(G.SCCBPA);
SCCMap = std::move(G.SCCMap);
- LeafRefSCCs = std::move(G.LeafRefSCCs);
LibFunctions = std::move(G.LibFunctions);
updateGraphPtrs();
return *this;
@@ -212,7 +227,7 @@ void LazyCallGraph::SCC::verify() {
assert(N->LowLink == -1 &&
"Must set low link to -1 when adding a node to an SCC!");
for (Edge &E : **N)
- assert(E.getNode() && "Can't have an unpopulated node!");
+ assert(E.getNode().isPopulated() && "Can't have an unpopulated node!");
}
}
#endif
@@ -313,38 +328,49 @@ void LazyCallGraph::RefSCC::verify() {
"Edge between SCCs violates post-order relationship.");
continue;
}
- assert(TargetSCC.getOuterRefSCC().Parents.count(this) &&
- "Edge to a RefSCC missing us in its parent set.");
}
}
-
- // Check that our parents are actually parents.
- for (RefSCC *ParentRC : Parents) {
- assert(ParentRC != this && "Cannot be our own parent!");
- auto HasConnectingEdge = [&] {
- for (SCC &C : *ParentRC)
- for (Node &N : C)
- for (Edge &E : *N)
- if (G->lookupRefSCC(E.getNode()) == this)
- return true;
- return false;
- };
- assert(HasConnectingEdge() && "No edge connects the parent to us!");
- }
}
#endif
-bool LazyCallGraph::RefSCC::isDescendantOf(const RefSCC &C) const {
- // Walk up the parents of this SCC and verify that we eventually find C.
- SmallVector<const RefSCC *, 4> AncestorWorklist;
- AncestorWorklist.push_back(this);
+bool LazyCallGraph::RefSCC::isParentOf(const RefSCC &RC) const {
+ if (&RC == this)
+ return false;
+
+ // Search all edges to see if this is a parent.
+ for (SCC &C : *this)
+ for (Node &N : C)
+ for (Edge &E : *N)
+ if (G->lookupRefSCC(E.getNode()) == &RC)
+ return true;
+
+ return false;
+}
+
+bool LazyCallGraph::RefSCC::isAncestorOf(const RefSCC &RC) const {
+ if (&RC == this)
+ return false;
+
+ // For each descendant of this RefSCC, see if one of its children is the
+ // argument. If not, add that descendant to the worklist and continue
+ // searching.
+ SmallVector<const RefSCC *, 4> Worklist;
+ SmallPtrSet<const RefSCC *, 4> Visited;
+ Worklist.push_back(this);
+ Visited.insert(this);
do {
- const RefSCC *AncestorC = AncestorWorklist.pop_back_val();
- if (AncestorC->isChildOf(C))
- return true;
- for (const RefSCC *ParentC : AncestorC->Parents)
- AncestorWorklist.push_back(ParentC);
- } while (!AncestorWorklist.empty());
+ const RefSCC &DescendantRC = *Worklist.pop_back_val();
+ for (SCC &C : DescendantRC)
+ for (Node &N : C)
+ for (Edge &E : *N) {
+ auto *ChildRC = G->lookupRefSCC(E.getNode());
+ if (ChildRC == &RC)
+ return true;
+ if (!ChildRC || !Visited.insert(ChildRC).second)
+ continue;
+ Worklist.push_back(ChildRC);
+ }
+ } while (!Worklist.empty());
return false;
}
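
With the parent sets removed, isAncestorOf is a plain reachability query over ref edges. The sketch below reproduces the worklist-plus-visited-set walk on a small directed graph of integer ids; it is a standalone illustration, not the LazyCallGraph types.

#include <map>
#include <set>
#include <vector>

using Graph = std::map<int, std::vector<int>>; // node id -> child ids

// Mirrors RefSCC::isAncestorOf: search the descendants of Src and report
// whether Target is reachable through one or more edges.
static bool isAncestorOf(const Graph &G, int Src, int Target) {
  if (Src == Target)
    return false;
  std::vector<int> Worklist{Src};
  std::set<int> Visited{Src};
  do {
    int N = Worklist.back();
    Worklist.pop_back();
    auto It = G.find(N);
    if (It == G.end())
      continue;
    for (int Child : It->second) {
      if (Child == Target)
        return true;
      if (Visited.insert(Child).second)
        Worklist.push_back(Child);
    }
  } while (!Worklist.empty());
  return false;
}

int main() {
  // 0 -> 1 -> 2, and 3 is disconnected.
  Graph G = {{0, {1}}, {1, {2}}};
  return (isAncestorOf(G, 0, 2) && !isAncestorOf(G, 2, 0) &&
          !isAncestorOf(G, 0, 3))
             ? 0
             : 1;
}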
@@ -907,17 +933,13 @@ void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN,
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
- RefSCC &TargetC = *G->lookupRefSCC(TargetN);
- assert(&TargetC != this && "Target must not be in this RefSCC.");
+ assert(G->lookupRefSCC(TargetN) != this &&
+ "Target must not be in this RefSCC.");
#ifdef EXPENSIVE_CHECKS
- assert(TargetC.isDescendantOf(*this) &&
+ assert(G->lookupRefSCC(TargetN)->isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
#endif
- // The only change required is to add this SCC to the parent set of the
- // callee.
- TargetC.Parents.insert(this);
-
#ifndef NDEBUG
// Check that the RefSCC is still valid.
verify();
@@ -957,22 +979,20 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
// RefSCCs (and their edges) are visited here.
auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
Set.insert(&SourceC);
- SmallVector<RefSCC *, 4> Worklist;
- Worklist.push_back(&SourceC);
- do {
- RefSCC &RC = *Worklist.pop_back_val();
- for (RefSCC &ParentRC : RC.parents()) {
- // Skip any RefSCCs outside the range of source to target in the
- // postorder sequence.
- int ParentIdx = G->getRefSCCIndex(ParentRC);
- assert(ParentIdx > SourceIdx && "Parent cannot precede source in postorder!");
- if (ParentIdx > TargetIdx)
- continue;
- if (Set.insert(&ParentRC).second)
- // First edge connecting to this parent, add it to our worklist.
- Worklist.push_back(&ParentRC);
- }
- } while (!Worklist.empty());
+ auto IsConnected = [&](RefSCC &RC) {
+ for (SCC &C : RC)
+ for (Node &N : C)
+ for (Edge &E : *N)
+ if (Set.count(G->lookupRefSCC(E.getNode())))
+ return true;
+
+ return false;
+ };
+
+ for (RefSCC *C : make_range(G->PostOrderRefSCCs.begin() + SourceIdx + 1,
+ G->PostOrderRefSCCs.begin() + TargetIdx + 1))
+ if (IsConnected(*C))
+ Set.insert(C);
};
// Use a normal worklist to find which SCCs the target connects to. We still
@@ -1023,12 +1043,6 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
assert(RC != this && "We're merging into the target RefSCC, so it "
"shouldn't be in the range.");
- // Merge the parents which aren't part of the merge into the our parents.
- for (RefSCC *ParentRC : RC->Parents)
- if (!MergeSet.count(ParentRC))
- Parents.insert(ParentRC);
- RC->Parents.clear();
-
// Walk the inner SCCs to update their up-pointer and walk all the edges to
// update any parent sets.
// FIXME: We should try to find a way to avoid this (rather expensive) edge
@@ -1036,16 +1050,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
for (SCC &InnerC : *RC) {
InnerC.OuterRefSCC = this;
SCCIndices[&InnerC] = SCCIndex++;
- for (Node &N : InnerC) {
+ for (Node &N : InnerC)
G->SCCMap[&N] = &InnerC;
- for (Edge &E : *N) {
- RefSCC &ChildRC = *G->lookupRefSCC(E.getNode());
- if (MergeSet.count(&ChildRC))
- continue;
- ChildRC.Parents.erase(RC);
- ChildRC.Parents.insert(this);
- }
- }
}
// Now merge in the SCCs. We can actually move here so try to reuse storage
@@ -1087,12 +1093,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
assert(G->lookupRefSCC(SourceN) == this &&
"The source must be a member of this RefSCC.");
-
- RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
- assert(&TargetRC != this && "The target must not be a member of this RefSCC");
-
- assert(!is_contained(G->LeafRefSCCs, this) &&
- "Cannot have a leaf RefSCC source.");
+ assert(G->lookupRefSCC(TargetN) != this &&
+ "The target must not be a member of this RefSCC");
#ifndef NDEBUG
// In a debug build, verify the RefSCC is valid to start with and when this
@@ -1105,122 +1107,72 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
bool Removed = SourceN->removeEdgeInternal(TargetN);
(void)Removed;
assert(Removed && "Target not in the edge set for this caller?");
-
- bool HasOtherEdgeToChildRC = false;
- bool HasOtherChildRC = false;
- for (SCC *InnerC : SCCs) {
- for (Node &N : *InnerC) {
- for (Edge &E : *N) {
- RefSCC &OtherChildRC = *G->lookupRefSCC(E.getNode());
- if (&OtherChildRC == &TargetRC) {
- HasOtherEdgeToChildRC = true;
- break;
- }
- if (&OtherChildRC != this)
- HasOtherChildRC = true;
- }
- if (HasOtherEdgeToChildRC)
- break;
- }
- if (HasOtherEdgeToChildRC)
- break;
- }
- // Because the SCCs form a DAG, deleting such an edge cannot change the set
- // of SCCs in the graph. However, it may cut an edge of the SCC DAG, making
- // the source SCC no longer connected to the target SCC. If so, we need to
- // update the target SCC's map of its parents.
- if (!HasOtherEdgeToChildRC) {
- bool Removed = TargetRC.Parents.erase(this);
- (void)Removed;
- assert(Removed &&
- "Did not find the source SCC in the target SCC's parent list!");
-
- // It may orphan an SCC if it is the last edge reaching it, but that does
- // not violate any invariants of the graph.
- if (TargetRC.Parents.empty())
- DEBUG(dbgs() << "LCG: Update removing " << SourceN.getFunction().getName()
- << " -> " << TargetN.getFunction().getName()
- << " edge orphaned the callee's SCC!\n");
-
- // It may make the Source SCC a leaf SCC.
- if (!HasOtherChildRC)
- G->LeafRefSCCs.push_back(this);
- }
}
SmallVector<LazyCallGraph::RefSCC *, 1>
-LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
- assert(!(*SourceN)[TargetN].isCall() &&
- "Cannot remove a call edge, it must first be made a ref edge");
+LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN,
+ ArrayRef<Node *> TargetNs) {
+ // We return a list of the resulting *new* RefSCCs in post-order.
+ SmallVector<RefSCC *, 1> Result;
#ifndef NDEBUG
- // In a debug build, verify the RefSCC is valid to start with and when this
- // routine finishes.
+ // In a debug build, verify the RefSCC is valid to start with and that either
+ // we return an empty list of result RefSCCs and this RefSCC remains valid,
+ // or we return new RefSCCs and this RefSCC is dead.
verify();
- auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+ auto VerifyOnExit = make_scope_exit([&]() {
+ // If we didn't replace our RefSCC with new ones, check that this one
+ // remains valid.
+ if (G)
+ verify();
+ });
#endif
- // First remove the actual edge.
- bool Removed = SourceN->removeEdgeInternal(TargetN);
- (void)Removed;
- assert(Removed && "Target not in the edge set for this caller?");
+ // First remove the actual edges.
+ for (Node *TargetN : TargetNs) {
+ assert(!(*SourceN)[*TargetN].isCall() &&
+ "Cannot remove a call edge, it must first be made a ref edge");
- // We return a list of the resulting *new* RefSCCs in post-order.
- SmallVector<RefSCC *, 1> Result;
+ bool Removed = SourceN->removeEdgeInternal(*TargetN);
+ (void)Removed;
+ assert(Removed && "Target not in the edge set for this caller?");
+ }
- // Direct recursion doesn't impact the SCC graph at all.
- if (&SourceN == &TargetN)
+ // Direct self references don't impact the ref graph at all.
+ if (llvm::all_of(TargetNs,
+ [&](Node *TargetN) { return &SourceN == TargetN; }))
return Result;
- // If this ref edge is within an SCC then there are sufficient other edges to
- // form a cycle without this edge so removing it is a no-op.
+ // If all targets are in the same SCC as the source, because no call edges
+  // were removed, there is no RefSCC structure change.
SCC &SourceC = *G->lookupSCC(SourceN);
- SCC &TargetC = *G->lookupSCC(TargetN);
- if (&SourceC == &TargetC)
+ if (llvm::all_of(TargetNs, [&](Node *TargetN) {
+ return G->lookupSCC(*TargetN) == &SourceC;
+ }))
return Result;
// We build somewhat synthetic new RefSCCs by providing a postorder mapping
- // for each inner SCC. We also store these associated with *nodes* rather
- // than SCCs because this saves a round-trip through the node->SCC map and in
- // the common case, SCCs are small. We will verify that we always give the
- // same number to every node in the SCC such that these are equivalent.
- const int RootPostOrderNumber = 0;
- int PostOrderNumber = RootPostOrderNumber + 1;
- SmallDenseMap<Node *, int> PostOrderMapping;
-
- // Every node in the target SCC can already reach every node in this RefSCC
- // (by definition). It is the only node we know will stay inside this RefSCC.
- // Everything which transitively reaches Target will also remain in the
- // RefSCC. We handle this by pre-marking that the nodes in the target SCC map
- // back to the root post order number.
- //
- // This also enables us to take a very significant short-cut in the standard
- // Tarjan walk to re-form RefSCCs below: whenever we build an edge that
- // references the target node, we know that the target node eventually
- // references all other nodes in our walk. As a consequence, we can detect
- // and handle participants in that cycle without walking all the edges that
- // form the connections, and instead by relying on the fundamental guarantee
- // coming into this operation.
- for (Node &N : TargetC)
- PostOrderMapping[&N] = RootPostOrderNumber;
+ // for each inner SCC. We store these inside the low-link field of the nodes
+ // rather than associated with SCCs because this saves a round-trip through
+ // the node->SCC map and in the common case, SCCs are small. We will verify
+ // that we always give the same number to every node in the SCC such that
+ // these are equivalent.
+ int PostOrderNumber = 0;
// Reset all the other nodes to prepare for a DFS over them, and add them to
// our worklist.
SmallVector<Node *, 8> Worklist;
for (SCC *C : SCCs) {
- if (C == &TargetC)
- continue;
-
for (Node &N : *C)
N.DFSNumber = N.LowLink = 0;
Worklist.append(C->Nodes.begin(), C->Nodes.end());
}
- auto MarkNodeForSCCNumber = [&PostOrderMapping](Node &N, int Number) {
- N.DFSNumber = N.LowLink = -1;
- PostOrderMapping[&N] = Number;
- };
+ // Track the number of nodes in this RefSCC so that we can quickly recognize
+ // an important special case of the edge removal not breaking the cycle of
+ // this RefSCC.
+ const int NumRefSCCNodes = Worklist.size();
SmallVector<std::pair<Node *, EdgeSequence::iterator>, 4> DFSStack;
SmallVector<Node *, 4> PendingRefSCCStack;
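
The DFS that follows is an iterative Tarjan-style walk: DFSNumber records discovery order, LowLink tracks the smallest discovery number reachable without leaving the pending stack, and a node whose LowLink equals its own DFSNumber closes off one component. For reference, the textbook recursive form over a plain adjacency list looks like the sketch below (integer nodes only; the routine above is iterative and additionally reuses LowLink afterwards as scratch space for postorder numbers):

#include <algorithm>
#include <vector>

// Classic Tarjan strongly-connected-components computation.
struct TarjanSCC {
  const std::vector<std::vector<int>> &Adj;
  std::vector<int> DFSNum, Low, Stack;
  std::vector<bool> OnStack;
  std::vector<std::vector<int>> SCCs;
  int NextDFS = 1;

  explicit TarjanSCC(const std::vector<std::vector<int>> &A)
      : Adj(A), DFSNum(A.size(), 0), Low(A.size(), 0), OnStack(A.size(), false) {
    for (int N = 0, E = (int)A.size(); N != E; ++N)
      if (!DFSNum[N])
        visit(N);
  }

  void visit(int N) {
    DFSNum[N] = Low[N] = NextDFS++;
    Stack.push_back(N);
    OnStack[N] = true;
    for (int C : Adj[N]) {
      if (!DFSNum[C]) { // Tree edge: recurse, then propagate the low link.
        visit(C);
        Low[N] = std::min(Low[N], Low[C]);
      } else if (OnStack[C]) { // Back or cross edge into the current stack.
        Low[N] = std::min(Low[N], DFSNum[C]);
      }
    }
    if (Low[N] == DFSNum[N]) { // N roots a component: pop it off the stack.
      SCCs.emplace_back();
      int M;
      do {
        M = Stack.back();
        Stack.pop_back();
        OnStack[M] = false;
        SCCs.back().push_back(M);
      } while (M != N);
    }
  }
};

Components come out in an order where every component is emitted before any component that references into it, which is exactly the postorder property the surrounding code depends on.
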
@@ -1267,31 +1219,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
continue;
}
if (ChildN.DFSNumber == -1) {
- // Check if this edge's target node connects to the deleted edge's
- // target node. If so, we know that every node connected will end up
- // in this RefSCC, so collapse the entire current stack into the root
- // slot in our SCC numbering. See above for the motivation of
- // optimizing the target connected nodes in this way.
- auto PostOrderI = PostOrderMapping.find(&ChildN);
- if (PostOrderI != PostOrderMapping.end() &&
- PostOrderI->second == RootPostOrderNumber) {
- MarkNodeForSCCNumber(*N, RootPostOrderNumber);
- while (!PendingRefSCCStack.empty())
- MarkNodeForSCCNumber(*PendingRefSCCStack.pop_back_val(),
- RootPostOrderNumber);
- while (!DFSStack.empty())
- MarkNodeForSCCNumber(*DFSStack.pop_back_val().first,
- RootPostOrderNumber);
- // Ensure we break all the way out of the enclosing loop.
- N = nullptr;
- break;
- }
-
// If this child isn't currently in this RefSCC, no need to process
- // it. However, we do need to remove this RefSCC from its RefSCC's
- // parent set.
- RefSCC &ChildRC = *G->lookupRefSCC(ChildN);
- ChildRC.Parents.erase(this);
+ // it.
++I;
continue;
}
@@ -1304,9 +1233,6 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
N->LowLink = ChildN.LowLink;
++I;
}
- if (!N)
- // We short-circuited this node.
- break;
      // We've finished processing N and its descendants, put it on our pending
// stack to eventually get merged into a RefSCC.
@@ -1321,146 +1247,98 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
}
// Otherwise, form a new RefSCC from the top of the pending node stack.
+ int RefSCCNumber = PostOrderNumber++;
int RootDFSNumber = N->DFSNumber;
+
// Find the range of the node stack by walking down until we pass the
- // root DFS number.
- auto RefSCCNodes = make_range(
- PendingRefSCCStack.rbegin(),
- find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ // root DFS number. Update the DFS numbers and low link numbers in the
+ // process to avoid re-walking this list where possible.
+ auto StackRI = find_if(reverse(PendingRefSCCStack), [&](Node *N) {
+ if (N->DFSNumber < RootDFSNumber)
+ // We've found the bottom.
+ return true;
- // Mark the postorder number for these nodes and clear them off the
- // stack. We'll use the postorder number to pull them into RefSCCs at the
- // end. FIXME: Fuse with the loop above.
- int RefSCCNumber = PostOrderNumber++;
- for (Node *N : RefSCCNodes)
- MarkNodeForSCCNumber(*N, RefSCCNumber);
+ // Update this node and keep scanning.
+ N->DFSNumber = -1;
+ // Save the post-order number in the lowlink field so that we can use
+ // it to map SCCs into new RefSCCs after we finish the DFS.
+ N->LowLink = RefSCCNumber;
+ return false;
+ });
+ auto RefSCCNodes = make_range(StackRI.base(), PendingRefSCCStack.end());
+
+ // If we find a cycle containing all nodes originally in this RefSCC then
+ // the removal hasn't changed the structure at all. This is an important
+ // special case and we can directly exit the entire routine more
+ // efficiently as soon as we discover it.
+ if (std::distance(RefSCCNodes.begin(), RefSCCNodes.end()) ==
+ NumRefSCCNodes) {
+ // Clear out the low link field as we won't need it.
+ for (Node *N : RefSCCNodes)
+ N->LowLink = -1;
+ // Return the empty result immediately.
+ return Result;
+ }
- PendingRefSCCStack.erase(RefSCCNodes.end().base(),
- PendingRefSCCStack.end());
+ // We've already marked the nodes internally with the RefSCC number so
+ // just clear them off the stack and continue.
+ PendingRefSCCStack.erase(RefSCCNodes.begin(), PendingRefSCCStack.end());
} while (!DFSStack.empty());
assert(DFSStack.empty() && "Didn't flush the entire DFS stack!");
assert(PendingRefSCCStack.empty() && "Didn't flush all pending nodes!");
} while (!Worklist.empty());
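
Once the DFS has numbered every component, the rest of the routine only has to regroup existing SCCs: one fresh RefSCC is created per postorder number, and each SCC is appended to its bucket in the order it already had, which is all the "radix-sort style map" mentioned below amounts to. As a generic sketch of stable bucketing by a small integer key (plain ints, not the RefSCC containers):

#include <utility>
#include <vector>

// Items are (value, group) pairs with group in [0, NumGroups). Appending in
// input order keeps the relative order of values inside each group.
static std::vector<std::vector<int>>
bucketByNumber(const std::vector<std::pair<int, int>> &Items, int NumGroups) {
  std::vector<std::vector<int>> Groups(NumGroups);
  for (const auto &It : Items)
    Groups[It.second].push_back(It.first);
  return Groups;
}
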
- // We now have a post-order numbering for RefSCCs and a mapping from each
- // node in this RefSCC to its final RefSCC. We create each new RefSCC node
- // (re-using this RefSCC node for the root) and build a radix-sort style map
- // from postorder number to the RefSCC. We then append SCCs to each of these
- // RefSCCs in the order they occured in the original SCCs container.
- for (int i = 1; i < PostOrderNumber; ++i)
+ assert(PostOrderNumber > 1 &&
+ "Should never finish the DFS when the existing RefSCC remains valid!");
+
+ // Otherwise we create a collection of new RefSCC nodes and build
+ // a radix-sort style map from postorder number to these new RefSCCs. We then
+  // append SCCs to each of these RefSCCs in the order they occurred in the
+ // original SCCs container.
+ for (int i = 0; i < PostOrderNumber; ++i)
Result.push_back(G->createRefSCC(*G));
// Insert the resulting postorder sequence into the global graph postorder
- // sequence before the current RefSCC in that sequence. The idea being that
- // this RefSCC is the target of the reference edge removed, and thus has
- // a direct or indirect edge to every other RefSCC formed and so must be at
- // the end of any postorder traversal.
+ // sequence before the current RefSCC in that sequence, and then remove the
+ // current one.
//
// FIXME: It'd be nice to change the APIs so that we returned an iterator
// range over the global postorder sequence and generally use that sequence
// rather than building a separate result vector here.
- if (!Result.empty()) {
- int Idx = G->getRefSCCIndex(*this);
- G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx,
- Result.begin(), Result.end());
- for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size()))
- G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i;
- assert(G->PostOrderRefSCCs[G->getRefSCCIndex(*this)] == this &&
- "Failed to update this RefSCC's index after insertion!");
- }
+ int Idx = G->getRefSCCIndex(*this);
+ G->PostOrderRefSCCs.erase(G->PostOrderRefSCCs.begin() + Idx);
+ G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx, Result.begin(),
+ Result.end());
+ for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size()))
+ G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i;
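
Splicing the replacement RefSCCs into the global postorder sequence is the usual erase/insert/reindex dance: drop the old entry, insert the new ones at the same position, then walk the suffix once to refresh the index map (removeDeadFunction further down erases an entry from the same sequence). A small sketch over a vector of ids plus a hash map (illustrative containers, not PostOrderRefSCCs/RefSCCIndices):

#include <unordered_map>
#include <vector>

// Replace Seq[Idx] with Replacements and refresh the position index for the
// affected suffix.
static void replaceAndReindex(std::vector<int> &Seq,
                              std::unordered_map<int, int> &IndexOf, int Idx,
                              const std::vector<int> &Replacements) {
  IndexOf.erase(Seq[Idx]);
  Seq.erase(Seq.begin() + Idx);
  Seq.insert(Seq.begin() + Idx, Replacements.begin(), Replacements.end());
  for (int I = Idx, E = (int)Seq.size(); I != E; ++I)
    IndexOf[Seq[I]] = I;
}
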
for (SCC *C : SCCs) {
- auto PostOrderI = PostOrderMapping.find(&*C->begin());
- assert(PostOrderI != PostOrderMapping.end() &&
- "Cannot have missing mappings for nodes!");
- int SCCNumber = PostOrderI->second;
-#ifndef NDEBUG
- for (Node &N : *C)
- assert(PostOrderMapping.find(&N)->second == SCCNumber &&
+ // We store the SCC number in the node's low-link field above.
+ int SCCNumber = C->begin()->LowLink;
+ // Clear out all of the SCC's node's low-link fields now that we're done
+ // using them as side-storage.
+ for (Node &N : *C) {
+ assert(N.LowLink == SCCNumber &&
"Cannot have different numbers for nodes in the same SCC!");
-#endif
- if (SCCNumber == 0)
- // The root node is handled separately by removing the SCCs.
- continue;
+ N.LowLink = -1;
+ }
- RefSCC &RC = *Result[SCCNumber - 1];
+ RefSCC &RC = *Result[SCCNumber];
int SCCIndex = RC.SCCs.size();
RC.SCCs.push_back(C);
RC.SCCIndices[C] = SCCIndex;
C->OuterRefSCC = &RC;
}
- // FIXME: We re-walk the edges in each RefSCC to establish whether it is
- // a leaf and connect it to the rest of the graph's parents lists. This is
- // really wasteful. We should instead do this during the DFS to avoid yet
- // another edge walk.
- for (RefSCC *RC : Result)
- G->connectRefSCC(*RC);
-
- // Now erase all but the root's SCCs.
- SCCs.erase(remove_if(SCCs,
- [&](SCC *C) {
- return PostOrderMapping.lookup(&*C->begin()) !=
- RootPostOrderNumber;
- }),
- SCCs.end());
+ // Now that we've moved things into the new RefSCCs, clear out our current
+ // one.
+ G = nullptr;
+ SCCs.clear();
SCCIndices.clear();
- for (int i = 0, Size = SCCs.size(); i < Size; ++i)
- SCCIndices[SCCs[i]] = i;
#ifndef NDEBUG
- // Now we need to reconnect the current (root) SCC to the graph. We do this
- // manually because we can special case our leaf handling and detect errors.
- bool IsLeaf = true;
-#endif
- for (SCC *C : SCCs)
- for (Node &N : *C) {
- for (Edge &E : *N) {
- RefSCC &ChildRC = *G->lookupRefSCC(E.getNode());
- if (&ChildRC == this)
- continue;
- ChildRC.Parents.insert(this);
-#ifndef NDEBUG
- IsLeaf = false;
-#endif
- }
- }
-#ifndef NDEBUG
- if (!Result.empty())
- assert(!IsLeaf && "This SCC cannot be a leaf as we have split out new "
- "SCCs by removing this edge.");
- if (none_of(G->LeafRefSCCs, [&](RefSCC *C) { return C == this; }))
- assert(!IsLeaf && "This SCC cannot be a leaf as it already had child "
- "SCCs before we removed this edge.");
-#endif
- // And connect both this RefSCC and all the new ones to the correct parents.
- // The easiest way to do this is just to re-analyze the old parent set.
- SmallVector<RefSCC *, 4> OldParents(Parents.begin(), Parents.end());
- Parents.clear();
- for (RefSCC *ParentRC : OldParents)
- for (SCC &ParentC : *ParentRC)
- for (Node &ParentN : ParentC)
- for (Edge &E : *ParentN) {
- RefSCC &RC = *G->lookupRefSCC(E.getNode());
- if (&RC != ParentRC)
- RC.Parents.insert(ParentRC);
- }
-
- // If this SCC stopped being a leaf through this edge removal, remove it from
- // the leaf SCC list. Note that this DTRT in the case where this was never
- // a leaf.
- // FIXME: As LeafRefSCCs could be very large, we might want to not walk the
- // entire list if this RefSCC wasn't a leaf before the edge removal.
- if (!Result.empty())
- G->LeafRefSCCs.erase(
- std::remove(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this),
- G->LeafRefSCCs.end());
-
-#ifndef NDEBUG
- // Verify all of the new RefSCCs.
+ // Verify the new RefSCCs we've built.
for (RefSCC *RC : Result)
RC->verify();
#endif
@@ -1477,18 +1355,13 @@ void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN,
// after this edge insertion.
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
- if (&TargetRC == this) {
-
+ if (&TargetRC == this)
return;
- }
#ifdef EXPENSIVE_CHECKS
assert(TargetRC.isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
#endif
- // The only change required is to add this RefSCC to the parent set of the
- // target. This is a set and so idempotent if the edge already existed.
- TargetRC.Parents.insert(this);
}
void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN,
@@ -1646,24 +1519,6 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
assert(C.size() == 1 && "Dead functions must be in a singular SCC");
assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC");
- // Clean up any remaining reference edges. Note that we walk an unordered set
- // here but are just removing and so the order doesn't matter.
- for (RefSCC &ParentRC : RC.parents())
- for (SCC &ParentC : ParentRC)
- for (Node &ParentN : ParentC)
- if (ParentN)
- ParentN->removeEdgeInternal(N);
-
- // Now remove this RefSCC from any parents sets and the leaf list.
- for (Edge &E : *N)
- if (RefSCC *TargetRC = lookupRefSCC(E.getNode()))
- TargetRC->Parents.erase(&RC);
- // FIXME: This is a linear operation which could become hot and benefit from
- // an index map.
- auto LRI = find(LeafRefSCCs, &RC);
- if (LRI != LeafRefSCCs.end())
- LeafRefSCCs.erase(LRI);
-
auto RCIndexI = RefSCCIndices.find(&RC);
int RCIndex = RCIndexI->second;
PostOrderRefSCCs.erase(PostOrderRefSCCs.begin() + RCIndex);
@@ -1674,8 +1529,11 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
// Finally clear out all the data structures from the node down through the
// components.
N.clear();
+ N.G = nullptr;
+ N.F = nullptr;
C.clear();
RC.clear();
+ RC.G = nullptr;
// Nothing to delete as all the objects are allocated in stable bump pointer
// allocators.
@@ -1686,32 +1544,13 @@ LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
}
void LazyCallGraph::updateGraphPtrs() {
- // Process all nodes updating the graph pointers.
- {
- SmallVector<Node *, 16> Worklist;
- for (Edge &E : EntryEdges)
- Worklist.push_back(&E.getNode());
-
- while (!Worklist.empty()) {
- Node &N = *Worklist.pop_back_val();
- N.G = this;
- if (N)
- for (Edge &E : *N)
- Worklist.push_back(&E.getNode());
- }
- }
+ // Walk the node map to update their graph pointers. While this iterates in
+ // an unstable order, the order has no effect so it remains correct.
+ for (auto &FunctionNodePair : NodeMap)
+ FunctionNodePair.second->G = this;
- // Process all SCCs updating the graph pointers.
- {
- SmallVector<RefSCC *, 16> Worklist(LeafRefSCCs.begin(), LeafRefSCCs.end());
-
- while (!Worklist.empty()) {
- RefSCC &C = *Worklist.pop_back_val();
- C.G = this;
- for (RefSCC &ParentC : C.parents())
- Worklist.push_back(&ParentC);
- }
- }
+ for (auto *RC : PostOrderRefSCCs)
+ RC->G = this;
}
template <typename RootsT, typename GetBeginT, typename GetEndT,
@@ -1719,7 +1558,7 @@ template <typename RootsT, typename GetBeginT, typename GetEndT,
void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
GetEndT &&GetEnd, GetNodeT &&GetNode,
FormSCCCallbackT &&FormSCC) {
- typedef decltype(GetBegin(std::declval<Node &>())) EdgeItT;
+ using EdgeItT = decltype(GetBegin(std::declval<Node &>()));
SmallVector<std::pair<Node *, EdgeItT>, 16> DFSStack;
SmallVector<Node *, 16> PendingSCCStack;
@@ -1871,7 +1710,6 @@ void LazyCallGraph::buildRefSCCs() {
[this](node_stack_range Nodes) {
RefSCC *NewRC = createRefSCC(*this);
buildSCCs(*NewRC, Nodes);
- connectRefSCC(*NewRC);
// Push the new node into the postorder list and remember its position
// in the index map.
@@ -1886,28 +1724,6 @@ void LazyCallGraph::buildRefSCCs() {
});
}
-// FIXME: We should move callers of this to embed the parent linking and leaf
-// tracking into their DFS in order to remove a full walk of all edges.
-void LazyCallGraph::connectRefSCC(RefSCC &RC) {
- // Walk all edges in the RefSCC (this remains linear as we only do this once
- // when we build the RefSCC) to connect it to the parent sets of its
- // children.
- bool IsLeaf = true;
- for (SCC &C : RC)
- for (Node &N : C)
- for (Edge &E : *N) {
- RefSCC &ChildRC = *lookupRefSCC(E.getNode());
- if (&ChildRC == &RC)
- continue;
- ChildRC.Parents.insert(&RC);
- IsLeaf = false;
- }
-
- // For the SCCs where we find no child SCCs, add them to the leaf list.
- if (IsLeaf)
- LeafRefSCCs.push_back(&RC);
-}
-
AnalysisKey LazyCallGraphAnalysis::Key;
LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index 102081e721ac..d7da669f6e79 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -17,8 +17,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/ValueLattice.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
@@ -35,7 +37,6 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
-#include <stack>
using namespace llvm;
using namespace PatternMatch;
@@ -59,225 +60,10 @@ namespace llvm {
AnalysisKey LazyValueAnalysis::Key;
-//===----------------------------------------------------------------------===//
-// LVILatticeVal
-//===----------------------------------------------------------------------===//
-
-/// This is the information tracked by LazyValueInfo for each value.
-///
-/// FIXME: This is basically just for bringup, this can be made a lot more rich
-/// in the future.
-///
-namespace {
-class LVILatticeVal {
- enum LatticeValueTy {
- /// This Value has no known value yet. As a result, this implies the
- /// producing instruction is dead. Caution: We use this as the starting
- /// state in our local meet rules. In this usage, it's taken to mean
- /// "nothing known yet".
- undefined,
-
- /// This Value has a specific constant value. (For constant integers,
- /// constantrange is used instead. Integer typed constantexprs can appear
- /// as constant.)
- constant,
-
- /// This Value is known to not have the specified value. (For constant
- /// integers, constantrange is used instead. As above, integer typed
- /// constantexprs can appear here.)
- notconstant,
-
- /// The Value falls within this range. (Used only for integer typed values.)
- constantrange,
-
- /// We can not precisely model the dynamic values this value might take.
- overdefined
- };
-
- /// Val: This stores the current lattice value along with the Constant* for
- /// the constant if this is a 'constant' or 'notconstant' value.
- LatticeValueTy Tag;
- Constant *Val;
- ConstantRange Range;
-
-public:
- LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {}
-
- static LVILatticeVal get(Constant *C) {
- LVILatticeVal Res;
- if (!isa<UndefValue>(C))
- Res.markConstant(C);
- return Res;
- }
- static LVILatticeVal getNot(Constant *C) {
- LVILatticeVal Res;
- if (!isa<UndefValue>(C))
- Res.markNotConstant(C);
- return Res;
- }
- static LVILatticeVal getRange(ConstantRange CR) {
- LVILatticeVal Res;
- Res.markConstantRange(std::move(CR));
- return Res;
- }
- static LVILatticeVal getOverdefined() {
- LVILatticeVal Res;
- Res.markOverdefined();
- return Res;
- }
-
- bool isUndefined() const { return Tag == undefined; }
- bool isConstant() const { return Tag == constant; }
- bool isNotConstant() const { return Tag == notconstant; }
- bool isConstantRange() const { return Tag == constantrange; }
- bool isOverdefined() const { return Tag == overdefined; }
-
- Constant *getConstant() const {
- assert(isConstant() && "Cannot get the constant of a non-constant!");
- return Val;
- }
-
- Constant *getNotConstant() const {
- assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
- return Val;
- }
-
- const ConstantRange &getConstantRange() const {
- assert(isConstantRange() &&
- "Cannot get the constant-range of a non-constant-range!");
- return Range;
- }
-
-private:
- void markOverdefined() {
- if (isOverdefined())
- return;
- Tag = overdefined;
- }
-
- void markConstant(Constant *V) {
- assert(V && "Marking constant with NULL");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- markConstantRange(ConstantRange(CI->getValue()));
- return;
- }
- if (isa<UndefValue>(V))
- return;
-
- assert((!isConstant() || getConstant() == V) &&
- "Marking constant with different value");
- assert(isUndefined());
- Tag = constant;
- Val = V;
- }
-
- void markNotConstant(Constant *V) {
- assert(V && "Marking constant with NULL");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
- return;
- }
- if (isa<UndefValue>(V))
- return;
-
- assert((!isConstant() || getConstant() != V) &&
- "Marking constant !constant with same value");
- assert((!isNotConstant() || getNotConstant() == V) &&
- "Marking !constant with different value");
- assert(isUndefined() || isConstant());
- Tag = notconstant;
- Val = V;
- }
-
- void markConstantRange(ConstantRange NewR) {
- if (isConstantRange()) {
- if (NewR.isEmptySet())
- markOverdefined();
- else {
- Range = std::move(NewR);
- }
- return;
- }
-
- assert(isUndefined());
- if (NewR.isEmptySet())
- markOverdefined();
- else {
- Tag = constantrange;
- Range = std::move(NewR);
- }
- }
-
-public:
-
- /// Merge the specified lattice value into this one, updating this
- /// one and returning true if anything changed.
- void mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
- if (RHS.isUndefined() || isOverdefined())
- return;
- if (RHS.isOverdefined()) {
- markOverdefined();
- return;
- }
-
- if (isUndefined()) {
- *this = RHS;
- return;
- }
-
- if (isConstant()) {
- if (RHS.isConstant() && Val == RHS.Val)
- return;
- markOverdefined();
- return;
- }
-
- if (isNotConstant()) {
- if (RHS.isNotConstant() && Val == RHS.Val)
- return;
- markOverdefined();
- return;
- }
-
- assert(isConstantRange() && "New LVILattice type?");
- if (!RHS.isConstantRange()) {
- // We can get here if we've encountered a constantexpr of integer type
- // and merge it with a constantrange.
- markOverdefined();
- return;
- }
- ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
- if (NewR.isFullSet())
- markOverdefined();
- else
- markConstantRange(std::move(NewR));
- }
-};
-
-} // end anonymous namespace.
-
-namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val)
- LLVM_ATTRIBUTE_USED;
-raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
- if (Val.isUndefined())
- return OS << "undefined";
- if (Val.isOverdefined())
- return OS << "overdefined";
-
- if (Val.isNotConstant())
- return OS << "notconstant<" << *Val.getNotConstant() << '>';
- if (Val.isConstantRange())
- return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
- << Val.getConstantRange().getUpper() << '>';
- return OS << "constant<" << *Val.getConstant() << '>';
-}
-}
-
/// Returns true if this lattice value represents at most one possible value.
/// This is as precise as any lattice value can get while still representing
/// reachable code.
-static bool hasSingleValue(const LVILatticeVal &Val) {
+static bool hasSingleValue(const ValueLatticeElement &Val) {
if (Val.isConstantRange() &&
Val.getConstantRange().isSingleElement())
// Integer constants are single element ranges
@@ -302,7 +88,8 @@ static bool hasSingleValue(const LVILatticeVal &Val) {
/// contradictory. If this happens, we return some valid lattice value so as
/// not to confuse the rest of LVI. Ideally, we'd always return Undefined, but
/// we do not make this guarantee. TODO: This would be a useful enhancement.
-static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) {
+static ValueLatticeElement intersect(const ValueLatticeElement &A,
+ const ValueLatticeElement &B) {
// Undefined is the strongest state. It means the value is known to be along
// an unreachable path.
if (A.isUndefined())
@@ -334,7 +121,7 @@ static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) {
// Note: An empty range is implicitly converted to overdefined internally.
// TODO: We could instead use Undefined here since we've proven a conflict
// and thus know this path must be unreachable.
- return LVILatticeVal::getRange(std::move(Range));
+ return ValueLatticeElement::getRange(std::move(Range));
}
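
intersect() keeps whichever operand is more constrained: undefined (an unreachable path) beats everything, overdefined loses to anything, and two ranges narrow to their overlap, with an empty overlap demoted to overdefined as the comment above notes. A toy three-state lattice over closed integer intervals shows the shape of that rule (a sketch only; the ValueLatticeElement used here also carries constant and not-constant states and uses wrapping ConstantRanges):

#include <algorithm>
#include <cstdint>

// Toy lattice: Undefined (strongest), a closed interval, or Overdefined.
struct ToyLattice {
  enum Kind { Undefined, Interval, Overdefined };
  Kind K = Undefined;
  int64_t Lo = 0, Hi = 0; // Meaningful only when K == Interval (inclusive).

  static ToyLattice overdefined() {
    ToyLattice R;
    R.K = Overdefined;
    return R;
  }
  static ToyLattice range(int64_t L, int64_t H) {
    ToyLattice R;
    R.K = Interval;
    R.Lo = L;
    R.Hi = H;
    return R;
  }
};

static ToyLattice intersect(const ToyLattice &A, const ToyLattice &B) {
  if (A.K == ToyLattice::Undefined)
    return A; // Undefined wins outright.
  if (B.K == ToyLattice::Undefined)
    return B;
  if (A.K == ToyLattice::Overdefined)
    return B; // Keep whatever the other side knows.
  if (B.K == ToyLattice::Overdefined)
    return A;
  int64_t Lo = std::max(A.Lo, B.Lo), Hi = std::min(A.Hi, B.Hi);
  if (Lo > Hi) // Empty overlap: treated as overdefined.
    return ToyLattice::overdefined();
  return ToyLattice::range(Lo, Hi);
}
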
//===----------------------------------------------------------------------===//
@@ -372,7 +159,7 @@ namespace {
struct ValueCacheEntryTy {
ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {}
LVIValueHandle Handle;
- SmallDenseMap<PoisoningVH<BasicBlock>, LVILatticeVal, 4> BlockVals;
+ SmallDenseMap<PoisoningVH<BasicBlock>, ValueLatticeElement, 4> BlockVals;
};
/// This tracks, on a per-block basis, the set of values that are
@@ -390,7 +177,8 @@ namespace {
public:
- void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
+ void insertResult(Value *Val, BasicBlock *BB,
+ const ValueLatticeElement &Result) {
SeenBlocks.insert(BB);
// Insert over-defined values into their own cache to reduce memory
@@ -428,16 +216,16 @@ namespace {
return I->second->BlockVals.count(BB);
}
- LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) const {
+ ValueLatticeElement getCachedValueInfo(Value *V, BasicBlock *BB) const {
if (isOverdefined(V, BB))
- return LVILatticeVal::getOverdefined();
+ return ValueLatticeElement::getOverdefined();
auto I = ValueCache.find_as(V);
if (I == ValueCache.end())
- return LVILatticeVal();
+ return ValueLatticeElement();
auto BBI = I->second->BlockVals.find(BB);
if (BBI == I->second->BlockVals.end())
- return LVILatticeVal();
+ return ValueLatticeElement();
return BBI->second;
}
@@ -614,26 +402,29 @@ namespace {
const DataLayout &DL; ///< A mandatory DataLayout
DominatorTree *DT; ///< An optional DT pointer.
- LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+ ValueLatticeElement getBlockValue(Value *Val, BasicBlock *BB);
bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
- LVILatticeVal &Result, Instruction *CxtI = nullptr);
+ ValueLatticeElement &Result, Instruction *CxtI = nullptr);
bool hasBlockValue(Value *Val, BasicBlock *BB);
// These methods process one work item and may add more. A false value
// returned means that the work item was not completely processed and must
// be revisited after going through the new items.
bool solveBlockValue(Value *Val, BasicBlock *BB);
- bool solveBlockValueImpl(LVILatticeVal &Res, Value *Val, BasicBlock *BB);
- bool solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *Val, BasicBlock *BB);
- bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB);
- bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
+ bool solveBlockValueImpl(ValueLatticeElement &Res, Value *Val,
+ BasicBlock *BB);
+ bool solveBlockValueNonLocal(ValueLatticeElement &BBLV, Value *Val,
+ BasicBlock *BB);
+ bool solveBlockValuePHINode(ValueLatticeElement &BBLV, PHINode *PN,
+ BasicBlock *BB);
+ bool solveBlockValueSelect(ValueLatticeElement &BBLV, SelectInst *S,
BasicBlock *BB);
- bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI,
+ bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
- bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI,
+ bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
- LVILatticeVal &BBLV,
+ ValueLatticeElement &BBLV,
Instruction *BBI);
void solve();
@@ -641,18 +432,19 @@ namespace {
public:
/// This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
- LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB,
- Instruction *CxtI = nullptr);
+ ValueLatticeElement getValueInBlock(Value *V, BasicBlock *BB,
+ Instruction *CxtI = nullptr);
/// This is the query interface to determine the lattice
/// value for the specified Value* at the specified instruction (generally
/// from an assume intrinsic).
- LVILatticeVal getValueAt(Value *V, Instruction *CxtI);
+ ValueLatticeElement getValueAt(Value *V, Instruction *CxtI);
/// This is the query interface to determine the lattice
/// value for the specified Value* that is true on the specified edge.
- LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
- Instruction *CxtI = nullptr);
+ ValueLatticeElement getValueOnEdge(Value *V, BasicBlock *FromBB,
+ BasicBlock *ToBB,
+ Instruction *CxtI = nullptr);
    /// Completely flush all previously computed values
void clear() {
@@ -703,7 +495,7 @@ void LazyValueInfoImpl::solve() {
while (!StartingStack.empty()) {
std::pair<BasicBlock *, Value *> &e = StartingStack.back();
TheCache.insertResult(e.second, e.first,
- LVILatticeVal::getOverdefined());
+ ValueLatticeElement::getOverdefined());
StartingStack.pop_back();
}
BlockValueSet.clear();
@@ -739,15 +531,16 @@ bool LazyValueInfoImpl::hasBlockValue(Value *Val, BasicBlock *BB) {
return TheCache.hasCachedValueInfo(Val, BB);
}
-LVILatticeVal LazyValueInfoImpl::getBlockValue(Value *Val, BasicBlock *BB) {
+ValueLatticeElement LazyValueInfoImpl::getBlockValue(Value *Val,
+ BasicBlock *BB) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val))
- return LVILatticeVal::get(VC);
+ return ValueLatticeElement::get(VC);
return TheCache.getCachedValueInfo(Val, BB);
}
-static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
+static ValueLatticeElement getFromRangeMetadata(Instruction *BBI) {
switch (BBI->getOpcode()) {
default: break;
case Instruction::Load:
@@ -755,12 +548,13 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
case Instruction::Invoke:
if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
if (isa<IntegerType>(BBI->getType())) {
- return LVILatticeVal::getRange(getConstantRangeFromMetadata(*Ranges));
+ return ValueLatticeElement::getRange(
+ getConstantRangeFromMetadata(*Ranges));
}
break;
};
// Nothing known - will be intersected with other facts
- return LVILatticeVal::getOverdefined();
+ return ValueLatticeElement::getOverdefined();
}
bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) {
@@ -780,7 +574,7 @@ bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) {
// Hold off inserting this value into the Cache in case we have to return
// false and come back later.
- LVILatticeVal Res;
+ ValueLatticeElement Res;
if (!solveBlockValueImpl(Res, Val, BB))
// Work pushed, will revisit
return false;
@@ -789,7 +583,7 @@ bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) {
return true;
}
-bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
+bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
Value *Val, BasicBlock *BB) {
Instruction *BBI = dyn_cast<Instruction>(Val);
@@ -807,13 +601,13 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
// definition. We could easily extend this to look through geps, bitcasts,
// and the like to prove non-nullness, but it's not clear that's worth it
// compile time wise. The context-insensitive value walk done inside
- // isKnownNonNull gets most of the profitable cases at much less expense.
+ // isKnownNonZero gets most of the profitable cases at much less expense.
    // This does mean that we have a sensitivity to where the defining
// instruction is placed, even if it could legally be hoisted much higher.
// That is unfortunate.
PointerType *PT = dyn_cast<PointerType>(BBI->getType());
- if (PT && isKnownNonNull(BBI)) {
- Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT));
+ if (PT && isKnownNonZero(BBI, DL)) {
+ Res = ValueLatticeElement::getNot(ConstantPointerNull::get(PT));
return true;
}
if (BBI->getType()->isIntegerTy()) {
@@ -880,9 +674,9 @@ static bool isObjectDereferencedInBlock(Value *Val, BasicBlock *BB) {
return false;
}
-bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV,
Value *Val, BasicBlock *BB) {
- LVILatticeVal Result; // Start Undefined.
+ ValueLatticeElement Result; // Start Undefined.
// If this is the entry block, we must be asking about an argument. The
// value is overdefined.
@@ -891,11 +685,11 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
// Before giving up, see if we can prove the pointer non-null local to
// this particular block.
if (Val->getType()->isPointerTy() &&
- (isKnownNonNull(Val) || isObjectDereferencedInBlock(Val, BB))) {
+ (isKnownNonZero(Val, DL) || isObjectDereferencedInBlock(Val, BB))) {
PointerType *PTy = cast<PointerType>(Val->getType());
- Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy));
} else {
- Result = LVILatticeVal::getOverdefined();
+ Result = ValueLatticeElement::getOverdefined();
}
BBLV = Result;
return true;
@@ -911,7 +705,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
// canonicalizing to make this true rather than relying on this happy
// accident.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- LVILatticeVal EdgeResult;
+ ValueLatticeElement EdgeResult;
if (!getEdgeValue(Val, *PI, BB, EdgeResult))
// Explore that input, then return here
return false;
@@ -928,7 +722,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (Val->getType()->isPointerTy() &&
isObjectDereferencedInBlock(Val, BB)) {
PointerType *PTy = cast<PointerType>(Val->getType());
- Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy));
}
BBLV = Result;
@@ -942,9 +736,9 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
return true;
}
-bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
- PHINode *PN, BasicBlock *BB) {
- LVILatticeVal Result; // Start Undefined.
+bool LazyValueInfoImpl::solveBlockValuePHINode(ValueLatticeElement &BBLV,
+ PHINode *PN, BasicBlock *BB) {
+ ValueLatticeElement Result; // Start Undefined.
// Loop over all of our predecessors, merging what we know from them into
// result. See the comment about the chosen traversal order in
@@ -952,7 +746,7 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PhiBB = PN->getIncomingBlock(i);
Value *PhiVal = PN->getIncomingValue(i);
- LVILatticeVal EdgeResult;
+ ValueLatticeElement EdgeResult;
// Note that we can provide PN as the context value to getEdgeValue, even
// though the results will be cached, because PN is the value being used as
// the cache key in the caller.
@@ -979,13 +773,13 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
return true;
}
-static LVILatticeVal getValueFromCondition(Value *Val, Value *Cond,
- bool isTrueDest = true);
+static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond,
+ bool isTrueDest = true);
// If we can determine a constraint on the value given conditions assumed by
// the program, intersect those constraints with BBLV
void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
- Value *Val, LVILatticeVal &BBLV, Instruction *BBI) {
+ Value *Val, ValueLatticeElement &BBLV, Instruction *BBI) {
BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
if (!BBI)
return;
@@ -1014,35 +808,35 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
}
}
-bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
- SelectInst *SI, BasicBlock *BB) {
+bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV,
+ SelectInst *SI, BasicBlock *BB) {
// Recurse on our inputs if needed
if (!hasBlockValue(SI->getTrueValue(), BB)) {
if (pushBlockValue(std::make_pair(BB, SI->getTrueValue())))
return false;
- BBLV = LVILatticeVal::getOverdefined();
+ BBLV = ValueLatticeElement::getOverdefined();
return true;
}
- LVILatticeVal TrueVal = getBlockValue(SI->getTrueValue(), BB);
+ ValueLatticeElement TrueVal = getBlockValue(SI->getTrueValue(), BB);
// If we hit overdefined, don't ask more queries. We want to avoid poisoning
// extra slots in the table if we can.
if (TrueVal.isOverdefined()) {
- BBLV = LVILatticeVal::getOverdefined();
+ BBLV = ValueLatticeElement::getOverdefined();
return true;
}
if (!hasBlockValue(SI->getFalseValue(), BB)) {
if (pushBlockValue(std::make_pair(BB, SI->getFalseValue())))
return false;
- BBLV = LVILatticeVal::getOverdefined();
+ BBLV = ValueLatticeElement::getOverdefined();
return true;
}
- LVILatticeVal FalseVal = getBlockValue(SI->getFalseValue(), BB);
+ ValueLatticeElement FalseVal = getBlockValue(SI->getFalseValue(), BB);
// If we hit overdefined, don't ask more queries. We want to avoid poisoning
// extra slots in the table if we can.
if (FalseVal.isOverdefined()) {
- BBLV = LVILatticeVal::getOverdefined();
+ BBLV = ValueLatticeElement::getOverdefined();
return true;
}
@@ -1070,7 +864,7 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
return TrueCR.umax(FalseCR);
};
}();
- BBLV = LVILatticeVal::getRange(ResultCR);
+ BBLV = ValueLatticeElement::getRange(ResultCR);
return true;
}
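
When the select is recognized as a min/max pattern, the true and false ranges are combined with interval arithmetic rather than a plain merge; the visible fragment shows the unsigned-max case. On simple closed, non-wrapping intervals the rule is short: the range of max(x, y) has the pairwise maxima as its bounds, and min is symmetric (a sketch under that non-wrapping assumption; ConstantRange also has to cope with wrapped ranges):

#include <algorithm>
#include <cstdint>
#include <utility>

using Interval = std::pair<int64_t, int64_t>; // {Lo, Hi}, inclusive bounds.

// Range of max(x, y) with x in A and y in B.
static Interval intervalMax(Interval A, Interval B) {
  return {std::max(A.first, B.first), std::max(A.second, B.second)};
}

// Range of min(x, y) with x in A and y in B.
static Interval intervalMin(Interval A, Interval B) {
  return {std::min(A.first, B.first), std::min(A.second, B.second)};
}
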
@@ -1113,7 +907,7 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
m_ConstantInt(CIAdded)))) {
auto ResNot = addConstants(CIBase, CIAdded);
FalseVal = intersect(FalseVal,
- LVILatticeVal::getNot(ResNot));
+ ValueLatticeElement::getNot(ResNot));
}
break;
case ICmpInst::ICMP_NE:
@@ -1121,27 +915,27 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
m_ConstantInt(CIAdded)))) {
auto ResNot = addConstants(CIBase, CIAdded);
TrueVal = intersect(TrueVal,
- LVILatticeVal::getNot(ResNot));
+ ValueLatticeElement::getNot(ResNot));
}
break;
};
}
}
- LVILatticeVal Result; // Start Undefined.
+ ValueLatticeElement Result; // Start Undefined.
Result.mergeIn(TrueVal, DL);
Result.mergeIn(FalseVal, DL);
BBLV = Result;
return true;
}
-bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
CastInst *CI,
BasicBlock *BB) {
if (!CI->getOperand(0)->getType()->isSized()) {
// Without knowing how wide the input is, we can't analyze it in any useful
// way.
- BBLV = LVILatticeVal::getOverdefined();
+ BBLV = ValueLatticeElement::getOverdefined();
return true;
}
@@ -1158,7 +952,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
// Unhandled instructions are overdefined.
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown cast).\n");
- BBLV = LVILatticeVal::getOverdefined();
+ BBLV = ValueLatticeElement::getOverdefined();
return true;
}
@@ -1174,7 +968,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
DL.getTypeSizeInBits(CI->getOperand(0)->getType());
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
if (hasBlockValue(CI->getOperand(0), BB)) {
- LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB);
+ ValueLatticeElement LHSVal = getBlockValue(CI->getOperand(0), BB);
intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal,
CI);
if (LHSVal.isConstantRange())
@@ -1186,14 +980,14 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
// NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
- BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(),
- ResultBitWidth));
+ BBLV = ValueLatticeElement::getRange(LHSRange.castOp(CI->getOpcode(),
+ ResultBitWidth));
return true;
}
-bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
- BinaryOperator *BO,
- BasicBlock *BB) {
+bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
+ BinaryOperator *BO,
+ BasicBlock *BB) {
assert(BO->getOperand(0)->getType()->isSized() &&
"all operands to binary operators are sized");
@@ -1208,6 +1002,7 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
case Instruction::UDiv:
case Instruction::Shl:
case Instruction::LShr:
+ case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
// continue into the code below
@@ -1216,7 +1011,7 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// Unhandled instructions are overdefined.
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown binary operator).\n");
- BBLV = LVILatticeVal::getOverdefined();
+ BBLV = ValueLatticeElement::getOverdefined();
return true;
};
@@ -1232,7 +1027,7 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
DL.getTypeSizeInBits(BO->getOperand(0)->getType());
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
if (hasBlockValue(BO->getOperand(0), BB)) {
- LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB);
+ ValueLatticeElement LHSVal = getBlockValue(BO->getOperand(0), BB);
intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal,
BO);
if (LHSVal.isConstantRange())
@@ -1246,12 +1041,12 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
  // can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
Instruction::BinaryOps BinOp = BO->getOpcode();
- BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange));
+ BBLV = ValueLatticeElement::getRange(LHSRange.binaryOp(BinOp, RHSRange));
return true;
}
-static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
- bool isTrueDest) {
+static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
+ bool isTrueDest) {
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
CmpInst::Predicate Predicate = ICI->getPredicate();
@@ -1261,14 +1056,14 @@ static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
if (isTrueDest == (Predicate == ICmpInst::ICMP_EQ))
- return LVILatticeVal::get(cast<Constant>(RHS));
+ return ValueLatticeElement::get(cast<Constant>(RHS));
else
- return LVILatticeVal::getNot(cast<Constant>(RHS));
+ return ValueLatticeElement::getNot(cast<Constant>(RHS));
}
}
if (!Val->getType()->isIntegerTy())
- return LVILatticeVal::getOverdefined();
+ return ValueLatticeElement::getOverdefined();
// Use ConstantRange::makeAllowedICmpRegion in order to determine the possible
  // range of Val guaranteed by the condition. Recognize comparisons in the form
@@ -1307,19 +1102,19 @@ static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
if (Offset) // Apply the offset from above.
TrueValues = TrueValues.subtract(Offset->getValue());
- return LVILatticeVal::getRange(std::move(TrueValues));
+ return ValueLatticeElement::getRange(std::move(TrueValues));
}
- return LVILatticeVal::getOverdefined();
+ return ValueLatticeElement::getOverdefined();
}
-static LVILatticeVal
+static ValueLatticeElement
getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
- DenseMap<Value*, LVILatticeVal> &Visited);
+ DenseMap<Value*, ValueLatticeElement> &Visited);
-static LVILatticeVal
+static ValueLatticeElement
getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
- DenseMap<Value*, LVILatticeVal> &Visited) {
+ DenseMap<Value*, ValueLatticeElement> &Visited) {
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
return getValueFromICmpCondition(Val, ICI, isTrueDest);
@@ -1330,16 +1125,16 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond);
if (!BO || (isTrueDest && BO->getOpcode() != BinaryOperator::And) ||
(!isTrueDest && BO->getOpcode() != BinaryOperator::Or))
- return LVILatticeVal::getOverdefined();
+ return ValueLatticeElement::getOverdefined();
auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited);
auto LHS = getValueFromCondition(Val, BO->getOperand(1), isTrueDest, Visited);
return intersect(RHS, LHS);
}
-static LVILatticeVal
+static ValueLatticeElement
getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
- DenseMap<Value*, LVILatticeVal> &Visited) {
+ DenseMap<Value*, ValueLatticeElement> &Visited) {
auto I = Visited.find(Cond);
if (I != Visited.end())
return I->second;
@@ -1349,17 +1144,63 @@ getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
return Result;
}
-LVILatticeVal getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest) {
+ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond,
+ bool isTrueDest) {
assert(Cond && "precondition");
- DenseMap<Value*, LVILatticeVal> Visited;
+ DenseMap<Value*, ValueLatticeElement> Visited;
return getValueFromCondition(Val, Cond, isTrueDest, Visited);
}
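
getValueFromCondition walks a tree of conditions: a compare contributes a range directly, while an 'and' on the true edge (or an 'or' on the false edge) lets the constraints from both operands be intersected, and the Visited map memoizes each sub-condition so shared subtrees are solved only once. A toy version over conjunction trees constraining a single variable (illustrative structures, not the LVI types):

#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

using Interval = std::pair<int64_t, int64_t>; // Inclusive bounds.

// A leaf pins the variable to an interval; an inner node is the AND of its
// children (so on the "condition is true" edge, every child constraint holds).
struct Cond {
  bool IsLeaf = true;
  int64_t Lo = INT64_MIN, Hi = INT64_MAX;
  std::vector<const Cond *> Children;
};

static Interval
constraintOnTrueEdge(const Cond *C,
                     std::unordered_map<const Cond *, Interval> &Memo) {
  auto It = Memo.find(C);
  if (It != Memo.end()) // Memoized, like the Visited map above.
    return It->second;
  Interval R{INT64_MIN, INT64_MAX}; // Start unconstrained ("overdefined").
  if (C->IsLeaf) {
    R = {C->Lo, C->Hi};
  } else {
    for (const Cond *Child : C->Children) {
      Interval CI = constraintOnTrueEdge(Child, Memo);
      R = {std::max(R.first, CI.first), std::min(R.second, CI.second)};
    }
  }
  Memo.emplace(C, R);
  return R;
}
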
+// Return true if Usr has Op as an operand, otherwise false.
+static bool usesOperand(User *Usr, Value *Op) {
+ return find(Usr->operands(), Op) != Usr->op_end();
+}
+
+// Return true if the instruction type of Val is supported by
+// constantFoldUser(). Currently CastInst and BinaryOperator only. Call this
+// before calling constantFoldUser() to find out if it's even worth attempting
+// to call it.
+static bool isOperationFoldable(User *Usr) {
+ return isa<CastInst>(Usr) || isa<BinaryOperator>(Usr);
+}
+
+// Check if Usr can be simplified to an integer constant when the value of one
+// of its operands Op is an integer constant OpConstVal. If so, return it as a
+// lattice value range with a single element or otherwise return an overdefined
+// lattice value.
+static ValueLatticeElement constantFoldUser(User *Usr, Value *Op,
+ const APInt &OpConstVal,
+ const DataLayout &DL) {
+ assert(isOperationFoldable(Usr) && "Precondition");
+ Constant* OpConst = Constant::getIntegerValue(Op->getType(), OpConstVal);
+ // Check if Usr can be simplified to a constant.
+ if (auto *CI = dyn_cast<CastInst>(Usr)) {
+ assert(CI->getOperand(0) == Op && "Operand 0 isn't Op");
+ if (auto *C = dyn_cast_or_null<ConstantInt>(
+ SimplifyCastInst(CI->getOpcode(), OpConst,
+ CI->getDestTy(), DL))) {
+ return ValueLatticeElement::getRange(ConstantRange(C->getValue()));
+ }
+ } else if (auto *BO = dyn_cast<BinaryOperator>(Usr)) {
+ bool Op0Match = BO->getOperand(0) == Op;
+ bool Op1Match = BO->getOperand(1) == Op;
+ assert((Op0Match || Op1Match) &&
+         "Neither Operand 0 nor Operand 1 is a match");
+ Value *LHS = Op0Match ? OpConst : BO->getOperand(0);
+ Value *RHS = Op1Match ? OpConst : BO->getOperand(1);
+ if (auto *C = dyn_cast_or_null<ConstantInt>(
+ SimplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) {
+ return ValueLatticeElement::getRange(ConstantRange(C->getValue()));
+ }
+ }
+ return ValueLatticeElement::getOverdefined();
+}
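
constantFoldUser asks a narrow question: if this one operand were the constant K, would the whole cast or binary operator fold to a constant? When it does, that constant becomes a single-element range for Val on the edge. The code above answers it with SimplifyCastInst/SimplifyBinOp; the sketch below only shows the substitute-and-fold shape for a few integer operators (a stand-in folder with overflow ignored, not LLVM's simplifier):

#include <cstdint>
#include <optional>

enum class ToyBinOp { Add, And, Xor };

// Fold L <op> R for the handful of operators this toy folder knows about.
static std::optional<int64_t> foldBinOp(ToyBinOp Op, int64_t L, int64_t R) {
  switch (Op) {
  case ToyBinOp::Add: return L + R;
  case ToyBinOp::And: return L & R;
  case ToyBinOp::Xor: return L ^ R;
  }
  return std::nullopt; // Unknown operator: give up, i.e. stay overdefined.
}

// The constantFoldUser shape: the user is Known <op> Unknown and an edge has
// just pinned Unknown to PinnedValue.
static std::optional<int64_t>
foldUserWithPinnedOperand(ToyBinOp Op, int64_t Known, int64_t PinnedValue) {
  return foldBinOp(Op, Known, PinnedValue);
}

With the branch condition pinned to 1 on the true edge, an instruction such as "%Val = and i1 %Condition, true" folds to 1, which is the example spelled out in the comments of getEdgeValueLocal below.
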
+
/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
/// Val is not constrained on the edge. Result is unspecified if return value
/// is false.
static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
- BasicBlock *BBTo, LVILatticeVal &Result) {
+ BasicBlock *BBTo, ValueLatticeElement &Result) {
// TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
// know that v != 0.
if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
@@ -1370,18 +1211,59 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
bool isTrueDest = BI->getSuccessor(0) == BBTo;
assert(BI->getSuccessor(!isTrueDest) == BBTo &&
"BBTo isn't a successor of BBFrom");
+ Value *Condition = BI->getCondition();
// If V is the condition of the branch itself, then we know exactly what
// it is.
- if (BI->getCondition() == Val) {
- Result = LVILatticeVal::get(ConstantInt::get(
+ if (Condition == Val) {
+ Result = ValueLatticeElement::get(ConstantInt::get(
Type::getInt1Ty(Val->getContext()), isTrueDest));
return true;
}
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- Result = getValueFromCondition(Val, BI->getCondition(), isTrueDest);
+ Result = getValueFromCondition(Val, Condition, isTrueDest);
+ if (!Result.isOverdefined())
+ return true;
+
+ if (User *Usr = dyn_cast<User>(Val)) {
+ assert(Result.isOverdefined() && "Result isn't overdefined");
+ // Check with isOperationFoldable() first to avoid linearly iterating
+ // over the operands unnecessarily which can be expensive for
+ // instructions with many operands.
+ if (isa<IntegerType>(Usr->getType()) && isOperationFoldable(Usr)) {
+ const DataLayout &DL = BBTo->getModule()->getDataLayout();
+ if (usesOperand(Usr, Condition)) {
+ // If Val has Condition as an operand and Val can be folded into a
+ // constant with either Condition == true or Condition == false,
+ // propagate the constant.
+ // eg.
+ // ; %Val is true on the edge to %then.
+ // %Val = and i1 %Condition, true.
+ // br %Condition, label %then, label %else
+ APInt ConditionVal(1, isTrueDest ? 1 : 0);
+ Result = constantFoldUser(Usr, Condition, ConditionVal, DL);
+ } else {
+ // If one of Val's operand has an inferred value, we may be able to
+ // infer the value of Val.
+ // eg.
+ // ; %Val is 94 on the edge to %then.
+ // %Val = add i8 %Op, 1
+ // %Condition = icmp eq i8 %Op, 93
+ // br i1 %Condition, label %then, label %else
+ for (unsigned i = 0; i < Usr->getNumOperands(); ++i) {
+ Value *Op = Usr->getOperand(i);
+ ValueLatticeElement OpLatticeVal =
+ getValueFromCondition(Op, Condition, isTrueDest);
+ if (Optional<APInt> OpConst = OpLatticeVal.asConstantInteger()) {
+ Result = constantFoldUser(Usr, Op, OpConst.getValue(), DL);
+ break;
+ }
+ }
+ }
+ }
+ }
if (!Result.isOverdefined())
return true;
}
@@ -1390,24 +1272,50 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// If the edge was formed by a switch on the value, then we may know exactly
// what it is.
if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
- if (SI->getCondition() != Val)
+ Value *Condition = SI->getCondition();
+ if (!isa<IntegerType>(Val->getType()))
return false;
+ bool ValUsesConditionAndMayBeFoldable = false;
+ if (Condition != Val) {
+ // Check if Val has Condition as an operand.
+ if (User *Usr = dyn_cast<User>(Val))
+ ValUsesConditionAndMayBeFoldable = isOperationFoldable(Usr) &&
+ usesOperand(Usr, Condition);
+ if (!ValUsesConditionAndMayBeFoldable)
+ return false;
+ }
+ assert((Condition == Val || ValUsesConditionAndMayBeFoldable) &&
+ "Condition != Val nor Val doesn't use Condition");
bool DefaultCase = SI->getDefaultDest() == BBTo;
unsigned BitWidth = Val->getType()->getIntegerBitWidth();
ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/);
for (auto Case : SI->cases()) {
- ConstantRange EdgeVal(Case.getCaseValue()->getValue());
+ APInt CaseValue = Case.getCaseValue()->getValue();
+ ConstantRange EdgeVal(CaseValue);
+ if (ValUsesConditionAndMayBeFoldable) {
+ User *Usr = cast<User>(Val);
+ const DataLayout &DL = BBTo->getModule()->getDataLayout();
+ ValueLatticeElement EdgeLatticeVal =
+ constantFoldUser(Usr, Condition, CaseValue, DL);
+ if (EdgeLatticeVal.isOverdefined())
+ return false;
+ EdgeVal = EdgeLatticeVal.getConstantRange();
+ }
if (DefaultCase) {
// It is possible that the default destination is the destination of
- // some cases. There is no need to perform difference for those cases.
- if (Case.getCaseSuccessor() != BBTo)
+ // some cases. We cannot perform difference for those cases.
+        // We know Condition != CaseValue in BBTo. In some cases we can use
+        // this to infer that Val, which equals f(Condition), is != f(CaseValue).
+        // For now, we only do this when f is the identity (i.e. Val ==
+        // Condition), but we should be able to do this for any injective f.
+ if (Case.getCaseSuccessor() != BBTo && Condition == Val)
EdgesVals = EdgesVals.difference(EdgeVal);
} else if (Case.getCaseSuccessor() == BBTo)
EdgesVals = EdgesVals.unionWith(EdgeVal);
}
- Result = LVILatticeVal::getRange(std::move(EdgesVals));
+ Result = ValueLatticeElement::getRange(std::move(EdgesVals));
return true;
}
return false;
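For the identity case (Val == Condition) discussed above, the range reaching the default destination is simply the full set minus every case value. A minimal illustrative sketch of that step (names and interface are only for illustration, not this function's actual API):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/ConstantRange.h"

// Illustrative only: the lattice range for Val on the edge to the default
// destination, when Val is the switch condition itself. Assumes no case
// shares the default destination (the code above skips those cases).
static llvm::ConstantRange
defaultDestRange(unsigned BitWidth, llvm::ArrayRef<llvm::APInt> CaseValues) {
  llvm::ConstantRange EdgesVals(BitWidth, /*isFullSet=*/true);
  for (const llvm::APInt &C : CaseValues)
    EdgesVals = EdgesVals.difference(llvm::ConstantRange(C));
  return EdgesVals;
}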
@@ -1416,19 +1324,20 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
/// the basic block if the edge does not constrain Val.
bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
- BasicBlock *BBTo, LVILatticeVal &Result,
+ BasicBlock *BBTo,
+ ValueLatticeElement &Result,
Instruction *CxtI) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val)) {
- Result = LVILatticeVal::get(VC);
+ Result = ValueLatticeElement::get(VC);
return true;
}
- LVILatticeVal LocalResult;
+ ValueLatticeElement LocalResult;
if (!getEdgeValueLocal(Val, BBFrom, BBTo, LocalResult))
// If we couldn't constrain the value on the edge, LocalResult doesn't
// provide any information.
- LocalResult = LVILatticeVal::getOverdefined();
+ LocalResult = ValueLatticeElement::getOverdefined();
if (hasSingleValue(LocalResult)) {
// Can't get any more precise here
@@ -1445,7 +1354,7 @@ bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
}
// Try to intersect ranges of the BB and the constraint on the edge.
- LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ ValueLatticeElement InBlock = getBlockValue(Val, BBFrom);
intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock,
BBFrom->getTerminator());
// We can use the context instruction (generically the ultimate instruction
@@ -1462,8 +1371,8 @@ bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
return true;
}
-LVILatticeVal LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
- Instruction *CxtI) {
+ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
+ Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
@@ -1472,21 +1381,21 @@ LVILatticeVal LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
pushBlockValue(std::make_pair(BB, V));
solve();
}
- LVILatticeVal Result = getBlockValue(V, BB);
+ ValueLatticeElement Result = getBlockValue(V, BB);
intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
-LVILatticeVal LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
+ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting value " << *V << " at '"
<< CxtI->getName() << "'\n");
if (auto *C = dyn_cast<Constant>(V))
- return LVILatticeVal::get(C);
+ return ValueLatticeElement::get(C);
- LVILatticeVal Result = LVILatticeVal::getOverdefined();
+ ValueLatticeElement Result = ValueLatticeElement::getOverdefined();
if (auto *I = dyn_cast<Instruction>(V))
Result = getFromRangeMetadata(I);
intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
@@ -1495,13 +1404,13 @@ LVILatticeVal LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
return Result;
}
-LVILatticeVal LazyValueInfoImpl::
+ValueLatticeElement LazyValueInfoImpl::
getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
<< FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
- LVILatticeVal Result;
+ ValueLatticeElement Result;
if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) {
solve();
bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result, CxtI);
@@ -1581,7 +1490,8 @@ bool LazyValueInfo::invalidate(Function &F, const PreservedAnalyses &PA,
void LazyValueInfoWrapperPass::releaseMemory() { Info.releaseMemory(); }
-LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
+LazyValueInfo LazyValueAnalysis::run(Function &F,
+ FunctionAnalysisManager &FAM) {
auto &AC = FAM.getResult<AssumptionAnalysis>(F);
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
@@ -1610,7 +1520,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
return nullptr;
const DataLayout &DL = BB->getModule()->getDataLayout();
- LVILatticeVal Result =
+ ValueLatticeElement Result =
getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
@@ -1628,7 +1538,7 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB,
assert(V->getType()->isIntegerTy());
unsigned Width = V->getType()->getIntegerBitWidth();
const DataLayout &DL = BB->getModule()->getDataLayout();
- LVILatticeVal Result =
+ ValueLatticeElement Result =
getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isUndefined())
return ConstantRange(Width, /*isFullSet=*/false);
@@ -1647,7 +1557,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
BasicBlock *ToBB,
Instruction *CxtI) {
const DataLayout &DL = FromBB->getModule()->getDataLayout();
- LVILatticeVal Result =
+ ValueLatticeElement Result =
getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
@@ -1666,7 +1576,7 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
Instruction *CxtI) {
unsigned Width = V->getType()->getIntegerBitWidth();
const DataLayout &DL = FromBB->getModule()->getDataLayout();
- LVILatticeVal Result =
+ ValueLatticeElement Result =
getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isUndefined())
@@ -1680,11 +1590,9 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
return ConstantRange(Width, /*isFullSet=*/true);
}
-static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
- const LVILatticeVal &Val,
- const DataLayout &DL,
- TargetLibraryInfo *TLI) {
-
+static LazyValueInfo::Tristate
+getPredicateResult(unsigned Pred, Constant *C, const ValueLatticeElement &Val,
+ const DataLayout &DL, TargetLibraryInfo *TLI) {
// If we know the value is a constant, evaluate the conditional.
Constant *Res = nullptr;
if (Val.isConstant()) {
@@ -1754,7 +1662,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
const DataLayout &DL = FromBB->getModule()->getDataLayout();
- LVILatticeVal Result =
+ ValueLatticeElement Result =
getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
@@ -1764,18 +1672,18 @@ LazyValueInfo::Tristate
LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
// Is or is not NonNull are common predicates being queried. If
- // isKnownNonNull can tell us the result of the predicate, we can
+ // isKnownNonZero can tell us the result of the predicate, we can
// return it quickly. But this is only a fastpath, and falling
// through would still be correct.
+ const DataLayout &DL = CxtI->getModule()->getDataLayout();
if (V->getType()->isPointerTy() && C->isNullValue() &&
- isKnownNonNull(V->stripPointerCasts())) {
+ isKnownNonZero(V->stripPointerCasts(), DL)) {
if (Pred == ICmpInst::ICMP_EQ)
return LazyValueInfo::False;
else if (Pred == ICmpInst::ICMP_NE)
return LazyValueInfo::True;
}
- const DataLayout &DL = CxtI->getModule()->getDataLayout();
- LVILatticeVal Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
+ ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
if (Ret != Unknown)
return Ret;
@@ -1889,7 +1797,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot(
// Find if there are latticevalues defined for arguments of the function.
auto *F = BB->getParent();
for (auto &Arg : F->args()) {
- LVILatticeVal Result = LVIImpl->getValueInBlock(
+ ValueLatticeElement Result = LVIImpl->getValueInBlock(
const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB));
if (Result.isUndefined())
continue;
@@ -1914,7 +1822,7 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot(
auto printResult = [&](const BasicBlock *BB) {
if (!BlocksContainingLVI.insert(BB).second)
return;
- LVILatticeVal Result = LVIImpl->getValueInBlock(
+ ValueLatticeElement Result = LVIImpl->getValueInBlock(
const_cast<Instruction *>(I), const_cast<BasicBlock *>(BB));
OS << "; LatticeVal for: '" << *I << "' in BB: '";
BB->printAsOperand(OS, false);
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index ada600a69b87..7b792ed0a2e2 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -285,15 +285,24 @@ void Lint::visitCallSite(CallSite CS) {
}
}
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
- for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- AI != AE; ++AI) {
- Value *Obj = findValue(*AI, /*OffsetOk=*/true);
- Assert(!isa<AllocaInst>(Obj),
- "Undefined behavior: Call with \"tail\" keyword references "
- "alloca",
- &I);
+ if (CS.isCall()) {
+ const CallInst *CI = cast<CallInst>(CS.getInstruction());
+ if (CI->isTailCall()) {
+ const AttributeList &PAL = CI->getAttributes();
+ unsigned ArgNo = 0;
+ for (Value *Arg : CS.args()) {
+ // Skip ByVal arguments since they will be memcpy'd to the callee's
+ // stack anyway.
+ if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+ continue;
+ Value *Obj = findValue(Arg, /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca",
+ &I);
+ }
}
+ }
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
@@ -683,7 +692,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
if (Instruction::isCast(CE->getOpcode())) {
if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
CE->getOperand(0)->getType(), CE->getType(),
- DL->getIntPtrType(V->getType())))
+ *DL))
return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
} else if (CE->getOpcode() == Instruction::ExtractValue) {
ArrayRef<unsigned> Indices = CE->getIndices();
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index 591b0fc481d2..834727c9224d 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -72,7 +72,7 @@ static bool isDereferenceableAndAlignedPointer(
V->getPointerDereferenceableBytes(DL, CheckForNonNull));
if (KnownDerefBytes.getBoolValue()) {
if (KnownDerefBytes.uge(Size))
- if (!CheckForNonNull || isKnownNonNullAt(V, CtxI, DT))
+ if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT))
return isAligned(V, Align, DL);
}
@@ -414,7 +414,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
// If we have alias analysis and it says the store won't modify the loaded
// value, ignore the store.
- if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0)
+ if (AA && !isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize)))
continue;
// Otherwise the store that may or may not alias the pointer, bail out.
@@ -426,8 +426,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
if (Inst->mayWriteToMemory()) {
// If alias analysis claims that it really won't modify the load,
// ignore it.
- if (AA &&
- (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0)
+ if (AA && !isModSet(AA->getModRefInfo(Inst, StrippedPtr, AccessSize)))
continue;
// May modify the pointer, bail out.
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 4ba12583ff83..ed8e5e8cc489 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -29,7 +29,7 @@
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -522,6 +522,21 @@ public:
Accesses.insert(MemAccessInfo(Ptr, true));
}
+ /// \brief Check if we can emit a run-time no-alias check for \p Access.
+ ///
+  /// Returns true if we can emit a run-time no-alias check for \p Access.
+  /// If we can check this access, this also adds it to a dependence set and
+  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
+ /// we will attempt to use additional run-time checks in order to get
+ /// the bounds of the pointer.
+ bool createCheckForAccess(RuntimePointerChecking &RtCheck,
+ MemAccessInfo Access,
+ const ValueToValueMap &Strides,
+ DenseMap<Value *, unsigned> &DepSetId,
+ Loop *TheLoop, unsigned &RunningDepId,
+ unsigned ASId, bool ShouldCheckStride,
+ bool Assume);
+
/// \brief Check whether we can check the pointers at runtime for
/// non-intersection.
///
@@ -597,9 +612,11 @@ private:
} // end anonymous namespace
/// \brief Check whether a pointer can participate in a runtime bounds check.
+/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
+/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
const ValueToValueMap &Strides, Value *Ptr,
- Loop *L) {
+ Loop *L, bool Assume) {
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
// The bounds for loop-invariant pointer is trivial.
@@ -607,6 +624,10 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
return true;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+
+ if (!AR && Assume)
+ AR = PSE.getAsAddRec(Ptr);
+
if (!AR)
return false;
@@ -621,9 +642,53 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
return true;
int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
- return Stride == 1;
+ if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
+ return true;
+
+ return false;
}
+bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
+ MemAccessInfo Access,
+ const ValueToValueMap &StridesMap,
+ DenseMap<Value *, unsigned> &DepSetId,
+ Loop *TheLoop, unsigned &RunningDepId,
+ unsigned ASId, bool ShouldCheckWrap,
+ bool Assume) {
+ Value *Ptr = Access.getPointer();
+
+ if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
+ return false;
+
+ // When we run after a failing dependency check we have to make sure
+ // we don't have wrapping pointers.
+ if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
+ auto *Expr = PSE.getSCEV(Ptr);
+ if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+ return false;
+ PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+ }
+
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (isDependencyCheckNeeded()) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ bool IsWrite = Access.getInt();
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
+ DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+
+ return true;
+ }
+
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
ScalarEvolution *SE, Loop *TheLoop,
const ValueToValueMap &StridesMap,
@@ -643,12 +708,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
for (auto &AS : AST) {
int NumReadPtrChecks = 0;
int NumWritePtrChecks = 0;
+ bool CanDoAliasSetRT = true;
// We assign consecutive id to access from different dependence sets.
// Accesses within the same set don't need a runtime check.
unsigned RunningDepId = 1;
DenseMap<Value *, unsigned> DepSetId;
+ SmallVector<MemAccessInfo, 4> Retries;
+
for (auto A : AS) {
Value *Ptr = A.getValue();
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
@@ -659,29 +727,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
else
++NumReadPtrChecks;
- if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
- // When we run after a failing dependency check we have to make sure
- // we don't have wrapping pointers.
- (!ShouldCheckWrap || isNoWrap(PSE, StridesMap, Ptr, TheLoop))) {
- // The id of the dependence set.
- unsigned DepId;
-
- if (IsDepCheckNeeded) {
- Value *Leader = DepCands.getLeaderValue(Access).getPointer();
- unsigned &LeaderId = DepSetId[Leader];
- if (!LeaderId)
- LeaderId = RunningDepId++;
- DepId = LeaderId;
- } else
- // Each access has its own dependence set.
- DepId = RunningDepId++;
-
- RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
-
- DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
- } else {
+ if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
+ RunningDepId, ASId, ShouldCheckWrap, false)) {
DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
- CanDoRT = false;
+ Retries.push_back(Access);
+ CanDoAliasSetRT = false;
}
}
@@ -693,10 +743,29 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
// for which we couldn't find the bounds but we don't actually need to emit
// any checks so it does not matter.
- if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
- NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
- NumWritePtrChecks >= 1));
+ bool NeedsAliasSetRTCheck = false;
+ if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
+ NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
+ (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
+
+ // We need to perform run-time alias checks, but some pointers had bounds
+ // that couldn't be checked.
+ if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
+      // Reset the CanDoAliasSetRT flag and retry all accesses that have failed.
+ // We know that we need these checks, so we can now be more aggressive
+ // and add further checks if required (overflow checks).
+ CanDoAliasSetRT = true;
+ for (auto Access : Retries)
+ if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
+ TheLoop, RunningDepId, ASId,
+ ShouldCheckWrap, /*Assume=*/true)) {
+ CanDoAliasSetRT = false;
+ break;
+ }
+ }
+ CanDoRT &= CanDoAliasSetRT;
+ NeedRTCheck |= NeedsAliasSetRTCheck;
++ASId;
}
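The control flow above follows a cheap-first, retry-on-demand pattern: every access is first checked without extra predicates, failures are remembered, and only if the alias set actually needs runtime checks are the failures retried with Assume=true. A self-contained sketch of the pattern (illustrative names, plain C++ rather than the LAA types):

#include <vector>

// Two-pass check: a cheap attempt for every item, then an aggressive retry
// restricted to the items that failed the cheap attempt.
template <typename T, typename CheckFn>
static bool checkAllWithRetry(const std::vector<T> &Items, CheckFn Check) {
  std::vector<T> Retries;
  for (const T &Item : Items)
    if (!Check(Item, /*Assume=*/false))
      Retries.push_back(Item);
  if (Retries.empty())
    return true;
  // Second pass: only the failures pay for the more expensive variant.
  for (const T &Item : Retries)
    if (!Check(Item, /*Assume=*/true))
      return false;
  return true;
}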
@@ -1038,6 +1107,77 @@ static unsigned getAddressSpaceOperand(Value *I) {
return -1;
}
+// TODO: This API can be improved by using the permutation of given width as the
+// accesses are entered into the map.
+bool llvm::sortLoadAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
+ ScalarEvolution &SE,
+ SmallVectorImpl<Value *> &Sorted,
+ SmallVectorImpl<unsigned> *Mask) {
+ SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
+ OffValPairs.reserve(VL.size());
+ Sorted.reserve(VL.size());
+
+ // Walk over the pointers, and map each of them to an offset relative to
+ // first pointer in the array.
+ Value *Ptr0 = getPointerOperand(VL[0]);
+ const SCEV *Scev0 = SE.getSCEV(Ptr0);
+ Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr0->getType());
+ uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+ for (auto *Val : VL) {
+ // The only kind of access we care about here is load.
+ if (!isa<LoadInst>(Val))
+ return false;
+
+ Value *Ptr = getPointerOperand(Val);
+ assert(Ptr && "Expected value to have a pointer operand.");
+ // If a pointer refers to a different underlying object, bail - the
+ // pointers are by definition incomparable.
+ Value *CurrObj = GetUnderlyingObject(Ptr, DL);
+ if (CurrObj != Obj0)
+ return false;
+
+ const SCEVConstant *Diff =
+ dyn_cast<SCEVConstant>(SE.getMinusSCEV(SE.getSCEV(Ptr), Scev0));
+ // The pointers may not have a constant offset from each other, or SCEV
+ // may just not be smart enough to figure out they do. Regardless,
+ // there's nothing we can do.
+ if (!Diff || static_cast<unsigned>(Diff->getAPInt().abs().getSExtValue()) >
+ (VL.size() - 1) * Size)
+ return false;
+
+ OffValPairs.emplace_back(Diff->getAPInt().getSExtValue(), Val);
+ }
+ SmallVector<unsigned, 4> UseOrder(VL.size());
+ for (unsigned i = 0; i < VL.size(); i++) {
+ UseOrder[i] = i;
+ }
+
+ // Sort the memory accesses and keep the order of their uses in UseOrder.
+ std::sort(UseOrder.begin(), UseOrder.end(),
+ [&OffValPairs](unsigned Left, unsigned Right) {
+ return OffValPairs[Left].first < OffValPairs[Right].first;
+ });
+
+ for (unsigned i = 0; i < VL.size(); i++)
+ Sorted.emplace_back(OffValPairs[UseOrder[i]].second);
+
+ // Sort UseOrder to compute the Mask.
+ if (Mask) {
+ Mask->reserve(VL.size());
+ for (unsigned i = 0; i < VL.size(); i++)
+ Mask->emplace_back(i);
+ std::sort(Mask->begin(), Mask->end(),
+ [&UseOrder](unsigned Left, unsigned Right) {
+ return UseOrder[Left] < UseOrder[Right];
+ });
+ }
+
+ return true;
+}
+
+
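Stripped of the SCEV machinery, the sorting step above orders indices by offset and then inverts that permutation to obtain the mask. A self-contained sketch with plain integers standing in for the SCEV-derived offsets (illustrative names only):

#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>

// UseOrder[j] answers "which original access sits at sorted position j";
// Mask[i] answers "where did original access i end up" (the inverse).
static void sortByOffset(const std::vector<int64_t> &Offsets,
                         std::vector<unsigned> &UseOrder,
                         std::vector<unsigned> &Mask) {
  UseOrder.resize(Offsets.size());
  std::iota(UseOrder.begin(), UseOrder.end(), 0u);
  std::sort(UseOrder.begin(), UseOrder.end(),
            [&](unsigned L, unsigned R) { return Offsets[L] < Offsets[R]; });
  Mask.resize(Offsets.size());
  std::iota(Mask.begin(), Mask.end(), 0u);
  std::sort(Mask.begin(), Mask.end(),
            [&](unsigned L, unsigned R) { return UseOrder[L] < UseOrder[R]; });
}

int main() {
  std::vector<unsigned> UseOrder, Mask;
  sortByOffset({8, 0, 4}, UseOrder, Mask);
  assert((UseOrder == std::vector<unsigned>{1, 2, 0}));
  assert((Mask == std::vector<unsigned>{2, 0, 1}));
  return 0;
}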
/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType) {
@@ -1471,10 +1611,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
couldPreventStoreLoadForward(Distance, TypeByteSize))
return Dependence::BackwardVectorizableButPreventsForwarding;
+ uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
- << " with max VF = "
- << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');
-
+ << " with max VF = " << MaxVF << '\n');
+ uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+ MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
return Dependence::BackwardVectorizable;
}
@@ -2066,8 +2207,51 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
if (!Stride)
return;
- DEBUG(dbgs() << "LAA: Found a strided access that we can version");
+ DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
+ "versioning:");
DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
+
+ // Avoid adding the "Stride == 1" predicate when we know that
+ // Stride >= Trip-Count. Such a predicate will effectively optimize a single
+ // or zero iteration loop, as Trip-Count <= Stride == 1.
+ //
+ // TODO: We are currently not making a very informed decision on when it is
+ // beneficial to apply stride versioning. It might make more sense that the
+ // users of this analysis (such as the vectorizer) will trigger it, based on
+ // their specific cost considerations; For example, in cases where stride
+ // versioning does not help resolving memory accesses/dependences, the
+ // vectorizer should evaluate the cost of the runtime test, and the benefit
+ // of various possible stride specializations, considering the alternatives
+ // of using gather/scatters (if available).
+
+ const SCEV *StrideExpr = PSE->getSCEV(Stride);
+ const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
+
+ // Match the types so we can compare the stride and the BETakenCount.
+ // The Stride can be positive/negative, so we sign extend Stride;
+  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
+ const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+ uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
+ uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
+ const SCEV *CastedStride = StrideExpr;
+ const SCEV *CastedBECount = BETakenCount;
+ ScalarEvolution *SE = PSE->getSE();
+ if (BETypeSize >= StrideTypeSize)
+ CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
+ else
+ CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
+ const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
+ // Since TripCount == BackEdgeTakenCount + 1, checking:
+ // "Stride >= TripCount" is equivalent to checking:
+ // Stride - BETakenCount > 0
+ if (SE->isKnownPositive(StrideMinusBETaken)) {
+ DEBUG(dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
+ "Stride==1 predicate will imply that the loop executes "
+ "at most once.\n");
+ return;
+ }
+ DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
+
SymbolicStrides[Ptr] = Stride;
StrideSet.insert(Stride);
}
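The comparison above is done on SCEVs after matching types, but the underlying arithmetic is elementary: because TripCount == BackedgeTakenCount + 1, "Stride >= TripCount" is equivalent to "Stride - BackedgeTakenCount > 0", which avoids materializing the +1. A tiny self-contained illustration with plain integers (names are illustrative only):

#include <cassert>
#include <cstdint>

// Stride >= TripCount  <=>  Stride - BETakenCount > 0
static bool strideAtLeastTripCount(int64_t Stride, int64_t BETakenCount) {
  return Stride - BETakenCount > 0;
}

int main() {
  assert(strideAtLeastTripCount(/*Stride=*/4, /*BETakenCount=*/3));  // trip count 4
  assert(!strideAtLeastTripCount(/*Stride=*/1, /*BETakenCount=*/3)); // versioning may help
  return 0;
}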
diff --git a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
index e4a0f90b2f71..ea7a62d179c4 100644
--- a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -11,15 +11,21 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;
+namespace llvm {
+/// Enables MemorySSA as a dependency for loop passes in the legacy pass manager.
+cl::opt<bool> EnableMSSALoopDependency(
+ "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
+ cl::desc("Enable MemorySSA dependency for loop pass manager"));
+
// Explicit template instantiations and specialization definitions for core
// template typedefs.
-namespace llvm {
template class AllAnalysesOn<Loop>;
template class AnalysisManager<Loop, LoopStandardAnalysisResults &>;
template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>;
@@ -45,19 +51,25 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
// loop analyses declare any dependencies on these and use the more general
// invalidation logic below to act on that.
auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>();
+ bool invalidateMemorySSAAnalysis = false;
+ if (EnableMSSALoopDependency)
+ invalidateMemorySSAAnalysis = Inv.invalidate<MemorySSAAnalysis>(F, PA);
if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<AAManager>(F, PA) ||
Inv.invalidate<AssumptionAnalysis>(F, PA) ||
Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
Inv.invalidate<LoopAnalysis>(F, PA) ||
- Inv.invalidate<ScalarEvolutionAnalysis>(F, PA)) {
+ Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
+ invalidateMemorySSAAnalysis) {
// Note that the LoopInfo may be stale at this point, however the loop
// objects themselves remain the only viable keys that could be in the
// analysis manager's cache. So we just walk the keys and forcibly clear
// those results. Note that the order doesn't matter here as this will just
// directly destroy the results without calling methods on them.
- for (Loop *L : PreOrderLoops)
- InnerAM->clear(*L);
+ for (Loop *L : PreOrderLoops) {
+ // NB! `L` may not be in a good enough state to run Loop::getName.
+ InnerAM->clear(*L, "<possibly invalidated loop>");
+ }
// We also need to null out the inner AM so that when the object gets
// destroyed as invalid we don't try to clear the inner AM again. At that
@@ -135,7 +147,9 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<LoopAnalysis>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
PA.preserve<ScalarEvolutionAnalysis>();
- // TODO: What we really want to do here is preserve an AA category, but that
+ // FIXME: Uncomment this when all loop passes preserve MemorySSA
+ // PA.preserve<MemorySSAAnalysis>();
+ // FIXME: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
PA.preserve<AAManager>();
PA.preserve<BasicAA>();
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 697b58622bb4..9e54d60779a0 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -44,9 +45,9 @@ bool llvm::VerifyLoopInfo = true;
#else
bool llvm::VerifyLoopInfo = false;
#endif
-static cl::opt<bool,true>
-VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
- cl::desc("Verify loop info (time consuming)"));
+static cl::opt<bool, true>
+ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+ cl::Hidden, cl::desc("Verify loop info (time consuming)"));
//===----------------------------------------------------------------------===//
// Loop implementation
@@ -55,7 +56,7 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
bool Loop::isLoopInvariant(const Value *V) const {
if (const Instruction *I = dyn_cast<Instruction>(V))
return !contains(I);
- return true; // All non-instructions are loop invariant
+ return true; // All non-instructions are loop invariant
}
bool Loop::hasLoopInvariantOperands(const Instruction *I) const {
@@ -66,7 +67,7 @@ bool Loop::makeLoopInvariant(Value *V, bool &Changed,
Instruction *InsertPt) const {
if (Instruction *I = dyn_cast<Instruction>(V))
return makeLoopInvariant(I, Changed, InsertPt);
- return true; // All non-instructions are loop-invariant.
+ return true; // All non-instructions are loop-invariant.
}
bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
@@ -112,12 +113,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
BasicBlock *Incoming = nullptr, *Backedge = nullptr;
pred_iterator PI = pred_begin(H);
- assert(PI != pred_end(H) &&
- "Loop must have at least one backedge!");
+ assert(PI != pred_end(H) && "Loop must have at least one backedge!");
Backedge = *PI++;
- if (PI == pred_end(H)) return nullptr; // dead loop
+ if (PI == pred_end(H))
+ return nullptr; // dead loop
Incoming = *PI++;
- if (PI != pred_end(H)) return nullptr; // multiple backedges?
+ if (PI != pred_end(H))
+ return nullptr; // multiple backedges?
if (contains(Incoming)) {
if (contains(Backedge))
@@ -130,12 +132,11 @@ PHINode *Loop::getCanonicalInductionVariable() const {
for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
if (ConstantInt *CI =
- dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+ dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
if (CI->isZero())
if (Instruction *Inc =
- dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
- if (Inc->getOpcode() == Instruction::Add &&
- Inc->getOperand(0) == PN)
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+ if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN)
if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
if (CI->isOne())
return PN;
@@ -255,7 +256,8 @@ void Loop::setLoopID(MDNode *LoopID) const {
return;
}
- assert(!getLoopLatch() && "The loop should have no single latch at this point");
+ assert(!getLoopLatch() &&
+ "The loop should have no single latch at this point");
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
TerminatorInst *TI = BB->getTerminator();
@@ -266,11 +268,44 @@ void Loop::setLoopID(MDNode *LoopID) const {
}
}
+void Loop::setLoopAlreadyUnrolled() {
+ MDNode *LoopID = getLoopID();
+ // First remove any existing loop unrolling metadata.
+ SmallVector<Metadata *, 4> MDs;
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDs.push_back(nullptr);
+
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ bool IsUnrollMetadata = false;
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
+ }
+ if (!IsUnrollMetadata)
+ MDs.push_back(LoopID->getOperand(i));
+ }
+ }
+
+ // Add unroll(disable) metadata to disable future unrolling.
+ LLVMContext &Context = getHeader()->getContext();
+ SmallVector<Metadata *, 1> DisableOperands;
+ DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ MDs.push_back(DisableNode);
+
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ setLoopID(NewLoopID);
+}
+
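A hypothetical caller-side sketch of the new helper; the function name below and the metadata shape in the comment are only a rough illustration of what LangRef documents for llvm.loop metadata, not output taken from this commit:

#include "llvm/Analysis/LoopInfo.h"

// After a transform has finished unrolling L, mark it so later unroll
// passes leave it alone. The loop terminators end up with metadata roughly like:
//   br i1 %cond, label %header, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.unroll.disable"}
static void markUnrolled(llvm::Loop *L) { L->setLoopAlreadyUnrolled(); }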
bool Loop::isAnnotatedParallel() const {
MDNode *DesiredLoopIdMetadata = getLoopID();
if (!DesiredLoopIdMetadata)
- return false;
+ return false;
// The loop branch contains the parallel loop metadata. In order to ensure
// that any parallel-loop-unaware optimization pass hasn't added loop-carried
@@ -307,9 +342,7 @@ bool Loop::isAnnotatedParallel() const {
return true;
}
-DebugLoc Loop::getStartLoc() const {
- return getLocRange().getStart();
-}
+DebugLoc Loop::getStartLoc() const { return getLocRange().getStart(); }
Loop::LocRange Loop::getLocRange() const {
// If we have a debug location in the loop ID, then use it.
@@ -357,8 +390,8 @@ bool Loop::hasDedicatedExits() const {
return true;
}
-void
-Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+void Loop::getUniqueExitBlocks(
+ SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
assert(hasDedicatedExits() &&
"getUniqueExitBlocks assumes the loop has canonical form exits!");
@@ -408,12 +441,10 @@ BasicBlock *Loop::getUniqueExitBlock() const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void Loop::dump() const {
- print(dbgs());
-}
+LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void Loop::dumpVerbose() const {
- print(dbgs(), /*Depth=*/ 0, /*Verbose=*/ true);
+ print(dbgs(), /*Depth=*/0, /*Verbose=*/true);
}
#endif
@@ -434,15 +465,15 @@ class UnloopUpdater {
// loops within these subloops will not change parents. However, an immediate
// subloop's new parent will be the nearest loop reachable from either its own
// exits *or* any of its nested loop's exits.
- DenseMap<Loop*, Loop*> SubloopParents;
+ DenseMap<Loop *, Loop *> SubloopParents;
// Flag the presence of an irreducible backedge whose destination is a block
// directly contained by the original unloop.
bool FoundIB;
public:
- UnloopUpdater(Loop *UL, LoopInfo *LInfo) :
- Unloop(*UL), LI(LInfo), DFS(UL), FoundIB(false) {}
+ UnloopUpdater(Loop *UL, LoopInfo *LInfo)
+ : Unloop(*UL), LI(LInfo), DFS(UL), FoundIB(false) {}
void updateBlockParents();
@@ -472,8 +503,7 @@ void UnloopUpdater::updateBlockParents() {
assert((NL != &Unloop && (!NL || NL->contains(&Unloop))) &&
"uninitialized successor");
LI->changeLoopFor(POI, NL);
- }
- else {
+ } else {
// Or the current block is part of a subloop, in which case its parent
// is unchanged.
assert((FoundIB || Unloop.contains(L)) && "uninitialized successor");
@@ -490,7 +520,8 @@ void UnloopUpdater::updateBlockParents() {
// from successors to predecessors as before.
Changed = false;
for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(),
- POE = DFS.endPostorder(); POI != POE; ++POI) {
+ POE = DFS.endPostorder();
+ POI != POE; ++POI) {
Loop *L = LI->getLoopFor(*POI);
Loop *NL = getNearestLoop(*POI, L);
@@ -508,8 +539,8 @@ void UnloopUpdater::updateBlockParents() {
void UnloopUpdater::removeBlocksFromAncestors() {
// Remove all unloop's blocks (including those in nested subloops) from
// ancestors below the new parent loop.
- for (Loop::block_iterator BI = Unloop.block_begin(),
- BE = Unloop.block_end(); BI != BE; ++BI) {
+ for (Loop::block_iterator BI = Unloop.block_begin(), BE = Unloop.block_end();
+ BI != BE; ++BI) {
Loop *OuterParent = LI->getLoopFor(*BI);
if (Unloop.contains(OuterParent)) {
while (OuterParent->getParentLoop() != &Unloop)
@@ -609,9 +640,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
return NearLoop;
}
-LoopInfo::LoopInfo(const DomTreeBase<BasicBlock> &DomTree) {
- analyze(DomTree);
-}
+LoopInfo::LoopInfo(const DomTreeBase<BasicBlock> &DomTree) { analyze(DomTree); }
bool LoopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
@@ -622,10 +651,10 @@ bool LoopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
PAC.preservedSet<CFGAnalyses>());
}
-void LoopInfo::markAsRemoved(Loop *Unloop) {
- assert(!Unloop->isInvalid() && "Loop has already been removed");
- Unloop->invalidate();
- RemovedLoops.push_back(Unloop);
+void LoopInfo::erase(Loop *Unloop) {
+ assert(!Unloop->isInvalid() && "Loop has already been erased!");
+
+ auto InvalidateOnExit = make_scope_exit([&]() { destroy(Unloop); });
// First handle the special case of no parent loop to simplify the algorithm.
if (!Unloop->getParentLoop()) {
@@ -702,12 +731,43 @@ PreservedAnalyses LoopPrinterPass::run(Function &F,
}
void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) {
+
+ if (forcePrintModuleIR()) {
+ // handling -print-module-scope
+ OS << Banner << " (loop: ";
+ L.getHeader()->printAsOperand(OS, false);
+ OS << ")\n";
+
+  // Print the whole module.
+ OS << *L.getHeader()->getModule();
+ return;
+ }
+
OS << Banner;
+
+ auto *PreHeader = L.getLoopPreheader();
+ if (PreHeader) {
+ OS << "\n; Preheader:";
+ PreHeader->print(OS);
+ OS << "\n; Loop:";
+ }
+
for (auto *Block : L.blocks())
if (Block)
Block->print(OS);
else
OS << "Printing <null> block";
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+ if (!ExitBlocks.empty()) {
+ OS << "\n; Exit blocks";
+ for (auto *Block : ExitBlocks)
+ if (Block)
+ Block->print(OS);
+ else
+ OS << "Printing <null> block";
+ }
}
//===----------------------------------------------------------------------===//
@@ -766,5 +826,7 @@ PreservedAnalyses LoopVerifierPass::run(Function &F,
void LoopBlocksDFS::perform(LoopInfo *LI) {
LoopBlocksTraversal Traversal(*this, LI);
for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
- POE = Traversal.end(); POI != POE; ++POI) ;
+ POE = Traversal.end();
+ POI != POE; ++POI)
+ ;
}
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index e988f6444a58..9af717bafdca 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -46,8 +46,7 @@ public:
}
bool runOnLoop(Loop *L, LPPassManager &) override {
- auto BBI = find_if(L->blocks().begin(), L->blocks().end(),
- [](BasicBlock *BB) { return BB; });
+ auto BBI = llvm::find_if(L->blocks(), [](BasicBlock *BB) { return BB; });
if (BBI != L->blocks().end() &&
isFunctionInPrintList((*BBI)->getParent()->getName())) {
printLoop(*L, OS, Banner);
@@ -140,6 +139,13 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
}
+void LPPassManager::markLoopAsDeleted(Loop &L) {
+ assert((&L == CurrentLoop || CurrentLoop->contains(&L)) &&
+ "Must not delete loop outside the current loop tree!");
+ if (&L == CurrentLoop)
+ CurrentLoopDeleted = true;
+}
+
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the function, and if so, return true.
bool LPPassManager::runOnFunction(Function &F) {
@@ -176,7 +182,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// Walk Loops
while (!LQ.empty()) {
- bool LoopWasDeleted = false;
+ CurrentLoopDeleted = false;
CurrentLoop = LQ.back();
// Run all passes on the current Loop.
@@ -195,15 +201,14 @@ bool LPPassManager::runOnFunction(Function &F) {
Changed |= P->runOnLoop(CurrentLoop, *this);
}
- LoopWasDeleted = CurrentLoop->isInvalid();
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
- LoopWasDeleted ? "<deleted>"
- : CurrentLoop->getHeader()->getName());
+ CurrentLoopDeleted ? "<deleted loop>"
+ : CurrentLoop->getName());
dumpPreservedSet(P);
- if (LoopWasDeleted) {
+ if (CurrentLoopDeleted) {
// Notify passes that the loop is being deleted.
deleteSimpleAnalysisLoop(CurrentLoop);
} else {
@@ -231,11 +236,12 @@ bool LPPassManager::runOnFunction(Function &F) {
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
- removeDeadPasses(P, LoopWasDeleted ? "<deleted>"
- : CurrentLoop->getHeader()->getName(),
+ removeDeadPasses(P,
+ CurrentLoopDeleted ? "<deleted>"
+ : CurrentLoop->getHeader()->getName(),
ON_LOOP_MSG);
- if (LoopWasDeleted)
+ if (CurrentLoopDeleted)
// Do not run other passes on this loop.
break;
}
@@ -243,7 +249,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// If the loop was deleted, release all the loop passes. This frees up
// some memory, and avoids trouble with the pass manager trying to call
// verifyAnalysis on them.
- if (LoopWasDeleted) {
+ if (CurrentLoopDeleted) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
freePass(P, "<deleted>", ON_LOOP_MSG);
@@ -361,4 +367,3 @@ bool LoopPass::skipLoop(const Loop *L) const {
char LCSSAVerificationPass::ID = 0;
INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier",
false, false)
-
diff --git a/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
index 7bdf3408a581..0da90dae3d9a 100644
--- a/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
-#include "llvm/IR/Dominators.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
index 4231a78352ce..4a136c5a0c6d 100644
--- a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -7,9 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index 7327c07499be..24fedfed772c 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -1,4 +1,4 @@
-//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//===- MemoryBuiltins.cpp - Identify calls to memory builtins -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,20 +13,39 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "memory-builtins"
@@ -187,7 +206,6 @@ static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
return CS && CS.hasRetAttr(Attribute::NoAlias);
}
-
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
/// like).
@@ -323,7 +341,6 @@ Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL,
return computeArraySize(CI, DL, TLI, LookThroughSExt);
}
-
/// extractCallocCall - Returns the corresponding CallInst if the instruction
/// is a calloc call.
const CallInst *llvm::extractCallocCall(const Value *I,
@@ -331,7 +348,6 @@ const CallInst *llvm::extractCallocCall(const Value *I,
return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr;
}
-
/// isFreeCall - Returns non-null if the value is a call to the builtin free()
const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
const CallInst *CI = dyn_cast<CallInst>(I);
@@ -387,8 +403,6 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
return CI;
}
-
-
//===----------------------------------------------------------------------===//
// Utility functions to compute size of objects.
//
@@ -452,7 +466,6 @@ STATISTIC(ObjectVisitorArgument,
STATISTIC(ObjectVisitorLoad,
"Number of load instructions with unsolved size and offset");
-
APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
if (Options.RoundToAlign && Align)
return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align));
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 263cf42ebe27..a6c590126c2f 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -15,28 +15,40 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -45,7 +57,9 @@
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
+#include <cstdint>
#include <iterator>
+#include <utility>
using namespace llvm;
@@ -105,38 +119,38 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
if (LI->isUnordered()) {
Loc = MemoryLocation::get(LI);
- return MRI_Ref;
+ return ModRefInfo::Ref;
}
if (LI->getOrdering() == AtomicOrdering::Monotonic) {
Loc = MemoryLocation::get(LI);
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
Loc = MemoryLocation();
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->isUnordered()) {
Loc = MemoryLocation::get(SI);
- return MRI_Mod;
+ return ModRefInfo::Mod;
}
if (SI->getOrdering() == AtomicOrdering::Monotonic) {
Loc = MemoryLocation::get(SI);
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
Loc = MemoryLocation();
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
Loc = MemoryLocation::get(V);
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
}
if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
// calls to free() deallocate the entire structure
Loc = MemoryLocation(CI->getArgOperand(0));
- return MRI_Mod;
+ return ModRefInfo::Mod;
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -152,7 +166,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
- return MRI_Mod;
+ return ModRefInfo::Mod;
case Intrinsic::invariant_end:
II->getAAMetadata(AAInfo);
Loc = MemoryLocation(
@@ -160,7 +174,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
- return MRI_Mod;
+ return ModRefInfo::Mod;
default:
break;
}
@@ -168,10 +182,10 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
// Otherwise, just do the coarse-grained thing that always works.
if (Inst->mayWriteToMemory())
- return MRI_ModRef;
+ return ModRefInfo::ModRef;
if (Inst->mayReadFromMemory())
- return MRI_Ref;
- return MRI_NoModRef;
+ return ModRefInfo::Ref;
+ return ModRefInfo::NoModRef;
}
/// Private helper for finding the local dependencies of a call site.
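The pattern throughout this file is mechanical: scoped ModRefInfo values plus the predicate helpers (isModSet, isRefSet, isModOrRefSet, isNoModRef) replace raw bit tests such as (MR & MRI_Mod) == 0. A minimal usage sketch, assuming only the public AliasAnalysis interfaces (the wrapper name is illustrative):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/CallSite.h"

// Does the call site possibly read or write Loc? Mirrors the check used in
// getCallSiteDependencyFrom() above.
static bool mayReadOrWrite(llvm::AAResults &AA, llvm::ImmutableCallSite CS,
                           const llvm::MemoryLocation &Loc) {
  return llvm::isModOrRefSet(AA.getModRefInfo(CS, Loc));
}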
@@ -182,48 +196,46 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
// Walk backwards through the block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ Instruction *Inst = &*--ScanIt;
+ // Debug intrinsics don't cause dependences and should not affect Limit
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
--Limit;
if (!Limit)
return MemDepResult::getUnknown();
- Instruction *Inst = &*--ScanIt;
-
// If this inst is a memory op, get the pointer it accessed
MemoryLocation Loc;
ModRefInfo MR = GetLocation(Inst, Loc, TLI);
if (Loc.Ptr) {
// A simple instruction.
- if (AA.getModRefInfo(CS, Loc) != MRI_NoModRef)
+ if (isModOrRefSet(AA.getModRefInfo(CS, Loc)))
return MemDepResult::getClobber(Inst);
continue;
}
if (auto InstCS = CallSite(Inst)) {
- // Debug intrinsics don't cause dependences.
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
// If these two calls do not interfere, look past it.
- switch (AA.getModRefInfo(CS, InstCS)) {
- case MRI_NoModRef:
+ if (isNoModRef(AA.getModRefInfo(CS, InstCS))) {
// If the two calls are the same, return InstCS as a Def, so that
// CS can be found redundant and eliminated.
- if (isReadOnlyCall && !(MR & MRI_Mod) &&
+ if (isReadOnlyCall && !isModSet(MR) &&
CS.getInstruction()->isIdenticalToWhenDefined(Inst))
return MemDepResult::getDef(Inst);
// Otherwise if the two calls don't interact (e.g. InstCS is readnone)
// keep scanning.
continue;
- default:
+ } else
return MemDepResult::getClobber(Inst);
- }
}
// If we could not obtain a pointer for the instruction and the instruction
// touches memory then assume that this is a dependency.
- if (MR != MRI_NoModRef)
+ if (isModOrRefSet(MR))
return MemDepResult::getClobber(Inst);
}
@@ -294,8 +306,10 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
return 0;
if (LIOffs + NewLoadByteSize > MemLocEnd &&
- LI->getParent()->getParent()->hasFnAttribute(
- Attribute::SanitizeAddress))
+ (LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress) ||
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeHWAddress)))
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
@@ -322,7 +336,6 @@ static bool isVolatile(Instruction *Inst) {
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
-
MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -350,7 +363,6 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
BasicBlock *BB) {
-
auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
if (!InvariantGroupMD)
return MemDepResult::getUnknown();
@@ -380,7 +392,6 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
return Best;
};
-
// FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
// we will see all the instructions. This should be fixed in MSSA.
while (!LoadOperandsQueue.empty()) {
@@ -541,7 +552,6 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// it does not alias with when this atomic load indicates that another
// thread may be accessing the location.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-
// While volatile access cannot be eliminated, they do not have to clobber
// non-aliasing locations, as normal accesses, for example, can be safely
// reordered with volatile accesses.
@@ -632,7 +642,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
- if (AA.getModRefInfo(SI, MemLoc) == MRI_NoModRef)
+ if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
continue;
// Ok, this store might clobber the query pointer. Check to see if it is
@@ -678,15 +688,15 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
- if (MR == MRI_ModRef)
+ if (isModAndRefSet(MR))
MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
switch (MR) {
- case MRI_NoModRef:
+ case ModRefInfo::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
- case MRI_Mod:
+ case ModRefInfo::Mod:
return MemDepResult::getClobber(Inst);
- case MRI_Ref:
+ case ModRefInfo::Ref:
// If the call is known to never store to the pointer, and if this is a
// load query, we can safely ignore it (scan past it).
if (isLoad)
@@ -739,7 +749,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
if (MemLoc.Ptr) {
// If we can do a pointer scan, make it happen.
- bool isLoad = !(MR & MRI_Mod);
+ bool isLoad = !isModSet(MR);
if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
@@ -1508,7 +1518,6 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
}
// If we have a cached local dependence query for this instruction, remove it.
- //
LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
if (LocalDepEntry != LocalDeps.end()) {
// Remove us from DepInst's reverse set now that the local dep info is gone.
@@ -1531,7 +1540,6 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
}
// Loop over all of the things that depend on the instruction we're removing.
- //
SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
// If we find RemInst as a clobber or Def in any of the maps for other values,
@@ -1726,7 +1734,7 @@ MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
}
-MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {}
+MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
void MemoryDependenceWrapperPass::releaseMemory() {
MemDep.reset();
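The hunks above replace the old MRI_* bitmask tests with the ModRefInfo enum class and its isModSet/isRefSet/isModOrRefSet/isModAndRefSet predicates. A minimal self-contained sketch of the intended semantics; the enum values and helper bodies below are illustrative stand-ins, not the LLVM definitions:

// Stand-in for the new ModRefInfo helpers (assumed semantics, not the LLVM header).
#include <cstdio>

enum class ModRefInfo { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

static bool isModSet(ModRefInfo MRI) { return ((int)MRI & (int)ModRefInfo::Mod) != 0; }
static bool isRefSet(ModRefInfo MRI) { return ((int)MRI & (int)ModRefInfo::Ref) != 0; }
static bool isModOrRefSet(ModRefInfo MRI) { return MRI != ModRefInfo::NoModRef; }
static bool isModAndRefSet(ModRefInfo MRI) { return MRI == ModRefInfo::ModRef; }

int main() {
  // The old code spelled these out as `MR & MRI_Mod`, `MR == MRI_NoModRef`, etc.
  ModRefInfo MR = ModRefInfo::Ref;
  std::printf("mod=%d ref=%d any=%d both=%d\n",
              isModSet(MR), isRefSet(MR), isModOrRefSet(MR), isModAndRefSet(MR));
  return 0;
}

Under these assumptions, !isModOrRefSet(x) matches the old x == MRI_NoModRef check, and bool(x & MRI_Mod) becomes isModSet(x), which is exactly how the isLoad computation above is rewritten.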
diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp
index 86de474c7aa9..8fe190e8bcf8 100644
--- a/contrib/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp
@@ -1,48 +1,63 @@
-//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===//
+//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//===----------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// This file implements the MemorySSA class.
//
-//===----------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Use.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <utility>
-#define DEBUG_TYPE "memoryssa"
using namespace llvm;
+
+#define DEBUG_TYPE "memoryssa"
+
INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -66,30 +81,34 @@ static cl::opt<bool>
cl::desc("Verify MemorySSA in legacy printer pass."));
namespace llvm {
+
/// \brief An assembly annotator class to print Memory SSA information in
/// comments.
class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
friend class MemorySSA;
+
const MemorySSA *MSSA;
public:
MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}
- virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
- formatted_raw_ostream &OS) {
+ void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) override {
if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
OS << "; " << *MA << "\n";
}
- virtual void emitInstructionAnnot(const Instruction *I,
- formatted_raw_ostream &OS) {
+ void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) override {
if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
OS << "; " << *MA << "\n";
}
};
-}
+
+} // end namespace llvm
namespace {
+
/// Our current alias analysis API differentiates heavily between calls and
/// non-calls, and functions called on one usually assert on the other.
/// This class encapsulates the distinction to simplify other code that wants
@@ -97,7 +116,9 @@ namespace {
/// For example, this class is used as a densemap key in the use optimizer.
class MemoryLocOrCall {
public:
- MemoryLocOrCall() : IsCall(false) {}
+ bool IsCall = false;
+
+ MemoryLocOrCall() = default;
MemoryLocOrCall(MemoryUseOrDef *MUD)
: MemoryLocOrCall(MUD->getMemoryInst()) {}
MemoryLocOrCall(const MemoryUseOrDef *MUD)
@@ -116,14 +137,13 @@ public:
}
}
- explicit MemoryLocOrCall(const MemoryLocation &Loc)
- : IsCall(false), Loc(Loc) {}
+ explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {}
- bool IsCall;
ImmutableCallSite getCS() const {
assert(IsCall);
return CS;
}
+
MemoryLocation getLoc() const {
assert(!IsCall);
return Loc;
@@ -144,16 +164,20 @@ private:
MemoryLocation Loc;
};
};
-}
+
+} // end anonymous namespace
namespace llvm {
+
template <> struct DenseMapInfo<MemoryLocOrCall> {
static inline MemoryLocOrCall getEmptyKey() {
return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey());
}
+
static inline MemoryLocOrCall getTombstoneKey() {
return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey());
}
+
static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
if (MLOC.IsCall)
return hash_combine(MLOC.IsCall,
@@ -162,6 +186,7 @@ template <> struct DenseMapInfo<MemoryLocOrCall> {
return hash_combine(
MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
}
+
static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
return LHS == RHS;
}
@@ -169,6 +194,8 @@ template <> struct DenseMapInfo<MemoryLocOrCall> {
enum class Reorderability { Always, IfNoAlias, Never };
+} // end namespace llvm
+
/// This does one-way checks to see if Use could theoretically be hoisted above
/// MayClobber. This will not check the other way around.
///
@@ -235,7 +262,7 @@ static bool instructionClobbersQuery(MemoryDef *MD,
if (UseCS) {
ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
- return I != MRI_NoModRef;
+ return isModOrRefSet(I);
}
if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) {
@@ -251,7 +278,7 @@ static bool instructionClobbersQuery(MemoryDef *MD,
}
}
- return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod;
+ return isModSet(AA.getModRefInfo(DefInst, UseLoc));
}
static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
@@ -271,22 +298,21 @@ bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
AliasAnalysis &AA) {
return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA);
}
-}
namespace {
+
struct UpwardsMemoryQuery {
// True if our original query started off as a call
- bool IsCall;
+ bool IsCall = false;
// The pointer location we started the query with. This will be empty if
// IsCall is true.
MemoryLocation StartingLoc;
// This is the instruction we were querying about.
- const Instruction *Inst;
+ const Instruction *Inst = nullptr;
// The MemoryAccess we actually got called with, used to test local domination
- const MemoryAccess *OriginalAccess;
+ const MemoryAccess *OriginalAccess = nullptr;
- UpwardsMemoryQuery()
- : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {}
+ UpwardsMemoryQuery() = default;
UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
: IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
@@ -295,6 +321,8 @@ struct UpwardsMemoryQuery {
}
};
+} // end anonymous namespace
+
static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
AliasAnalysis &AA) {
Instruction *Inst = MD->getMemoryInst();
@@ -394,6 +422,8 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
"ClobberAt never acted as a clobber");
}
+namespace {
+
/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
/// in one class.
class ClobberWalker {
@@ -569,7 +599,7 @@ class ClobberWalker {
struct generic_def_path_iterator
: public iterator_facade_base<generic_def_path_iterator<T, Walker>,
std::forward_iterator_tag, T *> {
- generic_def_path_iterator() : W(nullptr), N(None) {}
+ generic_def_path_iterator() = default;
generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
T &operator*() const { return curNode(); }
@@ -588,8 +618,8 @@ class ClobberWalker {
private:
T &curNode() const { return W->Paths[*N]; }
- Walker *W;
- Optional<ListIndex> N;
+ Walker *W = nullptr;
+ Optional<ListIndex> N = None;
};
using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>;
@@ -664,7 +694,7 @@ class ClobberWalker {
};
MemoryPhi *Current = Phi;
- while (1) {
+ while (true) {
assert(!MSSA.isLiveOnEntryDef(Current) &&
"liveOnEntry wasn't treated as a clobber?");
@@ -842,30 +872,33 @@ struct RenamePassData {
RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
MemoryAccess *M)
: DTN(D), ChildIt(It), IncomingVal(M) {}
+
void swap(RenamePassData &RHS) {
std::swap(DTN, RHS.DTN);
std::swap(ChildIt, RHS.ChildIt);
std::swap(IncomingVal, RHS.IncomingVal);
}
};
-} // anonymous namespace
+
+} // end anonymous namespace
namespace llvm {
+
/// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no
/// longer does caching on its own,
/// but the name has been retained for the moment.
class MemorySSA::CachingWalker final : public MemorySSAWalker {
ClobberWalker Walker;
- bool AutoResetWalker;
+ bool AutoResetWalker = true;
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
- void verifyRemoved(MemoryAccess *);
public:
CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
- ~CachingWalker() override;
+ ~CachingWalker() override = default;
using MemorySSAWalker::getClobberingMemoryAccess;
+
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
const MemoryLocation &) override;
@@ -884,6 +917,8 @@ public:
}
};
+} // end namespace llvm
+
void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
bool RenameAllUses) {
// Pass through values to our successors
@@ -1032,17 +1067,20 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
if (Res.second)
- Res.first->second = make_unique<AccessList>();
+ Res.first->second = llvm::make_unique<AccessList>();
return Res.first->second.get();
}
+
MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr));
if (Res.second)
- Res.first->second = make_unique<DefsList>();
+ Res.first->second = llvm::make_unique<DefsList>();
return Res.first->second.get();
}
+namespace llvm {
+
/// This class is a batch walker of all MemoryUse's in the program, and points
/// their defining access at the thing that actually clobbers them. Because it
/// is a batch walker that touches everything, it does not operate like the
@@ -1077,15 +1115,19 @@ private:
unsigned long LastKill;
bool LastKillValid;
};
+
void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
SmallVectorImpl<MemoryAccess *> &,
DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
+
MemorySSA *MSSA;
MemorySSAWalker *Walker;
AliasAnalysis *AA;
DominatorTree *DT;
};
+} // end namespace llvm
+
/// Optimize the uses in a given block This is basically the SSA renaming
/// algorithm, with one caveat: We are able to use a single stack for all
/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
@@ -1281,8 +1323,9 @@ void MemorySSA::buildMemorySSA() {
// semantics do *not* imply that something with no immediate uses can simply
// be removed.
BasicBlock &StartingPoint = F.getEntryBlock();
- LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
- &StartingPoint, NextID++);
+ LiveOnEntryDef =
+ llvm::make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
+ &StartingPoint, NextID++);
DenseMap<const BasicBlock *, unsigned int> BBNumbers;
unsigned NextBBNum = 0;
@@ -1343,7 +1386,7 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
if (Walker)
return Walker.get();
- Walker = make_unique<CachingWalker>(this, AA, DT);
+ Walker = llvm::make_unique<CachingWalker>(this, AA, DT);
return Walker.get();
}
@@ -1462,6 +1505,7 @@ static inline bool isOrdered(const Instruction *I) {
}
return false;
}
+
/// \brief Helper function to create new memory accesses
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
// The assume intrinsic has a control dependency which we model by claiming
@@ -1473,7 +1517,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
return nullptr;
// Find out what affect this instruction has on memory.
- ModRefInfo ModRef = AA->getModRefInfo(I);
+ ModRefInfo ModRef = AA->getModRefInfo(I, None);
// The isOrdered check is used to ensure that volatiles end up as defs
// (atomics end up as ModRef right now anyway). Until we separate the
// ordering chain from the memory chain, this enables people to see at least
@@ -1482,8 +1526,8 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
// Separate memory aliasing and ordering into two different chains so that we
// can precisely represent both "what memory will this read/write/is clobbered
// by" and "what instructions can I move this past".
- bool Def = bool(ModRef & MRI_Mod) || isOrdered(I);
- bool Use = bool(ModRef & MRI_Ref);
+ bool Def = isModSet(ModRef) || isOrdered(I);
+ bool Use = isRefSet(ModRef);
// It's possible for an instruction to not modify memory at all. During
// construction, we ignore them.
@@ -1675,7 +1719,6 @@ void MemorySSA::verifyDomination(Function &F) const {
/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
-
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
#ifndef NDEBUG
// The live on entry use may cause us to get a NULL def here
@@ -1739,7 +1782,6 @@ void MemorySSA::renumberBlock(const BasicBlock *B) const {
/// \returns True if \p Dominator dominates \p Dominatee.
bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
const MemoryAccess *Dominatee) const {
-
const BasicBlock *DominatorBlock = Dominator->getBlock();
assert((DominatorBlock == Dominatee->getBlock()) &&
@@ -1887,7 +1929,7 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT));
+ return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
}
PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
@@ -1936,9 +1978,7 @@ MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
DominatorTree *D)
- : MemorySSAWalker(M), Walker(*M, *A, *D), AutoResetWalker(true) {}
-
-MemorySSA::CachingWalker::~CachingWalker() {}
+ : MemorySSAWalker(M), Walker(*M, *A, *D) {}
void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
@@ -2059,7 +2099,6 @@ MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
return Use->getDefiningAccess();
return StartingAccess;
}
-} // namespace llvm
void MemoryPhi::deleteMe(DerivedUser *Self) {
delete static_cast<MemoryPhi *>(Self);
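The MemorySSA changes above add a DenseMapInfo<MemoryLocOrCall> specialization so MemoryLocOrCall can be used as a DenseMap key in the use optimizer. A rough sketch of the contract such a specialization must satisfy, with a made-up MockKey standing in for MemoryLocOrCall:

// Toy illustration of the DenseMapInfo contract (empty key, tombstone key,
// hash, equality); MockKey and its field layout are assumptions for the sketch.
#include <cstdio>
#include <functional>

struct MockKey {
  bool IsCall = false;
  int Loc = 0;  // stands in for the MemoryLocation / CallSite payload
  bool operator==(const MockKey &O) const { return IsCall == O.IsCall && Loc == O.Loc; }
};

struct MockKeyInfo {
  // Reserved keys: must never compare equal to any real key the map stores.
  static MockKey getEmptyKey()     { return {false, -1}; }
  static MockKey getTombstoneKey() { return {false, -2}; }
  static unsigned getHashValue(const MockKey &K) {
    return (unsigned)std::hash<int>()(K.Loc) ^ (K.IsCall ? 0x9e3779b9u : 0u);
  }
  static bool isEqual(const MockKey &A, const MockKey &B) { return A == B; }
};

int main() {
  MockKey A{false, 42}, B{false, 42};
  std::printf("same hash: %d  isEqual: %d\n",
              MockKeyInfo::getHashValue(A) == MockKeyInfo::getHashValue(B),
              MockKeyInfo::isEqual(A, B));
  return 0;
}

The real specialization hashes either the call site or the MemoryLocation depending on IsCall, mirroring the split shown in the hunk.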
diff --git a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
index 1ff84471c094..f5d89f699a5a 100644
--- a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -13,13 +13,11 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -85,12 +83,11 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
unsigned i = 0;
for (auto *Pred : predecessors(BB))
Phi->addIncoming(PhiOps[i++], Pred);
+ InsertedPHIs.push_back(Phi);
}
-
Result = Phi;
}
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Result))
- InsertedPHIs.push_back(MP);
+
// Set ourselves up for the next variable by resetting visited state.
VisitedBlocks.erase(BB);
return Result;
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index e12cdf9182c7..1e321f17d59f 100644
--- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index e9e354ebb88f..d54fb700200d 100644
--- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -13,23 +13,47 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
#include "llvm/Object/ModuleSymbolTable.h"
+#include "llvm/Object/SymbolicFile.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "module-summary-analysis"
@@ -174,7 +198,7 @@ static void addIntrinsicToSummary(
static void
computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
const Function &F, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, bool HasLocalsInUsed,
+ ProfileSummaryInfo *PSI, bool HasLocalsInUsedOrAsm,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
// Summary not currently supported for anonymous functions, they should
// have been named.
@@ -191,9 +215,13 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls,
TypeCheckedLoadConstVCalls;
ICallPromotionAnalysis ICallAnalysis;
+ SmallPtrSet<const User *, 8> Visited;
+
+ // Add personality function, prefix data and prologue data to function's ref
+ // list.
+ findRefEdges(Index, &F, RefEdges, Visited);
bool HasInlineAsmMaybeReferencingInternal = false;
- SmallPtrSet<const User *, 8> Visited;
for (const BasicBlock &BB : F)
for (const Instruction &I : BB) {
if (isa<DbgInfoIntrinsic>(I))
@@ -210,11 +238,16 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// a local value from inline assembly to ensure we don't export a
// reference (which would require renaming and promotion of the
// referenced value).
- if (HasLocalsInUsed && CI && CI->isInlineAsm())
+ if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
HasInlineAsmMaybeReferencingInternal = true;
auto *CalledValue = CS.getCalledValue();
auto *CalledFunction = CS.getCalledFunction();
+ if (CalledValue && !CalledFunction) {
+ CalledValue = CalledValue->stripPointerCastsNoFollowAliases();
+ // Stripping pointer casts can reveal a called function.
+ CalledFunction = dyn_cast<Function>(CalledValue);
+ }
// Check if this is an alias to a function. If so, get the
// called aliasee for the checks below.
if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
@@ -248,7 +281,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (CI && CI->isInlineAsm())
continue;
// Skip direct calls.
- if (!CS.getCalledValue() || isa<Constant>(CS.getCalledValue()))
+ if (!CalledValue || isa<Constant>(CalledValue))
continue;
uint32_t NumVals, NumCandidates;
@@ -275,11 +308,17 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// FIXME: refactor this to use the same code that inliner is using.
F.isVarArg();
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
- /* Live = */ false);
+ /* Live = */ false, F.isDSOLocal());
+ FunctionSummary::FFlags FunFlags{
+ F.hasFnAttribute(Attribute::ReadNone),
+ F.hasFnAttribute(Attribute::ReadOnly),
+ F.hasFnAttribute(Attribute::NoRecurse),
+ F.returnDoesNotAlias(),
+ };
auto FuncSummary = llvm::make_unique<FunctionSummary>(
- Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
- TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
- TypeCheckedLoadVCalls.takeVector(),
+ Flags, NumInsts, FunFlags, RefEdges.takeVector(),
+ CallGraphEdges.takeVector(), TypeTests.takeVector(),
+ TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
TypeTestAssumeConstVCalls.takeVector(),
TypeCheckedLoadConstVCalls.takeVector());
if (NonRenamableLocal)
@@ -295,7 +334,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
findRefEdges(Index, &V, RefEdges, Visited);
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
- /* Live = */ false);
+ /* Live = */ false, V.isDSOLocal());
auto GVarSummary =
llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
if (NonRenamableLocal)
@@ -308,8 +347,8 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
- /* Live = */ false);
- auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
+ /* Live = */ false, A.isDSOLocal());
+ auto AS = llvm::make_unique<AliasSummary>(Flags);
auto *Aliasee = A.getBaseObject();
auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
@@ -352,6 +391,59 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
}
}
+ bool HasLocalInlineAsmSymbol = false;
+ if (!M.getModuleInlineAsm().empty()) {
+ // Collect the local values defined by module level asm, and set up
+ // summaries for these symbols so that they can be marked as NoRename,
+ // to prevent export of any use of them in regular IR that would require
+ // renaming within the module level asm. Note we don't need to create a
+ // summary for weak or global defs, as they don't need to be flagged as
+ // NoRename, and defs in module level asm can't be imported anyway.
+ // Also, any values used but not defined within module level asm should
+ // be listed on the llvm.used or llvm.compiler.used global and marked as
+ // referenced from there.
+ ModuleSymbolTable::CollectAsmSymbols(
+ M, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ // Symbols not marked as Weak or Global are local definitions.
+ if (Flags & (object::BasicSymbolRef::SF_Weak |
+ object::BasicSymbolRef::SF_Global))
+ return;
+ HasLocalInlineAsmSymbol = true;
+ GlobalValue *GV = M.getNamedValue(Name);
+ if (!GV)
+ return;
+ assert(GV->isDeclaration() && "Def in module asm already has definition");
+ GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
+ /* NotEligibleToImport = */ true,
+ /* Live = */ true,
+ /* Local */ GV->isDSOLocal());
+ CantBePromoted.insert(GlobalValue::getGUID(Name));
+ // Create the appropriate summary type.
+ if (Function *F = dyn_cast<Function>(GV)) {
+ std::unique_ptr<FunctionSummary> Summary =
+ llvm::make_unique<FunctionSummary>(
+ GVFlags, 0,
+ FunctionSummary::FFlags{
+ F->hasFnAttribute(Attribute::ReadNone),
+ F->hasFnAttribute(Attribute::ReadOnly),
+ F->hasFnAttribute(Attribute::NoRecurse),
+ F->returnDoesNotAlias()},
+ ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{},
+ ArrayRef<GlobalValue::GUID>{},
+ ArrayRef<FunctionSummary::VFuncId>{},
+ ArrayRef<FunctionSummary::VFuncId>{},
+ ArrayRef<FunctionSummary::ConstVCall>{},
+ ArrayRef<FunctionSummary::ConstVCall>{});
+ Index.addGlobalValueSummary(Name, std::move(Summary));
+ } else {
+ std::unique_ptr<GlobalVarSummary> Summary =
+ llvm::make_unique<GlobalVarSummary>(GVFlags,
+ ArrayRef<ValueInfo>{});
+ Index.addGlobalValueSummary(Name, std::move(Summary));
+ }
+ });
+ }
+
// Compute summaries for all functions defined in module, and save in the
// index.
for (auto &F : M) {
@@ -369,7 +461,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
BFI = BFIPtr.get();
}
- computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty(),
+ computeFunctionSummary(Index, M, F, BFI, PSI,
+ !LocalsUsed.empty() || HasLocalInlineAsmSymbol,
CantBePromoted);
}
@@ -401,52 +494,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
setLiveRoot(Index, "llvm.global_dtors");
setLiveRoot(Index, "llvm.global.annotations");
- if (!M.getModuleInlineAsm().empty()) {
- // Collect the local values defined by module level asm, and set up
- // summaries for these symbols so that they can be marked as NoRename,
- // to prevent export of any use of them in regular IR that would require
- // renaming within the module level asm. Note we don't need to create a
- // summary for weak or global defs, as they don't need to be flagged as
- // NoRename, and defs in module level asm can't be imported anyway.
- // Also, any values used but not defined within module level asm should
- // be listed on the llvm.used or llvm.compiler.used global and marked as
- // referenced from there.
- ModuleSymbolTable::CollectAsmSymbols(
- M, [&M, &Index, &CantBePromoted](StringRef Name,
- object::BasicSymbolRef::Flags Flags) {
- // Symbols not marked as Weak or Global are local definitions.
- if (Flags & (object::BasicSymbolRef::SF_Weak |
- object::BasicSymbolRef::SF_Global))
- return;
- GlobalValue *GV = M.getNamedValue(Name);
- if (!GV)
- return;
- assert(GV->isDeclaration() && "Def in module asm already has definition");
- GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
- /* NotEligibleToImport = */ true,
- /* Live = */ true);
- CantBePromoted.insert(GlobalValue::getGUID(Name));
- // Create the appropriate summary type.
- if (isa<Function>(GV)) {
- std::unique_ptr<FunctionSummary> Summary =
- llvm::make_unique<FunctionSummary>(
- GVFlags, 0, ArrayRef<ValueInfo>{},
- ArrayRef<FunctionSummary::EdgeTy>{},
- ArrayRef<GlobalValue::GUID>{},
- ArrayRef<FunctionSummary::VFuncId>{},
- ArrayRef<FunctionSummary::VFuncId>{},
- ArrayRef<FunctionSummary::ConstVCall>{},
- ArrayRef<FunctionSummary::ConstVCall>{});
- Index.addGlobalValueSummary(Name, std::move(Summary));
- } else {
- std::unique_ptr<GlobalVarSummary> Summary =
- llvm::make_unique<GlobalVarSummary>(GVFlags,
- ArrayRef<ValueInfo>{});
- Index.addGlobalValueSummary(Name, std::move(Summary));
- }
- });
- }
-
bool IsThinLTO = true;
if (auto *MD =
mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
@@ -503,6 +550,7 @@ ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
}
char ModuleSummaryIndexWrapperPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
"Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
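The relocated module-level-asm block above walks each asm-defined symbol and creates summaries only for local definitions, skipping anything flagged Weak or Global, so that local asm symbols can be marked CantBePromoted. A toy sketch of that filtering; the flag values and the callback shape are stand-ins for object::BasicSymbolRef and ModuleSymbolTable::CollectAsmSymbols:

// Assumed, simplified model of per-symbol callback filtering (not LLVM API).
#include <cstdint>
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

enum SymbolFlags : uint32_t { SF_None = 0, SF_Global = 1u << 0, SF_Weak = 1u << 1 };

struct AsmSymbol { std::string Name; uint32_t Flags; };

static void collectAsmSymbols(const std::vector<AsmSymbol> &Syms,
                              const std::function<void(const std::string &, uint32_t)> &CB) {
  for (const AsmSymbol &S : Syms)
    CB(S.Name, S.Flags);
}

int main() {
  std::vector<AsmSymbol> Syms = {{"local_helper", SF_None}, {"exported_entry", SF_Global}};
  collectAsmSymbols(Syms, [](const std::string &Name, uint32_t Flags) {
    if (Flags & (SF_Weak | SF_Global))
      return;  // weak/global asm defs need no summary
    std::printf("local asm def %s: create summary, mark CantBePromoted\n", Name.c_str());
  });
  return 0;
}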
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index ed03406ca8c6..096ea661ecb6 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -123,7 +123,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
// These functions don't access any memory visible to the compiler.
// Note that this doesn't include objc_retainBlock, because it updates
// pointers when it copies block data.
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
default:
break;
}
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
index e3e74aa249da..55335f3a7cb0 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -21,8 +21,6 @@ using namespace llvm::objcarc;
/// \brief A handy option to enable/disable all ARC Optimizations.
bool llvm::objcarc::EnableARCOpts;
-static cl::opt<bool, true>
-EnableARCOptimizations("enable-objc-arc-opts",
- cl::desc("enable/disable all ARC Optimizations"),
- cl::location(EnableARCOpts),
- cl::init(true));
+static cl::opt<bool, true> EnableARCOptimizations(
+ "enable-objc-arc-opts", cl::desc("enable/disable all ARC Optimizations"),
+ cl::location(EnableARCOpts), cl::init(true), cl::Hidden);
diff --git a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp b/contrib/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
index eb259fd7a384..8ece0a2a3ed3 100644
--- a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/contrib/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -1,4 +1,4 @@
-//===- OptimizationDiagnosticInfo.cpp - Optimization Diagnostic -*- C++ -*-===//
+//===- OptimizationRemarkEmitter.cpp - Optimization Diagnostic --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,11 +12,10 @@
// used to compute the "hotness" of the diagnostic message.
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
@@ -64,86 +63,6 @@ Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
return BFI->getBlockProfileCount(cast<BasicBlock>(V));
}
-namespace llvm {
-namespace yaml {
-
-void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
- IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
- assert(io.outputting() && "input not yet implemented");
-
- if (io.mapTag("!Passed",
- (OptDiag->getKind() == DK_OptimizationRemark ||
- OptDiag->getKind() == DK_MachineOptimizationRemark)))
- ;
- else if (io.mapTag(
- "!Missed",
- (OptDiag->getKind() == DK_OptimizationRemarkMissed ||
- OptDiag->getKind() == DK_MachineOptimizationRemarkMissed)))
- ;
- else if (io.mapTag(
- "!Analysis",
- (OptDiag->getKind() == DK_OptimizationRemarkAnalysis ||
- OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis)))
- ;
- else if (io.mapTag("!AnalysisFPCommute",
- OptDiag->getKind() ==
- DK_OptimizationRemarkAnalysisFPCommute))
- ;
- else if (io.mapTag("!AnalysisAliasing",
- OptDiag->getKind() ==
- DK_OptimizationRemarkAnalysisAliasing))
- ;
- else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure))
- ;
- else
- llvm_unreachable("Unknown remark type");
-
- // These are read-only for now.
- DiagnosticLocation DL = OptDiag->getLocation();
- StringRef FN =
- GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
-
- StringRef PassName(OptDiag->PassName);
- io.mapRequired("Pass", PassName);
- io.mapRequired("Name", OptDiag->RemarkName);
- if (!io.outputting() || DL.isValid())
- io.mapOptional("DebugLoc", DL);
- io.mapRequired("Function", FN);
- io.mapOptional("Hotness", OptDiag->Hotness);
- io.mapOptional("Args", OptDiag->Args);
-}
-
-template <> struct MappingTraits<DiagnosticLocation> {
- static void mapping(IO &io, DiagnosticLocation &DL) {
- assert(io.outputting() && "input not yet implemented");
-
- StringRef File = DL.getFilename();
- unsigned Line = DL.getLine();
- unsigned Col = DL.getColumn();
-
- io.mapRequired("File", File);
- io.mapRequired("Line", Line);
- io.mapRequired("Column", Col);
- }
-
- static const bool flow = true;
-};
-
-// Implement this as a mapping for now to get proper quotation for the value.
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
- static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
- assert(io.outputting() && "input not yet implemented");
- io.mapRequired(A.Key.data(), A.Val);
- if (A.Loc.isValid())
- io.mapOptional("DebugLoc", A.Loc);
- }
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
-
void OptimizationRemarkEmitter::computeHotness(
DiagnosticInfoIROptimization &OptDiag) {
const Value *V = OptDiag.getCodeRegion();
@@ -155,23 +74,14 @@ void OptimizationRemarkEmitter::emit(
DiagnosticInfoOptimizationBase &OptDiagBase) {
auto &OptDiag = cast<DiagnosticInfoIROptimization>(OptDiagBase);
computeHotness(OptDiag);
- // If a diagnostic has a hotness value, then only emit it if its hotness
- // meets the threshold.
- if (OptDiag.getHotness() &&
- *OptDiag.getHotness() <
- F->getContext().getDiagnosticsHotnessThreshold()) {
+
+ // Only emit it if its hotness meets the threshold.
+ if (OptDiag.getHotness().getValueOr(0) <
+ F->getContext().getDiagnosticsHotnessThreshold()) {
return;
}
- yaml::Output *Out = F->getContext().getDiagnosticsOutputFile();
- if (Out) {
- auto *P = const_cast<DiagnosticInfoOptimizationBase *>(&OptDiagBase);
- *Out << P;
- }
- // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
- // from here to clang.
- if (!OptDiag.isVerbose() || shouldEmitVerbose())
- F->getContext().diagnose(OptDiag);
+ F->getContext().diagnose(OptDiag);
}
OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
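The emit() change above collapses the hotness filter to a single getValueOr(0) comparison, which also means a remark that carries no hotness at all is now dropped whenever a nonzero threshold is configured. A small analogue using std::optional::value_or, on the assumption that it behaves like llvm::Optional::getValueOr for this purpose:

// Sketch only; threshold semantics mirror the rewritten check above.
#include <cstdint>
#include <cstdio>
#include <optional>

static bool shouldEmit(std::optional<uint64_t> Hotness, uint64_t Threshold) {
  // Missing hotness counts as 0, so it only passes a zero (unset) threshold.
  return Hotness.value_or(0) >= Threshold;
}

int main() {
  std::printf("%d %d %d\n",
              shouldEmit(std::nullopt, 0),   // 1: no threshold configured
              shouldEmit(std::nullopt, 10),  // 0: unknown hotness treated as cold
              shouldEmit(50, 10));           // 1: hot enough
  return 0;
}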
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
index 1caf151546d9..2282401085d4 100644
--- a/contrib/llvm/lib/Analysis/PostDominators.cpp
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -12,13 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PostDominators.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GenericDomTreeConstruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
#define DEBUG_TYPE "postdomtree"
@@ -28,6 +26,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
char PostDominatorTreeWrapperPass::ID = 0;
+
INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree",
"Post-Dominator Tree Construction", true, true)
diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 12b86daa602b..671744f93fb8 100644
--- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -30,7 +30,7 @@ using namespace llvm;
// considered cold).
static cl::opt<int> ProfileSummaryCutoffHot(
- "profile-summary-cutoff-hot", cl::Hidden, cl::init(999000), cl::ZeroOrMore,
+ "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
cl::desc("A count is hot if it exceeds the minimum count to"
" reach this percentile of total counts."));
@@ -39,9 +39,21 @@ static cl::opt<int> ProfileSummaryCutoffCold(
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
-// Find the minimum count to reach a desired percentile of counts.
-static uint64_t getMinCountForPercentile(SummaryEntryVector &DS,
- uint64_t Percentile) {
+static cl::opt<bool> ProfileSampleAccurate(
+ "profile-sample-accurate", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+             "callsites as cold. Otherwise, treat un-sampled callsites as if "
+ "we have no profile."));
+static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
+ "profile-summary-huge-working-set-size-threshold", cl::Hidden,
+ cl::init(15000), cl::ZeroOrMore,
+ cl::desc("The code working set size is considered huge if the number of"
+ " blocks required to reach the -profile-summary-cutoff-hot"
+ " percentile exceeds this count."));
+
+// Find the summary entry for a desired percentile of counts.
+static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
+ uint64_t Percentile) {
auto Compare = [](const ProfileSummaryEntry &Entry, uint64_t Percentile) {
return Entry.Cutoff < Percentile;
};
@@ -50,7 +62,7 @@ static uint64_t getMinCountForPercentile(SummaryEntryVector &DS,
// detailed summary.
if (It == DS.end())
report_fatal_error("Desired percentile exceeds the maximum cutoff");
- return It->MinCount;
+ return *It;
}
// The profile summary metadata may be attached either by the frontend or by
@@ -78,10 +90,12 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
if (hasSampleProfile()) {
// In sample PGO mode, check if there is a profile metadata on the
// instruction. If it is present, determine hotness solely based on that,
- // since the sampled entry count may not be accurate.
+ // since the sampled entry count may not be accurate. If there is no
+    // profile annotated on the instruction, return None.
uint64_t TotalCount;
if (Inst->extractProfTotalWeight(TotalCount))
return TotalCount;
+ return None;
}
if (BFI)
return BFI->getBlockProfileCount(Inst->getParent());
@@ -161,10 +175,20 @@ void ProfileSummaryInfo::computeThresholds() {
if (!computeSummary())
return;
auto &DetailedSummary = Summary->getDetailedSummary();
- HotCountThreshold =
- getMinCountForPercentile(DetailedSummary, ProfileSummaryCutoffHot);
- ColdCountThreshold =
- getMinCountForPercentile(DetailedSummary, ProfileSummaryCutoffCold);
+ auto &HotEntry =
+ getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot);
+ HotCountThreshold = HotEntry.MinCount;
+ auto &ColdEntry =
+ getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffCold);
+ ColdCountThreshold = ColdEntry.MinCount;
+ HasHugeWorkingSetSize =
+ HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
+}
+
+bool ProfileSummaryInfo::hasHugeWorkingSetSize() {
+ if (!HasHugeWorkingSetSize)
+ computeThresholds();
+ return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue();
}
bool ProfileSummaryInfo::isHotCount(uint64_t C) {
@@ -199,7 +223,16 @@ bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
BlockFrequencyInfo *BFI) {
auto C = getProfileCount(CS.getInstruction(), BFI);
- return C && isColdCount(*C);
+ if (C)
+ return isColdCount(*C);
+
+ // In SamplePGO, if the caller has been sampled, and there is no profile
+  // annotated on the callsite, we consider the callsite as cold.
+ // If there is no profile for the caller, and we know the profile is
+ // accurate, we consider the callsite as cold.
+ return (hasSampleProfile() &&
+ (CS.getCaller()->getEntryCount() || ProfileSampleAccurate ||
+ CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
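getEntryForPercentile() above generalizes the old getMinCountForPercentile(): callers now read MinCount for the hot/cold thresholds and NumCounts for the new huge-working-set check. A self-contained sketch with invented numbers; the struct mirrors the fields used here but is only a stand-in for ProfileSummaryEntry:

// Sketch of the lower_bound lookup over a detailed summary sorted by Cutoff.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct SummaryEntry { uint64_t Cutoff, MinCount, NumCounts; };

static const SummaryEntry &getEntryForPercentile(const std::vector<SummaryEntry> &DS,
                                                 uint64_t Percentile) {
  auto It = std::lower_bound(
      DS.begin(), DS.end(), Percentile,
      [](const SummaryEntry &E, uint64_t P) { return E.Cutoff < P; });
  // The real code issues report_fatal_error when It == DS.end(); omitted here.
  return *It;
}

int main() {
  // Cutoffs are parts-per-million, matching -profile-summary-cutoff-hot=990000.
  std::vector<SummaryEntry> DS = {
      {900000, 500, 120}, {990000, 100, 20000}, {999999, 2, 90000}};
  const SummaryEntry &Hot = getEntryForPercentile(DS, 990000);
  std::printf("HotCountThreshold=%llu HugeWorkingSet=%d\n",
              (unsigned long long)Hot.MinCount,
              Hot.NumCounts > 15000);  // 15000 is the new option's default
  return 0;
}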
diff --git a/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
index 68c7535ea594..1fdaf4d55b59 100644
--- a/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
+++ b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
@@ -6,12 +6,16 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
/// \file
/// Implementation of the pointer use visitors.
-///
+//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
index b38e6225c840..c5d71b25e022 100644
--- a/contrib/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
-#include "llvm/Analysis/RegionIterator.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
@@ -208,7 +207,7 @@ public:
return false;
}
- StringRef getPassName() const override { return "Print Region IR"; }
+ StringRef getPassName() const override { return "Print Region IR"; }
};
char PrintRegionPass::ID = 0;
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 9539fd7c7559..0b8604187121 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -59,12 +59,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -72,28 +83,55 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "scalar-evolution"
@@ -115,11 +153,11 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
cl::init(100));
// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean.
+static cl::opt<bool> VerifySCEV(
+ "verify-scev", cl::Hidden,
+ cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
static cl::opt<bool>
-VerifySCEV("verify-scev",
- cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
-static cl::opt<bool>
- VerifySCEVMap("verify-scev-maps",
+ VerifySCEVMap("verify-scev-maps", cl::Hidden,
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
@@ -415,9 +453,6 @@ void SCEVUnknown::deleted() {
}
void SCEVUnknown::allUsesReplacedWith(Value *New) {
- // Clear this SCEVUnknown from various maps.
- SE->forgetMemoizedResults(this);
-
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
@@ -514,10 +549,10 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
/// Since we do not continue running this routine on expression trees once we
/// have seen unequal values, there is no need to track them in the cache.
static int
-CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
+CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue,
const LoopInfo *const LI, Value *LV, Value *RV,
unsigned Depth) {
- if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV}))
+ if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV))
return 0;
// Order pointer values after integer values. This helps SCEVExpander form
@@ -577,14 +612,14 @@ CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
for (unsigned Idx : seq(0u, LNumOps)) {
int Result =
- CompareValueComplexity(EqCache, LI, LInst->getOperand(Idx),
+ CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx),
RInst->getOperand(Idx), Depth + 1);
if (Result != 0)
return Result;
}
}
- EqCache.insert({LV, RV});
+ EqCacheValue.unionSets(LV, RV);
return 0;
}
@@ -592,7 +627,8 @@ CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
// than RHS, respectively. A three-way result allows recursive comparisons to be
// more efficient.
static int CompareSCEVComplexity(
- SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV,
+ EquivalenceClasses<const SCEV *> &EqCacheSCEV,
+ EquivalenceClasses<const Value *> &EqCacheValue,
const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
DominatorTree &DT, unsigned Depth = 0) {
// Fast-path: SCEVs are uniqued so we can do a quick equality check.
@@ -604,7 +640,7 @@ static int CompareSCEVComplexity(
if (LType != RType)
return (int)LType - (int)RType;
- if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS}))
+ if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.isEquivalent(LHS, RHS))
return 0;
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
@@ -614,11 +650,10 @@ static int CompareSCEVComplexity(
const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
- SmallSet<std::pair<Value *, Value *>, 8> EqCache;
- int X = CompareValueComplexity(EqCache, LI, LU->getValue(), RU->getValue(),
- Depth + 1);
+ int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(),
+ RU->getValue(), Depth + 1);
if (X == 0)
- EqCacheSCEV.insert({LHS, RHS});
+ EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
@@ -659,14 +694,19 @@ static int CompareSCEVComplexity(
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
+ // Compare NoWrap flags.
+ if (LA->getNoWrapFlags() != RA->getNoWrapFlags())
+ return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags();
+
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
- int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i),
- RA->getOperand(i), DT, Depth + 1);
+ int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
+ LA->getOperand(i), RA->getOperand(i), DT,
+ Depth + 1);
if (X != 0)
return X;
}
- EqCacheSCEV.insert({LHS, RHS});
+ EqCacheSCEV.unionSets(LHS, RHS);
return 0;
}
@@ -682,15 +722,18 @@ static int CompareSCEVComplexity(
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
+ // Compare NoWrap flags.
+ if (LC->getNoWrapFlags() != RC->getNoWrapFlags())
+ return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags();
+
for (unsigned i = 0; i != LNumOps; ++i) {
- if (i >= RNumOps)
- return 1;
- int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i),
- RC->getOperand(i), DT, Depth + 1);
+ int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
+ LC->getOperand(i), RC->getOperand(i), DT,
+ Depth + 1);
if (X != 0)
return X;
}
- EqCacheSCEV.insert({LHS, RHS});
+ EqCacheSCEV.unionSets(LHS, RHS);
return 0;
}
@@ -699,14 +742,14 @@ static int CompareSCEVComplexity(
const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
// Lexicographically compare udiv expressions.
- int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(),
- DT, Depth + 1);
+ int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(),
+ RC->getLHS(), DT, Depth + 1);
if (X != 0)
return X;
- X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT,
- Depth + 1);
+ X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(),
+ RC->getRHS(), DT, Depth + 1);
if (X == 0)
- EqCacheSCEV.insert({LHS, RHS});
+ EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
@@ -717,10 +760,11 @@ static int CompareSCEVComplexity(
const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
// Compare cast expressions by operand.
- int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(),
- RC->getOperand(), DT, Depth + 1);
+ int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
+ LC->getOperand(), RC->getOperand(), DT,
+ Depth + 1);
if (X == 0)
- EqCacheSCEV.insert({LHS, RHS});
+ EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
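The CompareSCEVComplexity/CompareValueComplexity hunks replace the SmallSet-of-pairs caches with EquivalenceClasses, so equalities proved earlier compose transitively instead of being re-derived pair by pair. A toy union-find standing in for llvm::EquivalenceClasses, with member names chosen to match the unionSets/isEquivalent calls above:

// Toy equality cache; int keys stand in for const SCEV* / const Value*.
#include <cstdio>
#include <map>

struct EqCache {
  std::map<int, int> Parent;
  int find(int X) {
    auto It = Parent.find(X);
    if (It == Parent.end()) { Parent[X] = X; return X; }
    if (It->second == X) return X;
    return Parent[X] = find(It->second);  // path compression
  }
  void unionSets(int A, int B) { Parent[find(A)] = find(B); }
  bool isEquivalent(int A, int B) { return find(A) == find(B); }
};

int main() {
  EqCache EC;
  EC.unionSets(1, 2);  // proved expr1 orders equal to expr2
  EC.unionSets(2, 3);  // proved expr2 orders equal to expr3
  std::printf("%d\n", EC.isEquivalent(1, 3));  // 1: transitivity is free
  return 0;
}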
@@ -739,26 +783,26 @@ static int CompareSCEVComplexity(
/// results from this routine. In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
-///
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI, DominatorTree &DT) {
if (Ops.size() < 2) return; // Noop
- SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache;
+ EquivalenceClasses<const SCEV *> EqCacheSCEV;
+ EquivalenceClasses<const Value *> EqCacheValue;
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
- if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0)
+ if (CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, RHS, LHS, DT) < 0)
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
std::stable_sort(Ops.begin(), Ops.end(),
- [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) {
- return
- CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0;
+ [&](const SCEV *LHS, const SCEV *RHS) {
+ return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
+ LHS, RHS, DT) < 0;
});
// Now that we are sorted by complexity, group elements of the same
@@ -785,14 +829,16 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
// Returns the size of the SCEV S.
static inline int sizeOfSCEV(const SCEV *S) {
struct FindSCEVSize {
- int Size;
- FindSCEVSize() : Size(0) {}
+ int Size = 0;
+
+ FindSCEVSize() = default;
bool follow(const SCEV *S) {
++Size;
// Keep looking at all operands of S.
return true;
}
+
bool isDone() const {
return false;
}
@@ -1032,7 +1078,7 @@ private:
const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
};
-}
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Simple SCEV method implementations
@@ -1157,7 +1203,6 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
-///
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
ScalarEvolution &SE) const {
const SCEV *Result = getStart();
@@ -1256,6 +1301,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
return S;
}
@@ -1343,7 +1389,8 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
-}
+
+} // end anonymous namespace
// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
@@ -1473,7 +1520,6 @@ static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
//
// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
// is `Delta` (defined below).
-//
template <typename ExtendOpTy>
bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
const SCEV *Step,
@@ -1484,7 +1530,6 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
// time here. It is correct (but more expensive) to continue with a
// non-constant `Start` and do a general SCEV subtraction to compute
// `PreStart` below.
- //
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
if (!StartC)
return false;
@@ -1547,6 +1592,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
return S;
}
@@ -1733,6 +1779,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
return S;
}
@@ -1770,6 +1817,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
return S;
}
@@ -1981,12 +2029,12 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
return S;
}
/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
-///
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -2057,7 +2105,6 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
/// may be exposed. This helps getAddRecExpr short-circuit extra work in
/// the common case where no interesting opportunities are present, and
/// is also used as a check to avoid infinite recursion.
-///
static bool
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
SmallVectorImpl<const SCEV *> &NewOps,
@@ -2132,7 +2179,8 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
const SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags) {
using namespace std::placeholders;
- typedef OverflowingBinaryOperator OBO;
+
+ using OBO = OverflowingBinaryOperator;
bool CanAnalyze =
Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
@@ -2306,12 +2354,23 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
- // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+ // folded. eg., n*trunc(x) + m*trunc(y) --> trunc(trunc(m)*x + trunc(n)*y)
// if the contents of the resulting outer trunc fold to something simple.
- for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
- const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
- Type *DstType = Trunc->getType();
- Type *SrcType = Trunc->getOperand()->getType();
+ auto FindTruncSrcType = [&]() -> Type * {
+ // We're ultimately looking to fold an addrec of truncs and muls of only
+ // constants and truncs, so if we find any other types of SCEV
+ // as operands of the addrec then we bail and return nullptr here.
+ // Otherwise, we return the type of the operand of a trunc that we find.
+ if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx]))
+ return T->getOperand()->getType();
+ if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+ const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1);
+ if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp))
+ return T->getOperand()->getType();
+ }
+ return nullptr;
+ };
+ if (auto *SrcType = FindTruncSrcType()) {
SmallVector<const SCEV *, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
@@ -2354,7 +2413,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
- return getTruncateExpr(Fold, DstType);
+ return getTruncateExpr(Fold, Ty);
}
}
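The fold above is ordinary modular arithmetic: truncation to a narrower width distributes over addition and multiplication, so the scaled sum of truncs can be evaluated once in the wide type and truncated at the end. A minimal standalone check of that identity in plain C++ (no SCEV types; the i32/i8 widths and the concrete values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // Identity: n*trunc(x) + m*trunc(y) == trunc(sext(n)*x + sext(m)*y),
  // where trunc is i32 -> i8 and sext is i8 -> i32 (two's complement wrap).
  int32_t x = 100000, y = -98765;
  int8_t n = 3, m = -7;

  int8_t Narrow = static_cast<int8_t>(n * static_cast<int8_t>(x) +
                                      m * static_cast<int8_t>(y));
  int8_t WideThenTrunc = static_cast<int8_t>(static_cast<int32_t>(n) * x +
                                             static_cast<int32_t>(m) * y);
  assert(Narrow == WideThenTrunc);
  return 0;
}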
@@ -2608,8 +2667,8 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
+ for (const SCEV *Op : Ops)
+ ID.AddPointer(Op);
void *IP = nullptr;
SCEVAddExpr *S =
static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
@@ -2619,6 +2678,7 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
S = new (SCEVAllocator)
SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
}
S->setNoWrapFlags(Flags);
return S;
@@ -2640,6 +2700,7 @@ ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
}
S->setNoWrapFlags(Flags);
return S;
@@ -2679,20 +2740,24 @@ static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
/// Determine if any of the operands in this SCEV are a constant or if
/// any of the add or multiply expressions in this SCEV contain a constant.
-static bool containsConstantSomewhere(const SCEV *StartExpr) {
- SmallVector<const SCEV *, 4> Ops;
- Ops.push_back(StartExpr);
- while (!Ops.empty()) {
- const SCEV *CurrentExpr = Ops.pop_back_val();
- if (isa<SCEVConstant>(*CurrentExpr))
- return true;
+static bool containsConstantInAddMulChain(const SCEV *StartExpr) {
+ struct FindConstantInAddMulChain {
+ bool FoundConstant = false;
- if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
- const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
- Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
+ bool follow(const SCEV *S) {
+ FoundConstant |= isa<SCEVConstant>(S);
+ return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S);
}
- }
- return false;
+
+ bool isDone() const {
+ return FoundConstant;
+ }
+ };
+
+ FindConstantInAddMulChain F;
+ SCEVTraversal<FindConstantInAddMulChain> ST(F);
+ ST.visitAll(StartExpr);
+ return F.FoundConstant;
}
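The rewritten helper above relies on the generic SCEVTraversal visitor, whose contract is that follow(S) decides whether the walk descends into S's operands and isDone() lets it stop early once an answer is known. A rough standalone sketch of that contract over a toy expression tree (Expr, Kind and visitAll are invented for illustration, not LLVM API):

#include <cassert>
#include <vector>

enum class Kind { Constant, Add, Mul, Unknown };

struct Expr {
  Kind K;
  std::vector<const Expr *> Ops;
};

// Depth-first walk mirroring SCEVTraversal: descend only where follow() says
// so, and stop as soon as isDone() returns true.
template <typename Visitor> void visitAll(const Expr *Root, Visitor &V) {
  std::vector<const Expr *> Worklist{Root};
  while (!Worklist.empty() && !V.isDone()) {
    const Expr *E = Worklist.back();
    Worklist.pop_back();
    if (V.follow(E))
      Worklist.insert(Worklist.end(), E->Ops.begin(), E->Ops.end());
  }
}

// Analogue of FindConstantInAddMulChain: look for a constant, but descend
// only through chains of adds and muls.
struct FindConstant {
  bool Found = false;
  bool follow(const Expr *E) {
    Found |= (E->K == Kind::Constant);
    return E->K == Kind::Add || E->K == Kind::Mul;
  }
  bool isDone() const { return Found; }
};

int main() {
  Expr C{Kind::Constant, {}}, X{Kind::Unknown, {}};
  Expr Mul{Kind::Mul, {&C, &X}};
  Expr Add{Kind::Add, {&Mul, &X}};
  FindConstant F;
  visitAll(&Add, F);
  assert(F.Found);
  return 0;
}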
/// Get a canonical multiply expression, or something simpler if possible.
@@ -2729,7 +2794,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// If any of Add's ops are Adds or Muls with a constant,
// apply this transformation as well.
if (Add->getNumOperands() == 2)
- if (containsConstantSomewhere(Add))
+ // TODO: There are some cases where this transformation is not
+ // profitable, for example:
+ // Add = (C0 + X) * Y + Z.
+ // Maybe the scope of this transformation should be narrowed down.
+ if (containsConstantInAddMulChain(Add))
return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
SCEV::FlagAnyWrap, Depth + 1),
getMulExpr(LHSC, Add->getOperand(1),
@@ -2941,6 +3010,34 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
return getOrCreateMulExpr(Ops, Flags);
}
+/// Represents an unsigned remainder expression based on unsigned division.
+const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ assert(getEffectiveSCEVType(LHS->getType()) ==
+ getEffectiveSCEVType(RHS->getType()) &&
+ "SCEVURemExpr operand types don't match!");
+
+ // Short-circuit easy cases
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ // If constant is one, the result is trivial
+ if (RHSC->getValue()->isOne())
+ return getZero(LHS->getType()); // X urem 1 --> 0
+
+ // If constant is a power of two, fold into a zext(trunc(LHS)).
+ if (RHSC->getAPInt().isPowerOf2()) {
+ Type *FullTy = LHS->getType();
+ Type *TruncTy =
+ IntegerType::get(getContext(), RHSC->getAPInt().logBase2());
+ return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy);
+ }
+ }
+
+  // Fall back to the identity %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y)
+ const SCEV *UDiv = getUDivExpr(LHS, RHS);
+ const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW);
+ return getMinusSCEV(LHS, Mult, SCEV::FlagNUW);
+}
+
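Both rewrites used by getURemExpr above are plain unsigned arithmetic identities. A quick standalone check in C++ (not the SCEV API); the power-of-two case corresponds to zext(trunc(LHS)), i.e. keeping only the low log2(RHS) bits:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF;

  // General case: x urem y == x - (x udiv y) * y.
  uint32_t Y = 37;
  assert(X % Y == X - (X / Y) * Y);

  // Power-of-two case: x urem 2^k keeps only the low k bits, which is what
  // zext(trunc(x to i_k) to i32) computes. Here k == 8.
  uint32_t K = 8;
  assert(X % (1u << K) == (X & ((1u << K) - 1)));
  assert(X % (1u << K) == static_cast<uint32_t>(static_cast<uint8_t>(X)));
  return 0;
}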
/// Get a canonical unsigned division expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
@@ -3056,6 +3153,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
LHS, RHS);
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
return S;
}
@@ -3236,6 +3334,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
O, Operands.size(), L);
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
}
S->setNoWrapFlags(Flags);
return S;
@@ -3391,6 +3490,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
return S;
}
@@ -3492,6 +3592,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
return S;
}
@@ -3714,7 +3815,6 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
}
/// Return a SCEV corresponding to -V = -1*V
-///
const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
SCEV::NoWrapFlags Flags) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
@@ -3957,6 +4057,7 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
}
namespace {
+
class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L,
@@ -3966,9 +4067,6 @@ public:
return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
}
- SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
- : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
-
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (!SE.isLoopInvariant(Expr, L))
Valid = false;
@@ -3986,10 +4084,93 @@ public:
bool isValid() { return Valid; }
private:
+ explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L) {}
+
+ const Loop *L;
+ bool Valid = true;
+};
+
+/// This class evaluates the compare condition by matching it against the
+/// branch condition of the loop latch. If there is a match, the value the
+/// condition takes on the backedge is assumed while building SCEV nodes.
+class SCEVBackedgeConditionFolder
+ : public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> {
+public:
+ static const SCEV *rewrite(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ bool IsPosBECond = false;
+ Value *BECond = nullptr;
+ if (BasicBlock *Latch = L->getLoopLatch()) {
+ BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (BI && BI->isConditional()) {
+ assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
+               "Both outgoing branches should not target the same header!");
+ BECond = BI->getCondition();
+ IsPosBECond = BI->getSuccessor(0) == L->getHeader();
+ } else {
+ return S;
+ }
+ }
+ SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);
+ return Rewriter.visit(S);
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ const SCEV *Result = Expr;
+ bool InvariantF = SE.isLoopInvariant(Expr, L);
+
+ if (!InvariantF) {
+ Instruction *I = cast<Instruction>(Expr->getValue());
+ switch (I->getOpcode()) {
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ Optional<const SCEV *> Res =
+ compareWithBackedgeCondition(SI->getCondition());
+ if (Res.hasValue()) {
+ bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
+ Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
+ }
+ break;
+ }
+ default: {
+ Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
+ if (Res.hasValue())
+ Result = Res.getValue();
+ break;
+ }
+ }
+ }
+ return Result;
+ }
+
+private:
+ explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
+ bool IsPosBECond, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond),
+ IsPositiveBECond(IsPosBECond) {}
+
+ Optional<const SCEV *> compareWithBackedgeCondition(Value *IC);
+
const Loop *L;
- bool Valid;
+ /// Loop back condition.
+ Value *BackedgeCond = nullptr;
+ /// Set to true if loop back is on positive branch condition.
+ bool IsPositiveBECond;
};
+Optional<const SCEV *>
+SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {
+
+  // If the value matches the backedge condition of the loop latch,
+  // return a constant evolution node based on the direction in which
+  // the loopback branch is taken.
+ if (BackedgeCond == IC)
+ return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))
+ : SE.getZero(Type::getInt1Ty(SE.getContext()));
+ return None;
+}
+
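The effect of SCEVBackedgeConditionFolder above can be pictured without any LLVM machinery: while building the expression for a value that flows around the backedge, a select whose condition is exactly the value the latch branches on has a known value, so it collapses to one arm. A hedged sketch of just that substitution (Select and foldOnBackedge are invented placeholders, not LLVM types):

#include <cassert>
#include <optional>
#include <string>

struct Select {
  std::string Cond; // name of the i1 value feeding the select
  int TrueVal, FalseVal;
};

// If the select's condition is the very value the latch branches on, the
// select has a known value whenever the backedge is taken: the true arm when
// the positive successor is the loop header, the false arm otherwise.
std::optional<int> foldOnBackedge(const Select &S,
                                  const std::string &BackedgeCond,
                                  bool IsPositiveBECond) {
  if (S.Cond != BackedgeCond)
    return std::nullopt; // no match: leave the select alone
  return IsPositiveBECond ? S.TrueVal : S.FalseVal;
}

int main() {
  Select S{"%exit.cond", /*TrueVal=*/1, /*FalseVal=*/2};
  assert(foldOnBackedge(S, "%exit.cond", /*IsPositiveBECond=*/true) == 1);
  assert(!foldOnBackedge(S, "%other.cond", true).has_value());
  return 0;
}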
class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L,
@@ -3999,9 +4180,6 @@ public:
return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
}
- SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
- : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
-
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
// Only allow AddRecExprs for this loop.
if (!SE.isLoopInvariant(Expr, L))
@@ -4015,12 +4193,17 @@ public:
Valid = false;
return Expr;
}
+
bool isValid() { return Valid; }
private:
+ explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L) {}
+
const Loop *L;
- bool Valid;
+ bool Valid = true;
};
+
} // end anonymous namespace
SCEV::NoWrapFlags
@@ -4028,7 +4211,8 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
if (!AR->isAffine())
return SCEV::FlagAnyWrap;
- typedef OverflowingBinaryOperator OBO;
+ using OBO = OverflowingBinaryOperator;
+
SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;
if (!AR->hasNoSignedWrap()) {
@@ -4055,6 +4239,7 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
}
namespace {
+
/// Represents an abstract binary operation. This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.
@@ -4062,16 +4247,16 @@ struct BinaryOp {
unsigned Opcode;
Value *LHS;
Value *RHS;
- bool IsNSW;
- bool IsNUW;
+ bool IsNSW = false;
+ bool IsNUW = false;
/// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
/// constant expression.
- Operator *Op;
+ Operator *Op = nullptr;
explicit BinaryOp(Operator *Op)
: Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)),
- IsNSW(false), IsNUW(false), Op(Op) {
+ Op(Op) {
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
IsNSW = OBO->hasNoSignedWrap();
IsNUW = OBO->hasNoUnsignedWrap();
@@ -4080,11 +4265,10 @@ struct BinaryOp {
explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
bool IsNUW = false)
- : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW),
- Op(nullptr) {}
+ : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
};
-}
+} // end anonymous namespace
/// Try to map \p V into a BinaryOp, and return \c None on failure.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
@@ -4101,6 +4285,7 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
case Instruction::Sub:
case Instruction::Mul:
case Instruction::UDiv:
+ case Instruction::URem:
case Instruction::And:
case Instruction::Or:
case Instruction::AShr:
@@ -4145,7 +4330,7 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
if (auto *F = CI->getCalledFunction())
switch (F->getIntrinsicID()) {
case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow: {
+ case Intrinsic::uadd_with_overflow:
if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
return BinaryOp(Instruction::Add, CI->getArgOperand(0),
CI->getArgOperand(1));
@@ -4161,13 +4346,21 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
return BinaryOp(Instruction::Add, CI->getArgOperand(0),
CI->getArgOperand(1), /* IsNSW = */ false,
/* IsNUW*/ true);
- }
-
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
- return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
- CI->getArgOperand(1));
+ if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
+ return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
+ CI->getArgOperand(1));
+ // The same reasoning as sadd/uadd above.
+ if (F->getIntrinsicID() == Intrinsic::ssub_with_overflow)
+ return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
+ CI->getArgOperand(1), /* IsNSW = */ true,
+ /* IsNUW = */ false);
+ else
+ return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
+ CI->getArgOperand(1), /* IsNSW = */ false,
+ /* IsNUW = */ true);
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
@@ -4184,28 +4377,27 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
return None;
}
-/// Helper function to createAddRecFromPHIWithCasts. We have a phi
+/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
-/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
-/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
+/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
+/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
-/// truncated type should be treated as signed/unsigned by setting
+/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
bool &Signed, ScalarEvolution &SE) {
-
- // The case where Op == SymbolicPHI (that is, with no type conversions on
- // the way) is handled by the regular add recurrence creating logic and
+ // The case where Op == SymbolicPHI (that is, with no type conversions on
+ // the way) is handled by the regular add recurrence creating logic and
// would have already been triggered in createAddRecForPHI. Reaching it here
- // means that createAddRecFromPHI had failed for this PHI before (e.g.,
+ // means that createAddRecFromPHI had failed for this PHI before (e.g.,
// because one of the other operands of the SCEVAddExpr updating this PHI is
- // not invariant).
+ // not invariant).
//
- // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
+ // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
// this case predicates that allow us to prove that Op == SymbolicPHI will
// be added.
if (Op == SymbolicPHI)
@@ -4228,7 +4420,7 @@ static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
const SCEV *X = Trunc->getOperand();
if (X != SymbolicPHI)
return nullptr;
- Signed = SExt ? true : false;
+ Signed = SExt != nullptr;
return Trunc->getType();
}
@@ -4257,7 +4449,7 @@ static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
// and call this function with %SymbolicPHI = %X.
//
-// The analysis will find that the value coming around the backedge has
+// The analysis will find that the value coming around the backedge has
// the following SCEV:
// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
// Upon concluding that this matches the desired pattern, the function
@@ -4270,21 +4462,21 @@ static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
// The returned pair means that SymbolicPHI can be rewritten into NewAddRec
// under the predicates {P1,P2,P3}.
// This predicated rewrite will be cached in PredicatedSCEVRewrites:
-// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)}
+// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)}
//
// TODO's:
//
// 1) Extend the Induction descriptor to also support inductions that involve
-// casts: When needed (namely, when we are called in the context of the
-// vectorizer induction analysis), a Set of cast instructions will be
+// casts: When needed (namely, when we are called in the context of the
+// vectorizer induction analysis), a Set of cast instructions will be
// populated by this method, and provided back to isInductionPHI. This is
// needed to allow the vectorizer to properly record them to be ignored by
// the cost model and to avoid vectorizing them (otherwise these casts,
-// which are redundant under the runtime overflow checks, will be
-// vectorized, which can be costly).
+// which are redundant under the runtime overflow checks, will be
+// vectorized, which can be costly).
//
// 2) Support additional induction/PHISCEV patterns: We also want to support
-// inductions where the sext-trunc / zext-trunc operations (partly) occur
+// inductions where the sext-trunc / zext-trunc operations (partly) occur
// after the induction update operation (the induction increment):
//
// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
@@ -4294,17 +4486,16 @@ static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
// which correspond to a phi->trunc->add->sext/zext->phi update chain.
//
// 3) Outline common code with createAddRecFromPHI to avoid duplication.
-//
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
SmallVector<const SCEVPredicate *, 3> Predicates;
- // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
+ // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
// return an AddRec expression under some predicate.
-
+
auto *PN = cast<PHINode>(SymbolicPHI->getValue());
const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
- assert (L && "Expecting an integer loop header phi");
+ assert(L && "Expecting an integer loop header phi");
// The loop may have multiple entrances or multiple exits; we can analyze
// this phi as an addrec if it has a unique entry value and a unique
@@ -4339,12 +4530,12 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
return None;
// If there is a single occurrence of the symbolic value, possibly
- // casted, replace it with a recurrence.
+ // casted, replace it with a recurrence.
unsigned FoundIndex = Add->getNumOperands();
Type *TruncTy = nullptr;
bool Signed;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
- if ((TruncTy =
+ if ((TruncTy =
isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
if (FoundIndex == e) {
FoundIndex = i;
@@ -4366,77 +4557,122 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
if (!isLoopInvariant(Accum, L))
return None;
-
- // *** Part2: Create the predicates
+ // *** Part2: Create the predicates
// Analysis was successful: we have a phi-with-cast pattern for which we
// can return an AddRec expression under the following predicates:
//
// P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
// fits within the truncated type (does not overflow) for i = 0 to n-1.
- // P2: An Equal predicate that guarantees that
+ // P2: An Equal predicate that guarantees that
// Start = (Ext ix (Trunc iy (Start) to ix) to iy)
- // P3: An Equal predicate that guarantees that
+ // P3: An Equal predicate that guarantees that
// Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
//
- // As we next prove, the above predicates guarantee that:
+ // As we next prove, the above predicates guarantee that:
// Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
//
//
// More formally, we want to prove that:
- // Expr(i+1) = Start + (i+1) * Accum
- // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
+ // Expr(i+1) = Start + (i+1) * Accum
+ // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
//
// Given that:
- // 1) Expr(0) = Start
- // 2) Expr(1) = Start + Accum
+ // 1) Expr(0) = Start
+ // 2) Expr(1) = Start + Accum
// = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
// 3) Induction hypothesis (step i):
- // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
+ // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
//
// Proof:
// Expr(i+1) =
// = Start + (i+1)*Accum
// = (Start + i*Accum) + Accum
- // = Expr(i) + Accum
- // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
+ // = Expr(i) + Accum
+ // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
// :: from step i
//
- // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
+ // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
//
// = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
// + (Ext ix (Trunc iy (Accum) to ix) to iy)
// + Accum :: from P3
//
- // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
+ // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
// + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
//
// = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
- // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
+ // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
//
// By induction, the same applies to all iterations 1<=i<n:
//
-
+
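The chain of equalities above can be sanity-checked numerically: pick Start and Accum satisfying P2 and P3, and Expr(i) round-trips through the narrow type exactly as long as P1 (no overflow of the truncated recurrence) holds. A small standalone illustration with iy = i32 and ix = i8 (arbitrary example values, no SCEV machinery, two's-complement narrowing assumed):

#include <cassert>
#include <cstdint>

int main() {
  // P2: Start == sext(trunc(Start)); P3: Accum == sext(trunc(Accum)).
  int32_t Start = 100, Accum = 5;
  assert(Start == static_cast<int32_t>(static_cast<int8_t>(Start)));
  assert(Accum == static_cast<int32_t>(static_cast<int8_t>(Accum)));

  // P1 holds for i = 0..5: trunc(Start) + i*trunc(Accum) stays within i8.
  for (int i = 0; i <= 5; ++i) {
    int32_t Expr = Start + i * Accum;              // wide value
    int32_t RoundTrip = static_cast<int8_t>(Expr); // sext(trunc(Expr))
    assert(Expr == RoundTrip);
  }

  // Once P1 is violated (i = 6 gives 130 > INT8_MAX) the equality breaks,
  // which is exactly why the wrap predicate P1 is required.
  int32_t Overflowed = Start + 6 * Accum;
  assert(Overflowed != static_cast<int32_t>(static_cast<int8_t>(Overflowed)));
  return 0;
}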
// Create a truncated addrec for which we will add a no overflow check (P1).
const SCEV *StartVal = getSCEV(StartValueV);
- const SCEV *PHISCEV =
+ const SCEV *PHISCEV =
getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
- getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
- const auto *AR = cast<SCEVAddRecExpr>(PHISCEV);
+ getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
- SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
- Signed ? SCEVWrapPredicate::IncrementNSSW
- : SCEVWrapPredicate::IncrementNUSW;
- const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
- Predicates.push_back(AddRecPred);
+ // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr.
+ // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV
+ // will be constant.
+ //
+ // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't
+ // add P1.
+ if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
+ SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
+ Signed ? SCEVWrapPredicate::IncrementNSSW
+ : SCEVWrapPredicate::IncrementNUSW;
+ const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
+ Predicates.push_back(AddRecPred);
+ }
// Create the Equal Predicates P2,P3:
- auto AppendPredicate = [&](const SCEV *Expr) -> void {
- assert (isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
+
+  // It is possible that the predicates P2 and/or P3 are computable at
+  // compile time because StartVal and/or Accum are constants.
+  // If so, check them now and bail out early if either P2 or P3 is known
+  // to be false.
+
+ // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
+ // for each of StartVal and Accum
+ auto getExtendedExpr = [&](const SCEV *Expr,
+ bool CreateSignExtend) -> const SCEV * {
+ assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
const SCEV *ExtendedExpr =
- Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType())
- : getZeroExtendExpr(TruncatedExpr, Expr->getType());
+ CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType())
+ : getZeroExtendExpr(TruncatedExpr, Expr->getType());
+ return ExtendedExpr;
+ };
+
+ // Given:
+  // ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy)
+ // = getExtendedExpr(Expr)
+ // Determine whether the predicate P: Expr == ExtendedExpr
+ // is known to be false at compile time
+ auto PredIsKnownFalse = [&](const SCEV *Expr,
+ const SCEV *ExtendedExpr) -> bool {
+ return Expr != ExtendedExpr &&
+ isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr);
+ };
+
+ const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
+ if (PredIsKnownFalse(StartVal, StartExtended)) {
+ DEBUG(dbgs() << "P2 is compile-time false\n";);
+ return None;
+ }
+
+ // The Step is always Signed (because the overflow checks are either
+ // NSSW or NUSW)
+ const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
+ if (PredIsKnownFalse(Accum, AccumExtended)) {
+ DEBUG(dbgs() << "P3 is compile-time false\n";);
+ return None;
+ }
+
+ auto AppendPredicate = [&](const SCEV *Expr,
+ const SCEV *ExtendedExpr) -> void {
if (Expr != ExtendedExpr &&
!isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
@@ -4444,14 +4680,14 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
Predicates.push_back(Pred);
}
};
-
- AppendPredicate(StartVal);
- AppendPredicate(Accum);
-
+
+ AppendPredicate(StartVal, StartExtended);
+ AppendPredicate(Accum, AccumExtended);
+
// *** Part3: Predicates are ready. Now go ahead and create the new addrec in
// which the casts had been folded away. The caller can rewrite SymbolicPHI
// into NewAR if it will also add the runtime overflow checks specified in
- // Predicates.
+ // Predicates.
auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
@@ -4463,7 +4699,6 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
-
auto *PN = cast<PHINode>(SymbolicPHI->getValue());
const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
if (!L)
@@ -4475,7 +4710,7 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
I->second;
// Analysis was done before and failed to create an AddRec:
- if (Rewrite.first == SymbolicPHI)
+ if (Rewrite.first == SymbolicPHI)
return None;
// Analysis was done before and succeeded to create an AddRec under
// a predicate:
@@ -4497,6 +4732,30 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
return Rewrite;
}
+// FIXME: This utility is currently required because the Rewriter currently
+// does not rewrite this expression:
+// {0, +, (sext ix (trunc iy to ix) to iy)}
+// into {0, +, %step},
+// even when the following Equal predicate exists:
+// "%step == (sext ix (trunc iy to ix) to iy)".
+bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
+ const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
+ if (AR1 == AR2)
+ return true;
+
+ auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
+ if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
+ !Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
+ return false;
+ return true;
+ };
+
+ if (!areExprsEqual(AR1->getStart(), AR2->getStart()) ||
+ !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE)))
+ return false;
+ return true;
+}
+
/// A helper function for createAddRecFromPHI to handle simple cases.
///
/// This function tries to find an AddRec expression for the simplest (yet most
@@ -4612,7 +4871,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
- Ops.push_back(Add->getOperand(i));
+ Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i),
+ L, *this));
const SCEV *Accum = getAddExpr(Ops);
// This is not a valid addrec if the step amount is varying each
@@ -5599,7 +5859,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
ScalarEvolution::LoopProperties
ScalarEvolution::getLoopProperties(const Loop *L) {
- typedef ScalarEvolution::LoopProperties LoopProperties;
+ using LoopProperties = ScalarEvolution::LoopProperties;
auto Itr = LoopPropertiesCache.find(L);
if (Itr == LoopPropertiesCache.end()) {
@@ -5735,6 +5995,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
}
case Instruction::UDiv:
return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
+ case Instruction::URem:
+ return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
case Instruction::Sub: {
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (BO->Op)
@@ -5886,7 +6148,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
}
break;
- case Instruction::AShr:
+ case Instruction::AShr: {
// AShr X, C, where C is a constant.
ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS);
if (!CI)
@@ -5938,6 +6200,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
}
break;
}
+ }
}
switch (U->getOpcode()) {
@@ -5948,6 +6211,21 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::SExt:
+ if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) {
+ // The NSW flag of a subtract does not always survive the conversion to
+ // A + (-1)*B. By pushing sign extension onto its operands we are much
+ // more likely to preserve NSW and allow later AddRec optimisations.
+ //
+ // NOTE: This is effectively duplicating this logic from getSignExtend:
+ // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
+ // but by that point the NSW information has potentially been lost.
+ if (BO->Opcode == Instruction::Sub && BO->IsNSW) {
+ Type *Ty = U->getType();
+ auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty);
+ auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty);
+ return getMinusSCEV(V1, V2, SCEV::FlagNSW);
+ }
+ }
return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::BitCast:
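The rationale for pushing the sign extension through the nsw subtract above is the standard fact that sign extension commutes with a subtraction known not to wrap in the narrow type: sext(A -nsw B) == sext(A) - sext(B). A tiny standalone check for i8 -> i32 in plain C++ (values chosen arbitrarily):

#include <cassert>
#include <cstdint>

int main() {
  int8_t A = 100, B = -20;
  // A - B == 120 fits in i8, i.e. the narrow subtraction is "nsw".
  int8_t NarrowSub = static_cast<int8_t>(A - B);
  int32_t Lhs = static_cast<int32_t>(NarrowSub);                   // sext(A -nsw B)
  int32_t Rhs = static_cast<int32_t>(A) - static_cast<int32_t>(B); // sext(A) - sext(B)
  assert(Lhs == Rhs && Lhs == 120);
  return 0;
}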
@@ -5987,8 +6265,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return getUnknown(V);
}
-
-
//===----------------------------------------------------------------------===//
// Iteration Count Computation Code
//
@@ -6177,11 +6453,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
- SmallPtrSet<Instruction *, 8> Visited;
+ SmallPtrSet<Instruction *, 8> Discovered;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I).second)
- continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -6202,7 +6476,31 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
ConstantEvolutionLoopExitValue.erase(PN);
}
- PushDefUseChildren(I, Worklist);
+ // Since we don't need to invalidate anything for correctness and we're
+ // only invalidating to make SCEV's results more precise, we get to stop
+ // early to avoid invalidating too much. This is especially important in
+ // cases like:
+ //
+ // %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node
+ // loop0:
+ // %pn0 = phi
+ // ...
+ // loop1:
+ // %pn1 = phi
+ // ...
+ //
+ // where both loop0 and loop1's backedge taken count uses the SCEV
+ // expression for %v. If we don't have the early stop below then in cases
+ // like the above, getBackedgeTakenInfo(loop1) will clear out the trip
+ // count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip
+ // count for loop1, effectively nullifying SCEV's trip count cache.
+ for (auto *U : I->users())
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ auto *LoopForUser = LI.getLoopFor(I->getParent());
+ if (LoopForUser && L->contains(LoopForUser) &&
+ Discovered.insert(I).second)
+ Worklist.push_back(I);
+ }
}
}
@@ -6217,7 +6515,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
void ScalarEvolution::forgetLoop(const Loop *L) {
// Drop any stored trip count value.
auto RemoveLoopFromBackedgeMap =
- [L](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
+ [](DenseMap<const Loop *, BackedgeTakenInfo> &Map, const Loop *L) {
auto BTCPos = Map.find(L);
if (BTCPos != Map.end()) {
BTCPos->second.clear();
@@ -6225,47 +6523,59 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
}
};
- RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
- RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);
+ SmallVector<const Loop *, 16> LoopWorklist(1, L);
+ SmallVector<Instruction *, 32> Worklist;
+ SmallPtrSet<Instruction *, 16> Visited;
- // Drop information about predicated SCEV rewrites for this loop.
- for (auto I = PredicatedSCEVRewrites.begin();
- I != PredicatedSCEVRewrites.end();) {
- std::pair<const SCEV *, const Loop *> Entry = I->first;
- if (Entry.second == L)
- PredicatedSCEVRewrites.erase(I++);
- else
- ++I;
- }
+ // Iterate over all the loops and sub-loops to drop SCEV information.
+ while (!LoopWorklist.empty()) {
+ auto *CurrL = LoopWorklist.pop_back_val();
- // Drop information about expressions based on loop-header PHIs.
- SmallVector<Instruction *, 16> Worklist;
- PushLoopPHIs(L, Worklist);
+ RemoveLoopFromBackedgeMap(BackedgeTakenCounts, CurrL);
+ RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts, CurrL);
- SmallPtrSet<Instruction *, 8> Visited;
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I).second)
- continue;
+ // Drop information about predicated SCEV rewrites for this loop.
+ for (auto I = PredicatedSCEVRewrites.begin();
+ I != PredicatedSCEVRewrites.end();) {
+ std::pair<const SCEV *, const Loop *> Entry = I->first;
+ if (Entry.second == CurrL)
+ PredicatedSCEVRewrites.erase(I++);
+ else
+ ++I;
+ }
- ValueExprMapType::iterator It =
- ValueExprMap.find_as(static_cast<Value *>(I));
- if (It != ValueExprMap.end()) {
- eraseValueFromMap(It->first);
- forgetMemoizedResults(It->second);
- if (PHINode *PN = dyn_cast<PHINode>(I))
- ConstantEvolutionLoopExitValue.erase(PN);
+ auto LoopUsersItr = LoopUsers.find(CurrL);
+ if (LoopUsersItr != LoopUsers.end()) {
+ for (auto *S : LoopUsersItr->second)
+ forgetMemoizedResults(S);
+ LoopUsers.erase(LoopUsersItr);
}
- PushDefUseChildren(I, Worklist);
- }
+ // Drop information about expressions based on loop-header PHIs.
+ PushLoopPHIs(CurrL, Worklist);
- // Forget all contained loops too, to avoid dangling entries in the
- // ValuesAtScopes map.
- for (Loop *I : *L)
- forgetLoop(I);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I).second)
+ continue;
- LoopPropertiesCache.erase(L);
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ eraseValueFromMap(It->first);
+ forgetMemoizedResults(It->second);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+
+ LoopPropertiesCache.erase(CurrL);
+ // Forget all contained loops too, to avoid dangling entries in the
+ // ValuesAtScopes map.
+ LoopWorklist.append(CurrL->begin(), CurrL->end());
+ }
}
void ScalarEvolution::forgetValue(Value *V) {
@@ -6377,7 +6687,7 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
}
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
- : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {
+ : ExactNotTaken(E), MaxNotTaken(E) {
assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
isa<SCEVConstant>(MaxNotTaken)) &&
"No point in having a non-constant max backedge taken count!");
@@ -6422,7 +6732,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
&&ExitCounts,
bool Complete, const SCEV *MaxCount, bool MaxOrZero)
: MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
- typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
+ using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
+
ExitNotTaken.reserve(ExitCounts.size());
std::transform(
ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
@@ -6454,7 +6765,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
+ using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
SmallVector<EdgeExitInfo, 4> ExitCounts;
bool CouldComputeBECount = true;
@@ -6521,8 +6832,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
- bool AllowPredicates) {
-
+ bool AllowPredicates) {
// Okay, we've chosen an exiting block. See what condition causes us to exit
// at this block and remember the exit block and whether all other targets
// lead to the loop header.
@@ -6785,19 +7095,19 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
BasicBlock *FBB,
bool ControlsExit,
bool AllowPredicates) {
-
// If the condition was exit on true, convert the condition to exit on false
- ICmpInst::Predicate Cond;
+ ICmpInst::Predicate Pred;
if (!L->contains(FBB))
- Cond = ExitCond->getPredicate();
+ Pred = ExitCond->getPredicate();
else
- Cond = ExitCond->getInversePredicate();
+ Pred = ExitCond->getInversePredicate();
+ const ICmpInst::Predicate OriginalPred = Pred;
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
ExitLimit ItCnt =
- computeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
+ computeLoadConstantCompareExitLimit(LI, RHS, L, Pred);
if (ItCnt.hasAnyInfo())
return ItCnt;
}
@@ -6814,11 +7124,11 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
// If there is a loop-invariant, force it into the RHS.
std::swap(LHS, RHS);
- Cond = ICmpInst::getSwappedPredicate(Cond);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
}
// Simplify the operands before analyzing them.
- (void)SimplifyICmpOperands(Cond, LHS, RHS);
+ (void)SimplifyICmpOperands(Pred, LHS, RHS);
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
@@ -6827,13 +7137,13 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (AddRec->getLoop() == L) {
// Form the constant range.
ConstantRange CompRange =
- ConstantRange::makeExactICmpRegion(Cond, RHSC->getAPInt());
+ ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt());
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
}
- switch (Cond) {
+ switch (Pred) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
@@ -6849,7 +7159,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
}
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT: { // while (X < Y)
- bool IsSigned = Cond == ICmpInst::ICMP_SLT;
+ bool IsSigned = Pred == ICmpInst::ICMP_SLT;
ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
AllowPredicates);
if (EL.hasAnyInfo()) return EL;
@@ -6857,7 +7167,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT: { // while (X > Y)
- bool IsSigned = Cond == ICmpInst::ICMP_SGT;
+ bool IsSigned = Pred == ICmpInst::ICMP_SGT;
ExitLimit EL =
howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
AllowPredicates);
@@ -6875,7 +7185,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
return ExhaustiveCount;
return computeShiftCompareExitLimit(ExitCond->getOperand(0),
- ExitCond->getOperand(1), L, Cond);
+ ExitCond->getOperand(1), L, OriginalPred);
}
ScalarEvolution::ExitLimit
@@ -6920,7 +7230,6 @@ ScalarEvolution::computeLoadConstantCompareExitLimit(
Constant *RHS,
const Loop *L,
ICmpInst::Predicate predicate) {
-
if (LI->isVolatile()) return getCouldNotCompute();
// Check to see if the loaded pointer is a getelementptr of a global.
@@ -7333,8 +7642,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
Value *BEValue = PN->getIncomingValueForBlock(Latch);
// Execute the loop symbolically to determine the exit value.
- if (BEs.getActiveBits() >= 32)
- return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
+ assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) &&
+ "BEs is <= MaxBruteForceIterations which is an 'unsigned'!");
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
@@ -7839,7 +8148,6 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
/// Find the roots of the quadratic equation for the given quadratic chrec
/// {L,+,M,+,N}. This returns either the two roots (which might be the same) or
/// two SCEVCouldNotCompute objects.
-///
static Optional<std::pair<const SCEVConstant *,const SCEVConstant *>>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
@@ -8080,7 +8388,6 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
/// expressions are equal, however for the purposes of looking for a condition
/// guarding a loop, it can be useful to be a little more general, since a
/// front-end may have replicated the controlling expression.
-///
static bool HasSameValue(const SCEV *A, const SCEV *B) {
// Quick check to see if they are the same SCEV.
if (A == B) return true;
@@ -8527,7 +8834,6 @@ bool ScalarEvolution::isKnownPredicateViaConstantRanges(
bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
-
// Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
// Return Y via OutY.
auto MatchBinaryAddToConst =
@@ -8693,7 +8999,6 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
DTN != HeaderDTN; DTN = DTN->getIDom()) {
-
assert(DTN && "should reach the loop header before reaching the root!");
BasicBlock *BB = DTN->getBlock();
@@ -9116,7 +9421,6 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
getNotSCEV(FoundLHS));
}
-
/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
@@ -9132,7 +9436,6 @@ static const SCEV *MatchNotExpr(const SCEV *Expr) {
return AddRHS->getOperand(1);
}
-
/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
template<typename MaxExprType>
static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
@@ -9143,7 +9446,6 @@ static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
}
-
/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
template<typename MaxExprType>
static bool IsMinConsistingOf(ScalarEvolution &SE,
@@ -9159,7 +9461,6 @@ static bool IsMinConsistingOf(ScalarEvolution &SE,
static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
-
// If both sides are affine addrecs for the same loop, with equal
// steps, and we know the recurrences don't wrap, then we only
// need to check the predicate on the starting values.
@@ -9295,7 +9596,9 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
} else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
Value *LL, *LR;
// FIXME: Once we have SDiv implemented, we can get rid of this matching.
+
using namespace llvm::PatternMatch;
+
if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) {
// Rules for division.
// We are going to perform some comparisons with Denominator and its
@@ -9510,14 +9813,54 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
return getUDivExpr(Delta, Step);
}
+const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
+ const SCEV *Stride,
+ const SCEV *End,
+ unsigned BitWidth,
+ bool IsSigned) {
+
+ assert(!isKnownNonPositive(Stride) &&
+         "Stride is expected to be strictly positive!");
+ // Calculate the maximum backedge count based on the range of values
+ // permitted by Start, End, and Stride.
+ const SCEV *MaxBECount;
+ APInt MinStart =
+ IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start);
+
+ APInt StrideForMaxBECount =
+ IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride);
+
+ // We already know that the stride is positive, so we paper over conservatism
+ // in our range computation by forcing StrideForMaxBECount to be at least one.
+ // In theory this is unnecessary, but we expect MaxBECount to be a
+ // SCEVConstant, and (udiv <constant> 0) is not constant folded by SCEV (there
+ // is nothing to constant fold it to).
+ APInt One(BitWidth, 1, IsSigned);
+ StrideForMaxBECount = APIntOps::smax(One, StrideForMaxBECount);
+
+ APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth)
+ : APInt::getMaxValue(BitWidth);
+ APInt Limit = MaxValue - (StrideForMaxBECount - 1);
+
+ // Although End can be a MAX expression we estimate MaxEnd considering only
+ // the case End = RHS of the loop termination condition. This is safe because
+ // in the other case (End - Start) is zero, leading to a zero maximum backedge
+ // taken count.
+ APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit)
+ : APIntOps::umin(getUnsignedRangeMax(End), Limit);
+
+ MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */,
+ getConstant(StrideForMaxBECount) /* Step */,
+ false /* Equality */);
+
+ return MaxBECount;
+}
+
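Stripped of the SCEV plumbing, the computation above is: clamp the largest possible end value so the rounding term cannot overflow, then take a ceiling division of (MaxEnd - MinStart) by the smallest possible stride. A simplified unsigned-only model of that arithmetic (maxBECountForLT here is a hypothetical helper, not the LLVM API):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Simplified 32-bit unsigned model of the max backedge-taken count for
// "for (i = Start; i < End; i += Stride)", given only value ranges.
uint32_t maxBECountForLT(uint32_t MinStart, uint32_t MinStride,
                         uint32_t MaxEnd) {
  MinStride = std::max<uint32_t>(1, MinStride); // paper over a zero stride
  // Clamp MaxEnd so the rounding term below cannot overflow.
  uint32_t Limit = UINT32_MAX - (MinStride - 1);
  MaxEnd = std::min(MaxEnd, Limit);
  if (MaxEnd <= MinStart)
    return 0; // degenerate range: the backedge is never taken
  // Ceiling division: (Delta + Stride - 1) / Stride.
  uint32_t Delta = MaxEnd - MinStart;
  return (Delta + MinStride - 1) / MinStride;
}

int main() {
  // The exit test succeeds at i = 0, 3, 6 and 9 (all < 10), so the backedge
  // is taken at most 4 times.
  assert(maxBECountForLT(/*MinStart=*/0, /*MinStride=*/3, /*MaxEnd=*/10) == 4);
  assert(maxBECountForLT(8, 1, 8) == 0);
  return 0;
}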
ScalarEvolution::ExitLimit
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
- // We handle only IV < Invariant
- if (!isLoopInvariant(RHS, L))
- return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
bool PredicatedIV = false;
@@ -9588,7 +9931,6 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
!loopHasNoSideEffects(L))
return getCouldNotCompute();
-
} else if (!Stride->isOne() &&
doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
// Avoid proven overflow cases: this will ensure that the backedge taken
@@ -9601,6 +9943,17 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
: ICmpInst::ICMP_ULT;
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
+ // When the RHS is not invariant, we do not know the end bound of the loop and
+ // cannot calculate the ExactBECount needed by ExitLimit. However, we can
+ // calculate the MaxBECount, given the start, stride and max value for the end
+ // bound of the loop (RHS), and the fact that IV does not overflow (which is
+ // checked above).
+ if (!isLoopInvariant(RHS, L)) {
+ const SCEV *MaxBECount = computeMaxBECountForLT(
+ Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
+ return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
+ false /*MaxOrZero*/, Predicates);
+ }
// If the backedge is taken at least once, then it will be taken
// (End-Start)/Stride times (rounded up to a multiple of Stride), where Start
// is the LHS value of the less-than comparison the first time it is evaluated
@@ -9633,37 +9986,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
MaxBECount = BECountIfBackedgeTaken;
MaxOrZero = true;
} else {
- // Calculate the maximum backedge count based on the range of values
- // permitted by Start, End, and Stride.
- APInt MinStart = IsSigned ? getSignedRangeMin(Start)
- : getUnsignedRangeMin(Start);
-
- unsigned BitWidth = getTypeSizeInBits(LHS->getType());
-
- APInt StrideForMaxBECount;
-
- if (PositiveStride)
- StrideForMaxBECount =
- IsSigned ? getSignedRangeMin(Stride)
- : getUnsignedRangeMin(Stride);
- else
- // Using a stride of 1 is safe when computing max backedge taken count for
- // a loop with unknown stride.
- StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
-
- APInt Limit =
- IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
- : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
-
- // Although End can be a MAX expression we estimate MaxEnd considering only
- // the case End = RHS. This is safe because in the other case (End - Start)
- // is zero, leading to a zero maximum backedge taken count.
- APInt MaxEnd =
- IsSigned ? APIntOps::smin(getSignedRangeMax(RHS), Limit)
- : APIntOps::umin(getUnsignedRangeMax(RHS), Limit);
-
- MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
- getConstant(StrideForMaxBECount), false);
+ MaxBECount = computeMaxBECountForLT(
+ Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
}
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
@@ -9874,6 +10198,7 @@ static inline bool containsUndefs(const SCEV *S) {
}
namespace {
+
// Collect all steps of SCEV expressions.
struct SCEVCollectStrides {
ScalarEvolution &SE;
@@ -9887,6 +10212,7 @@ struct SCEVCollectStrides {
Strides.push_back(AR->getStepRecurrence(SE));
return true;
}
+
bool isDone() const { return false; }
};
@@ -9894,8 +10220,7 @@ struct SCEVCollectStrides {
struct SCEVCollectTerms {
SmallVectorImpl<const SCEV *> &Terms;
- SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
- : Terms(T) {}
+ SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
bool follow(const SCEV *S) {
if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
@@ -9910,6 +10235,7 @@ struct SCEVCollectTerms {
// Keep looking.
return true;
}
+
bool isDone() const { return false; }
};
@@ -9918,7 +10244,7 @@ struct SCEVHasAddRec {
bool &ContainsAddRec;
SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
- ContainsAddRec = false;
+ ContainsAddRec = false;
}
bool follow(const SCEV *S) {
@@ -9932,6 +10258,7 @@ struct SCEVHasAddRec {
// Keep looking.
return true;
}
+
bool isDone() const { return false; }
};
@@ -9985,9 +10312,11 @@ struct SCEVCollectAddRecMultiplies {
// Keep looking.
return true;
}
+
bool isDone() const { return false; }
};
-}
+
+} // end anonymous namespace
/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
/// two places:
@@ -10066,7 +10395,6 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
return true;
}
-
// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
for (const SCEV *T : Terms)
@@ -10181,7 +10509,6 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
void ScalarEvolution::computeAccessFunctions(
const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<const SCEV *> &Sizes) {
-
// Early exit in case this SCEV is not an affine multivariate function.
if (Sizes.empty())
return;
@@ -10285,7 +10612,6 @@ void ScalarEvolution::computeAccessFunctions(
/// DelinearizationPass that walks through all loads and stores of a function
/// asking for the SCEV of the memory access with respect to all enclosing
/// loops, calling SCEV->delinearize on that and printing the results.
-
void ScalarEvolution::delinearize(const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<const SCEV *> &Sizes,
@@ -10374,11 +10700,8 @@ ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
AssumptionCache &AC, DominatorTree &DT,
LoopInfo &LI)
: F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
- CouldNotCompute(new SCEVCouldNotCompute()),
- WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
- ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64),
- FirstUnknown(nullptr) {
-
+ CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64),
+ LoopDispositions(64), BlockDispositions(64) {
// To use guards for proving predicates, we need to scan every instruction in
// relevant basic blocks, and not just terminators. Doing this is a waste of
// time if the IR does not actually contain any calls to
@@ -10399,7 +10722,6 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
- WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
@@ -10415,6 +10737,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
UniquePreds(std::move(Arg.UniquePreds)),
SCEVAllocator(std::move(Arg.SCEVAllocator)),
+ LoopUsers(std::move(Arg.LoopUsers)),
PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
FirstUnknown(Arg.FirstUnknown) {
Arg.FirstUnknown = nullptr;
@@ -10647,9 +10970,11 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
if (!L)
return LoopVariant;
- // This recurrence is variant w.r.t. L if L contains AR's loop.
- if (L->contains(AR->getLoop()))
+ // Everything that is not defined at loop entry is variant.
+ if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))
return LoopVariant;
+ assert(!L->contains(AR->getLoop()) && "Containing loop's header does not"
+ " dominate the contained loop's header?");
// This recurrence is invariant w.r.t. L if AR's loop contains L.
if (AR->getLoop()->contains(L))
@@ -10806,7 +11131,16 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
-void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
+bool ScalarEvolution::ExitLimit::hasOperand(const SCEV *S) const {
+ auto IsS = [&](const SCEV *X) { return S == X; };
+ auto ContainsS = [&](const SCEV *X) {
+ return !isa<SCEVCouldNotCompute>(X) && SCEVExprContains(X, IsS);
+ };
+ return ContainsS(ExactNotTaken) || ContainsS(MaxNotTaken);
+}
+
+void
+ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
ValuesAtScopes.erase(S);
LoopDispositions.erase(S);
BlockDispositions.erase(S);
@@ -10816,7 +11150,7 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
HasRecMap.erase(S);
MinTrailingZerosCache.erase(S);
- for (auto I = PredicatedSCEVRewrites.begin();
+ for (auto I = PredicatedSCEVRewrites.begin();
I != PredicatedSCEVRewrites.end();) {
std::pair<const SCEV *, const Loop *> Entry = I->first;
if (Entry.first == S)
@@ -10841,6 +11175,25 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
+void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
+ struct FindUsedLoops {
+ SmallPtrSet<const Loop *, 8> LoopsUsed;
+ bool follow(const SCEV *S) {
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(S))
+ LoopsUsed.insert(AR->getLoop());
+ return true;
+ }
+
+ bool isDone() const { return false; }
+ };
+
+ FindUsedLoops F;
+ SCEVTraversal<FindUsedLoops>(F).visitAll(S);
+
+ for (auto *L : F.LoopsUsed)
+ LoopUsers[L].push_back(S);
+}
+
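The design above is a reverse index: each newly created SCEV is registered under every loop whose add recurrences it mentions, so forgetLoop can drop exactly the cached expressions that depend on that loop instead of walking def-use chains. A rough sketch of the bookkeeping pattern with placeholder types (strings stand in for llvm::Loop* and const SCEV*; this is not the LLVM data structure):

#include <cassert>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

using Loop = std::string; // stand-in for llvm::Loop*
using Expr = std::string; // stand-in for const SCEV*

struct Cache {
  // Reverse index: loop -> expressions whose cached results depend on it.
  std::unordered_map<Loop, std::vector<Expr>> LoopUsers;
  std::set<Expr> Memoized;

  // Analogue of addToLoopUseLists: register the new expression under every
  // loop it uses, at creation time.
  void add(const Expr &E, const std::vector<Loop> &LoopsUsed) {
    Memoized.insert(E);
    for (const Loop &L : LoopsUsed)
      LoopUsers[L].push_back(E);
  }

  // Analogue of the forgetLoop change: invalidate only the registered users.
  void forgetLoop(const Loop &L) {
    auto It = LoopUsers.find(L);
    if (It == LoopUsers.end())
      return;
    for (const Expr &E : It->second)
      Memoized.erase(E);
    LoopUsers.erase(It);
  }
};

int main() {
  Cache C;
  C.add("{0,+,1}<loop0>", {"loop0"});
  C.add("%invariant", {});
  C.forgetLoop("loop0");
  assert(C.Memoized.count("%invariant") == 1);
  assert(C.Memoized.count("{0,+,1}<loop0>") == 0);
  return 0;
}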
void ScalarEvolution::verify() const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
ScalarEvolution SE2(F, TLI, AC, DT, LI);
@@ -10849,9 +11202,12 @@ void ScalarEvolution::verify() const {
// Map's SCEV expressions from one ScalarEvolution "universe" to another.
struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
+ SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}
+
const SCEV *visitConstant(const SCEVConstant *Constant) {
return SE.getConstant(Constant->getAPInt());
}
+
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
return SE.getUnknown(Expr->getValue());
}
@@ -10859,7 +11215,6 @@ void ScalarEvolution::verify() const {
const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
return SE.getCouldNotCompute();
}
- SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}
};
SCEVMapper SCM(SE2);
@@ -10948,6 +11303,7 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
+
char ScalarEvolutionWrapperPass::ID = 0;
ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
@@ -11023,6 +11379,7 @@ namespace {
class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
+
/// Rewrites \p S in the context of a loop L and the SCEV predication
/// infrastructure.
///
@@ -11038,11 +11395,6 @@ public:
return Rewriter.visit(S);
}
- SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
- SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
- SCEVUnionPredicate *Pred)
- : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
-
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (Pred) {
auto ExprPreds = Pred->getPredicatesForExpr(Expr);
@@ -11087,6 +11439,11 @@ public:
}
private:
+ explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
+ SCEVUnionPredicate *Pred)
+ : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
+
bool addOverflowAssumption(const SCEVPredicate *P) {
if (!NewPreds) {
// Check if we've already made this assumption.
@@ -11103,10 +11460,10 @@ private:
}
// If \p Expr represents a PHINode, we try to see if it can be represented
- // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
+ // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
// to add this predicate as a runtime overflow check, we return the AddRec.
- // If \p Expr does not meet these conditions (is not a PHI node, or we
- // couldn't create an AddRec for it, or couldn't add the predicate), we just
+ // If \p Expr does not meet these conditions (is not a PHI node, or we
+ // couldn't create an AddRec for it, or couldn't add the predicate), we just
// return \p Expr.
const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
if (!isa<PHINode>(Expr->getValue()))
@@ -11121,11 +11478,12 @@ private:
}
return PredicatedRewrite->first;
}
-
+
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
SCEVUnionPredicate *Pred;
const Loop *L;
};
+
} // end anonymous namespace
const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
@@ -11136,7 +11494,6 @@ const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
const SCEV *S, const Loop *L,
SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
-
SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
@@ -11292,7 +11649,7 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N) {
PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
Loop &L)
- : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {}
+ : SE(SE), L(L) {}
const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
const SCEV *Expr = SE.getSCEV(V);
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 47bdac00ae1f..86f714b930d0 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -878,7 +878,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
if (IncV->mayHaveSideEffects())
return false;
- if (IncV != PN)
+ if (IncV == PN)
return true;
return isNormalAddRecExprPHI(PN, IncV, L);
@@ -1143,7 +1143,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
for (auto &I : *L->getHeader()) {
auto *PN = dyn_cast<PHINode>(&I);
- if (!PN || !SE.isSCEVable(PN->getType()))
+    // Found the first non-PHI; the remaining instructions cannot be PHIs either.
+ if (!PN)
+ break;
+
+ if (!SE.isSCEVable(PN->getType()))
continue;
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
@@ -1728,10 +1732,28 @@ Value *SCEVExpander::expand(const SCEV *S) {
InsertPt = &*L->getHeader()->getFirstInsertionPt();
}
} else {
+      // We can move the insertion point only if there are no div or rem
+      // operations; otherwise we risk hoisting it past the check for a zero
+      // denominator.
+ auto SafeToHoist = [](const SCEV *S) {
+ return !SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
+ if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
+ // Division by non-zero constants can be hoisted.
+ return SC->getValue()->isZero();
+ // All other divisions should not be moved as they may be
+ // divisions by zero and should be kept within the
+ // conditions of the surrounding loops that guard their
+ // execution (see PR35406).
+ return true;
+ }
+ return false;
+ });
+ };
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
- if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
+ SafeToHoist(S))
InsertPt = &*L->getHeader()->getFirstInsertionPt();
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
(isInsertedInstruction(InsertPt) ||
@@ -2293,4 +2315,9 @@ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
visitAll(S, Search);
return !Search.IsUnsafe;
}
+
+bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
+ ScalarEvolution &SE) {
+ return isSafeToExpand(S, SE) && SE.dominates(S, InsertionPoint->getParent());
+}
}
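Taken together, SafeToHoist and the new isSafeToExpandAt entry point let a caller ask both questions, "is this expression safe to materialize at all" and "is it available at this particular point", before expanding. A hedged sketch of a hypothetical caller; InsertPt, DL and the surrounding pass context are assumed, and the names are illustrative:

    // Expand S at InsertPt only when the expression is safe to materialize
    // and its operands are available (dominate the insertion block) there.
    if (isSafeToExpandAt(S, InsertPt, SE)) {
      SCEVExpander Expander(SE, DL, "scev.expand");
      Value *V = Expander.expandCodeFor(S, S->getType(), InsertPt);
      // ... rewrite the user in terms of V ...
    }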
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 833c6e09f6fd..f12275aff387 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -34,11 +34,12 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -47,17 +48,18 @@ using namespace llvm;
// can also be achieved by stripping the associated metadata tags from IR, but
// this option is sometimes more convenient.
static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias",
- cl::init(true));
+ cl::init(true), cl::Hidden);
namespace {
+
/// This is a simple wrapper around an MDNode which provides a higher-level
/// interface by hiding the details of how alias analysis information is encoded
/// in its operands.
class AliasScopeNode {
- const MDNode *Node;
+ const MDNode *Node = nullptr;
public:
- AliasScopeNode() : Node(nullptr) {}
+ AliasScopeNode() = default;
explicit AliasScopeNode(const MDNode *N) : Node(N) {}
/// Get the MDNode for this AliasScopeNode.
@@ -70,7 +72,8 @@ public:
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
};
-} // end of anonymous namespace
+
+} // end anonymous namespace
AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB) {
@@ -99,12 +102,12 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS,
if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
LLVMContext::MD_noalias)))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
if (!mayAliasInScopes(
CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
Loc.AATags.NoAlias))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
return AAResultBase::getModRefInfo(CS, Loc);
}
@@ -117,12 +120,12 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
if (!mayAliasInScopes(
CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
if (!mayAliasInScopes(
CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
return AAResultBase::getModRefInfo(CS1, CS2);
}
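The two getModRefInfo hunks above are mostly a mechanical switch from the old MRI_* enumerators to the scoped ModRefInfo values; the underlying rule is unchanged: when one side's !noalias list covers the scopes the other side accesses under !alias.scope, the result is NoModRef. A rough, schematic illustration with invented metadata numbering (a sketch, not taken from the patch):

    // ; %p is only accessed under scope !1, and the call declares !noalias
    // ; on !1, so getModRefInfo(call, MemoryLocation(%p)) comes back as
    // ; ModRefInfo::NoModRef.
    //   %v = load i32, i32* %p, !alias.scope !1
    //   call void @f(), !noalias !1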
@@ -181,6 +184,7 @@ ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F,
}
char ScopedNoAliasAAWrapperPass::ID = 0;
+
INITIALIZE_PASS(ScopedNoAliasAAWrapperPass, "scoped-noalias",
"Scoped NoAlias Alias Analysis", false, true)
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
deleted file mode 100644
index 470f4bee1e0a..000000000000
--- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp
+++ /dev/null
@@ -1,347 +0,0 @@
-//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements an abstract sparse conditional propagation algorithm,
-// modeled after SCCP, but with a customizable lattice function.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/SparsePropagation.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "sparseprop"
-
-//===----------------------------------------------------------------------===//
-// AbstractLatticeFunction Implementation
-//===----------------------------------------------------------------------===//
-
-AbstractLatticeFunction::~AbstractLatticeFunction() {}
-
-/// PrintValue - Render the specified lattice value to the specified stream.
-void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
- if (V == UndefVal)
- OS << "undefined";
- else if (V == OverdefinedVal)
- OS << "overdefined";
- else if (V == UntrackedVal)
- OS << "untracked";
- else
- OS << "unknown lattice value";
-}
-
-//===----------------------------------------------------------------------===//
-// SparseSolver Implementation
-//===----------------------------------------------------------------------===//
-
-/// getOrInitValueState - Return the LatticeVal object that corresponds to the
-/// value, initializing the value's state if it hasn't been entered into the
-/// map yet. This function is necessary because not all values should start
-/// out in the underdefined state... Arguments should be overdefined, and
-/// constants should be marked as constants.
-///
-SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
- DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
- if (I != ValueState.end()) return I->second; // Common case, in the map
-
- LatticeVal LV;
- if (LatticeFunc->IsUntrackedValue(V))
- return LatticeFunc->getUntrackedVal();
- else if (Constant *C = dyn_cast<Constant>(V))
- LV = LatticeFunc->ComputeConstant(C);
- else if (Argument *A = dyn_cast<Argument>(V))
- LV = LatticeFunc->ComputeArgument(A);
- else if (!isa<Instruction>(V))
- // All other non-instructions are overdefined.
- LV = LatticeFunc->getOverdefinedVal();
- else
- // All instructions are underdefined by default.
- LV = LatticeFunc->getUndefVal();
-
- // If this value is untracked, don't add it to the map.
- if (LV == LatticeFunc->getUntrackedVal())
- return LV;
- return ValueState[V] = LV;
-}
-
-/// UpdateState - When the state for some instruction is potentially updated,
-/// this function notices and adds I to the worklist if needed.
-void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
- DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
- if (I != ValueState.end() && I->second == V)
- return; // No change.
-
- // An update. Visit uses of I.
- ValueState[&Inst] = V;
- InstWorkList.push_back(&Inst);
-}
-
-/// MarkBlockExecutable - This method can be used by clients to mark all of
-/// the blocks that are known to be intrinsically live in the processed unit.
-void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
- DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
- BBExecutable.insert(BB); // Basic block is executable!
- BBWorkList.push_back(BB); // Add the block to the work list!
-}
-
-/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
-/// work list if it is not already executable...
-void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
- if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
- return; // This edge is already known to be executable!
-
- DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
- << " -> " << Dest->getName() << "\n");
-
- if (BBExecutable.count(Dest)) {
- // The destination is already executable, but we just made an edge
- // feasible that wasn't before. Revisit the PHI nodes in the block
- // because they have potentially new operands.
- for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
- visitPHINode(*cast<PHINode>(I));
-
- } else {
- MarkBlockExecutable(Dest);
- }
-}
-
-
-/// getFeasibleSuccessors - Return a vector of booleans to indicate which
-/// successors are reachable from a given terminator instruction.
-void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
- SmallVectorImpl<bool> &Succs,
- bool AggressiveUndef) {
- Succs.resize(TI.getNumSuccessors());
- if (TI.getNumSuccessors() == 0) return;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
- if (BI->isUnconditional()) {
- Succs[0] = true;
- return;
- }
-
- LatticeVal BCValue;
- if (AggressiveUndef)
- BCValue = getOrInitValueState(BI->getCondition());
- else
- BCValue = getLatticeState(BI->getCondition());
-
- if (BCValue == LatticeFunc->getOverdefinedVal() ||
- BCValue == LatticeFunc->getUntrackedVal()) {
- // Overdefined condition variables can branch either way.
- Succs[0] = Succs[1] = true;
- return;
- }
-
- // If undefined, neither is feasible yet.
- if (BCValue == LatticeFunc->getUndefVal())
- return;
-
- Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
- if (!C || !isa<ConstantInt>(C)) {
- // Non-constant values can go either way.
- Succs[0] = Succs[1] = true;
- return;
- }
-
- // Constant condition variables mean the branch can only go a single way
- Succs[C->isNullValue()] = true;
- return;
- }
-
- if (isa<InvokeInst>(TI)) {
- // Invoke instructions successors are always executable.
- // TODO: Could ask the lattice function if the value can throw.
- Succs[0] = Succs[1] = true;
- return;
- }
-
- if (isa<IndirectBrInst>(TI)) {
- Succs.assign(Succs.size(), true);
- return;
- }
-
- SwitchInst &SI = cast<SwitchInst>(TI);
- LatticeVal SCValue;
- if (AggressiveUndef)
- SCValue = getOrInitValueState(SI.getCondition());
- else
- SCValue = getLatticeState(SI.getCondition());
-
- if (SCValue == LatticeFunc->getOverdefinedVal() ||
- SCValue == LatticeFunc->getUntrackedVal()) {
- // All destinations are executable!
- Succs.assign(TI.getNumSuccessors(), true);
- return;
- }
-
- // If undefined, neither is feasible yet.
- if (SCValue == LatticeFunc->getUndefVal())
- return;
-
- Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
- if (!C || !isa<ConstantInt>(C)) {
- // All destinations are executable!
- Succs.assign(TI.getNumSuccessors(), true);
- return;
- }
- SwitchInst::CaseHandle Case = *SI.findCaseValue(cast<ConstantInt>(C));
- Succs[Case.getSuccessorIndex()] = true;
-}
-
-
-/// isEdgeFeasible - Return true if the control flow edge from the 'From'
-/// basic block to the 'To' basic block is currently feasible...
-bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
- bool AggressiveUndef) {
- SmallVector<bool, 16> SuccFeasible;
- TerminatorInst *TI = From->getTerminator();
- getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
-
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (TI->getSuccessor(i) == To && SuccFeasible[i])
- return true;
-
- return false;
-}
-
-void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
- SmallVector<bool, 16> SuccFeasible;
- getFeasibleSuccessors(TI, SuccFeasible, true);
-
- BasicBlock *BB = TI.getParent();
-
- // Mark all feasible successors executable...
- for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
- if (SuccFeasible[i])
- markEdgeExecutable(BB, TI.getSuccessor(i));
-}
-
-void SparseSolver::visitPHINode(PHINode &PN) {
- // The lattice function may store more information on a PHINode than could be
- // computed from its incoming values. For example, SSI form stores its sigma
- // functions as PHINodes with a single incoming value.
- if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
- LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
- if (IV != LatticeFunc->getUntrackedVal())
- UpdateState(PN, IV);
- return;
- }
-
- LatticeVal PNIV = getOrInitValueState(&PN);
- LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
-
- // If this value is already overdefined (common) just return.
- if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
- return; // Quick exit
-
- // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
- // and slow us down a lot. Just mark them overdefined.
- if (PN.getNumIncomingValues() > 64) {
- UpdateState(PN, Overdefined);
- return;
- }
-
- // Look at all of the executable operands of the PHI node. If any of them
- // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the
- // transfer function to give us the merge of the incoming values.
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- // If the edge is not yet known to be feasible, it doesn't impact the PHI.
- if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
- continue;
-
- // Merge in this value.
- LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
- if (OpVal != PNIV)
- PNIV = LatticeFunc->MergeValues(PNIV, OpVal);
-
- if (PNIV == Overdefined)
- break; // Rest of input values don't matter.
- }
-
- // Update the PHI with the compute value, which is the merge of the inputs.
- UpdateState(PN, PNIV);
-}
-
-
-void SparseSolver::visitInst(Instruction &I) {
- // PHIs are handled by the propagation logic, they are never passed into the
- // transfer functions.
- if (PHINode *PN = dyn_cast<PHINode>(&I))
- return visitPHINode(*PN);
-
- // Otherwise, ask the transfer function what the result is. If this is
- // something that we care about, remember it.
- LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
- if (IV != LatticeFunc->getUntrackedVal())
- UpdateState(I, IV);
-
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
- visitTerminatorInst(*TI);
-}
-
-void SparseSolver::Solve(Function &F) {
- MarkBlockExecutable(&F.getEntryBlock());
-
- // Process the work lists until they are empty!
- while (!BBWorkList.empty() || !InstWorkList.empty()) {
- // Process the instruction work list.
- while (!InstWorkList.empty()) {
- Instruction *I = InstWorkList.back();
- InstWorkList.pop_back();
-
- DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n");
-
- // "I" got into the work list because it made a transition. See if any
- // users are both live and in need of updating.
- for (User *U : I->users()) {
- Instruction *UI = cast<Instruction>(U);
- if (BBExecutable.count(UI->getParent())) // Inst is executable?
- visitInst(*UI);
- }
- }
-
- // Process the basic block work list.
- while (!BBWorkList.empty()) {
- BasicBlock *BB = BBWorkList.back();
- BBWorkList.pop_back();
-
- DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);
-
- // Notify all instructions in this basic block that they are newly
- // executable.
- for (Instruction &I : *BB)
- visitInst(I);
- }
- }
-}
-
-void SparseSolver::Print(Function &F, raw_ostream &OS) const {
- OS << "\nFUNCTION: " << F.getName() << "\n";
- for (auto &BB : F) {
- if (!BBExecutable.count(&BB))
- OS << "INFEASIBLE: ";
- OS << "\t";
- if (BB.hasName())
- OS << BB.getName() << ":\n";
- else
- OS << "; anon bb\n";
- for (auto &I : BB) {
- LatticeFunc->PrintValue(getLatticeState(&I), OS);
- OS << I << "\n";
- }
-
- OS << "\n";
- }
-}
-
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 2be5d5caf7c2..d18246ac5941 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -50,9 +50,9 @@ static bool hasSinCosPiStret(const Triple &T) {
return true;
}
-/// initialize - Initialize the set of available library functions based on the
-/// specified target triple. This should be carefully written so that a missing
-/// target triple gets a sane set of defaults.
+/// Initialize the set of available library functions based on the specified
+/// target triple. This should be carefully written so that a missing target
+/// triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
ArrayRef<StringRef> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
@@ -182,6 +182,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_atanh);
TLI.setUnavailable(LibFunc_atanhf);
TLI.setUnavailable(LibFunc_atanhl);
+ TLI.setUnavailable(LibFunc_cabs);
+ TLI.setUnavailable(LibFunc_cabsf);
+ TLI.setUnavailable(LibFunc_cabsl);
TLI.setUnavailable(LibFunc_cbrt);
TLI.setUnavailable(LibFunc_cbrtf);
TLI.setUnavailable(LibFunc_cbrtl);
@@ -605,7 +608,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType() &&
- FTy.getParamType(2)->isIntegerTy());
+ IsSizeTTy(FTy.getParamType(2)));
case LibFunc_strcpy_chk:
case LibFunc_stpcpy_chk:
@@ -630,7 +633,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0) == PCharTy &&
- FTy.getParamType(2)->isIntegerTy());
+ IsSizeTTy(FTy.getParamType(2)));
case LibFunc_strxfrm:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
@@ -645,7 +648,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1) &&
- FTy.getParamType(2)->isIntegerTy());
+ IsSizeTTy(FTy.getParamType(2)));
case LibFunc_strspn:
case LibFunc_strcspn:
@@ -1267,6 +1270,25 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy());
+ case LibFunc_cabs:
+ case LibFunc_cabsf:
+ case LibFunc_cabsl: {
+ Type* RetTy = FTy.getReturnType();
+ if (!RetTy->isFloatingPointTy())
+ return false;
+
+ // NOTE: These prototypes are target specific and currently support
+ // "complex" passed as an array or discrete real & imaginary parameters.
+ // Add other calling conventions to enable libcall optimizations.
+ if (NumParams == 1)
+ return (FTy.getParamType(0)->isArrayTy() &&
+ FTy.getParamType(0)->getArrayNumElements() == 2 &&
+ FTy.getParamType(0)->getArrayElementType() == RetTy);
+ else if (NumParams == 2)
+ return (FTy.getParamType(0) == RetTy && FTy.getParamType(1) == RetTy);
+ else
+ return false;
+ }
case LibFunc::NumLibFuncs:
break;
}
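For orientation, the two shapes the new LibFunc_cabs check accepts correspond roughly to a complex argument lowered either as a two-element array of the return type (the NumParams == 1 branch) or as separate real and imaginary scalars (the NumParams == 2 branch). A small sketch of the one-argument form in IR types, assuming an LLVMContext Ctx is in scope; names here are illustrative:

    // double cabs([2 x double]) -- the NumParams == 1 shape checked above.
    Type *DblTy = Type::getDoubleTy(Ctx);
    Type *CplxTy = ArrayType::get(DblTy, 2);                   // [2 x double]
    FunctionType *CabsTy =
        FunctionType::get(DblTy, {CplxTy}, /*isVarArg=*/false);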
@@ -1519,20 +1541,11 @@ TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) {
return *Impl;
}
-unsigned TargetLibraryInfoImpl::getTargetWCharSize(const Triple &T) {
- // See also clang/lib/Basic/Targets.cpp.
- if (T.isPS4() || T.isOSWindows() || T.isArch16Bit())
- return 2;
- if (T.getArch() == Triple::xcore)
- return 1;
- return 4;
-}
-
unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const {
if (auto *ShortWChar = cast_or_null<ConstantAsMetadata>(
M.getModuleFlag("wchar_size")))
return cast<ConstantInt>(ShortWChar->getValue())->getZExtValue();
- return getTargetWCharSize(Triple(M.getTargetTriple()));
+ return 0;
}
TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()
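Dropping getTargetWCharSize means the width of wchar_t is no longer guessed from the target triple: it now comes only from the module's "wchar_size" flag, and getWCharSize returns 0 when that flag is absent. A minimal sketch of how a front end would record it, assuming an existing llvm::Module M:

    // Record a 4-byte wchar_t in module metadata; without this flag,
    // TargetLibraryInfoImpl::getWCharSize(M) now returns 0 instead of a guess.
    M.addModuleFlag(llvm::Module::Error, "wchar_size", 4);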
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index 25813c65037f..b744cae51ed7 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -16,18 +16,19 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "tti"
-static cl::opt<bool> UseWideMemcpyLoopLowering(
- "use-wide-memcpy-loop-lowering", cl::init(false),
- cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."),
- cl::Hidden);
+static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
+ cl::Hidden,
+ cl::desc("Recognize reduction patterns."));
namespace {
/// \brief No-op implementation of the TTI interface using the utility base
@@ -144,9 +145,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
int64_t Scale,
- unsigned AddrSpace) const {
+ unsigned AddrSpace,
+ Instruction *I) const {
return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale, AddrSpace);
+ Scale, AddrSpace, I);
}
bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
@@ -166,7 +168,16 @@ bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
}
bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
- return TTIImpl->isLegalMaskedGather(DataType);
+ return TTIImpl->isLegalMaskedScatter(DataType);
+}
+
+bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
+ return TTIImpl->hasDivRemOp(DataType, IsSigned);
+}
+
+bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
+ unsigned AddrSpace) const {
+ return TTIImpl->hasVolatileVariant(I, AddrSpace);
}
bool TargetTransformInfo::prefersVectorizedAddressing() const {
@@ -184,9 +195,8 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return Cost;
}
-bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
- int64_t Offset) const {
- return TTIImpl->isFoldableMemAccessOffset(I, Offset);
+bool TargetTransformInfo::LSRWithInstrQueries() const {
+ return TTIImpl->LSRWithInstrQueries();
}
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
@@ -235,8 +245,9 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
-bool TargetTransformInfo::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const {
- return TTIImpl->expandMemCmp(I, MaxLoadSize);
+const TargetTransformInfo::MemCmpExpansionOptions *
+TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
+ return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
@@ -265,6 +276,10 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
return TTIImpl->haveFastSqrt(Ty);
}
+bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
+ return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
+}
+
int TargetTransformInfo::getFPOpCost(Type *Ty) const {
int Cost = TTIImpl->getFPOpCost(Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
@@ -321,6 +336,16 @@ unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
+llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
+ const {
+ return TTIImpl->getCacheSize(Level);
+}
+
+llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
+ CacheLevel Level) const {
+ return TTIImpl->getCacheAssociativity(Level);
+}
+
unsigned TargetTransformInfo::getPrefetchDistance() const {
return TTIImpl->getPrefetchDistance();
}
@@ -467,9 +492,18 @@ int TargetTransformInfo::getAddressComputationCost(Type *Tp,
return Cost;
}
-int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) const {
- int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
+int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwiseForm) const {
+ int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
+ bool IsPairwiseForm,
+ bool IsUnsigned) const {
+ int Cost =
+ TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -508,10 +542,6 @@ void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
SrcAlign, DestAlign);
}
-bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const {
- return UseWideMemcpyLoopLowering;
-}
-
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
return TTIImpl->areInlineCompatible(Caller, Callee);
@@ -564,6 +594,557 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
+int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
+ return TTIImpl->getInstructionLatency(I);
+}
+
+static bool isReverseVectorMask(ArrayRef<int> Mask) {
+ for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
+ if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
+ return false;
+ return true;
+}
+
+static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
+ bool Vec0 = false;
+ bool Vec1 = false;
+ for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
+ if (Mask[i] >= 0) {
+ if ((unsigned)Mask[i] >= NumVecElts)
+ Vec1 = true;
+ else
+ Vec0 = true;
+ }
+ }
+ return !(Vec0 && Vec1);
+}
+
+static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
+ for (unsigned i = 0; i < Mask.size(); ++i)
+ if (Mask[i] > 0)
+ return false;
+ return true;
+}
+
+static bool isAlternateVectorMask(ArrayRef<int> Mask) {
+ bool isAlternate = true;
+ unsigned MaskSize = Mask.size();
+
+ // Example: shufflevector A, B, <0,5,2,7>
+ for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
+ }
+
+ if (isAlternate)
+ return true;
+
+ isAlternate = true;
+ // Example: shufflevector A, B, <4,1,6,3>
+ for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
+ }
+
+ return isAlternate;
+}
+
+static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
+ TargetTransformInfo::OperandValueKind OpInfo =
+ TargetTransformInfo::OK_AnyValue;
+
+ // Check for a splat of a constant or for a non uniform vector of constants.
+ if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+ OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
+ if (cast<Constant>(V)->getSplatValue() != nullptr)
+ OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+ }
+
+  // Check for a splat of a uniform value. This is not loop aware, so classify
+  // the operand as uniform only for the obviously uniform cases (argument,
+  // global value).
+ const Value *Splat = getSplatValue(V);
+ if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+ OpInfo = TargetTransformInfo::OK_UniformValue;
+
+ return OpInfo;
+}
+
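A quick illustration of the operand kinds computed above, in the same comment style this file already uses for IR snippets; the 'Mul' instruction below is hypothetical:

    //   %v = mul <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7>
    // Operand 1 is a constant vector whose getSplatValue() is non-null:
    TargetTransformInfo::OperandValueKind K =
        getOperandInfo(Mul->getOperand(1));   // OK_UniformConstantValue
    // A constant such as <i32 1, i32 2, i32 3, i32 4> would instead yield
    // OK_NonUniformConstantValue, and a splat of a function argument or
    // global would be classified OK_UniformValue.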
+static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
+ unsigned Level) {
+ // We don't need a shuffle if we just want to have element 0 in position 0 of
+ // the vector.
+ if (!SI && Level == 0 && IsLeft)
+ return true;
+ else if (!SI)
+ return false;
+
+ SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
+
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
+ // we look at the left or right side.
+ for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
+ Mask[i] = val;
+
+ SmallVector<int, 16> ActualMask = SI->getShuffleMask();
+ return Mask == ActualMask;
+}
+
+namespace {
+/// Kind of the reduction data.
+enum ReductionKind {
+ RK_None, /// Not a reduction.
+ RK_Arithmetic, /// Binary reduction data.
+ RK_MinMax, /// Min/max reduction data.
+ RK_UnsignedMinMax, /// Unsigned min/max reduction data.
+};
+/// Contains opcode + LHS/RHS parts of the reduction operations.
+struct ReductionData {
+ ReductionData() = delete;
+ ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
+ : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
+ assert(Kind != RK_None && "expected binary or min/max reduction only.");
+ }
+ unsigned Opcode = 0;
+ Value *LHS = nullptr;
+ Value *RHS = nullptr;
+ ReductionKind Kind = RK_None;
+ bool hasSameData(ReductionData &RD) const {
+ return Kind == RD.Kind && Opcode == RD.Opcode;
+ }
+};
+} // namespace
+
+static Optional<ReductionData> getReductionData(Instruction *I) {
+ Value *L, *R;
+ if (m_BinOp(m_Value(L), m_Value(R)).match(I))
+ return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
+ if (auto *SI = dyn_cast<SelectInst>(I)) {
+ if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
+ m_SMax(m_Value(L), m_Value(R)).match(SI) ||
+ m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
+ m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
+ m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
+ m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
+ auto *CI = cast<CmpInst>(SI->getCondition());
+ return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
+ }
+ if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
+ m_UMax(m_Value(L), m_Value(R)).match(SI)) {
+ auto *CI = cast<CmpInst>(SI->getCondition());
+ return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
+ }
+ }
+ return llvm::None;
+}
+
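getReductionData above relies on the PatternMatch min/max matchers; for readers unfamiliar with them, here is a minimal standalone use of the same matcher, assuming an Instruction *I (this file already does `using namespace PatternMatch`):

    // Recognize "select (icmp sgt A, B), A, B" as a signed max, which is the
    // exact shape m_SMax is defined to match.
    Value *A, *B;
    if (auto *Sel = dyn_cast<SelectInst>(I))
      if (m_SMax(m_Value(A), m_Value(B)).match(Sel)) {
        auto *Cmp = cast<CmpInst>(Sel->getCondition());
        // Sel computes smax(A, B); Cmp is the icmp feeding the select.
        (void)Cmp;
      }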
+static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
+ unsigned Level,
+ unsigned NumLevels) {
+ // Match one level of pairwise operations.
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ if (!I)
+ return RK_None;
+
+ assert(I->getType()->isVectorTy() && "Expecting a vector type");
+
+ Optional<ReductionData> RD = getReductionData(I);
+ if (!RD)
+ return RK_None;
+
+ ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
+ if (!LS && Level)
+ return RK_None;
+ ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
+ if (!RS && Level)
+ return RK_None;
+
+ // On level 0 we can omit one shufflevector instruction.
+ if (!Level && !RS && !LS)
+ return RK_None;
+
+ // Shuffle inputs must match.
+ Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
+ Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
+ Value *NextLevelOp = nullptr;
+ if (NextLevelOpR && NextLevelOpL) {
+ // If we have two shuffles their operands must match.
+ if (NextLevelOpL != NextLevelOpR)
+ return RK_None;
+
+ NextLevelOp = NextLevelOpL;
+ } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
+ // On the first level we can omit the shufflevector <0, undef,...>. So the
+ // input to the other shufflevector <1, undef> must match with one of the
+ // inputs to the current binary operation.
+ // Example:
+ // %NextLevelOpL = shufflevector %R, <1, undef ...>
+ // %BinOp = fadd %NextLevelOpL, %R
+ if (NextLevelOpL && NextLevelOpL != RD->RHS)
+ return RK_None;
+ else if (NextLevelOpR && NextLevelOpR != RD->LHS)
+ return RK_None;
+
+ NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
+ } else
+ return RK_None;
+
+  // Check that the next level's binary operation exists and matches the
+  // current one.
+ if (Level + 1 != NumLevels) {
+ Optional<ReductionData> NextLevelRD =
+ getReductionData(cast<Instruction>(NextLevelOp));
+ if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
+ return RK_None;
+ }
+
+ // Shuffle mask for pairwise operation must match.
+ if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
+ if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
+ return RK_None;
+ } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
+ if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
+ return RK_None;
+ } else {
+ return RK_None;
+ }
+
+ if (++Level == NumLevels)
+ return RD->Kind;
+
+ // Match next level.
+ return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
+ NumLevels);
+}
+
+static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return RK_None;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return RK_None;
+
+ auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return RK_None;
+ Optional<ReductionData> RD = getReductionData(RdxStart);
+ if (!RD)
+ return RK_None;
+
+ Type *VecTy = RdxStart->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return RK_None;
+
+ // We look for a sequence of shuffle,shuffle,add triples like the following
+ // that builds a pairwise reduction tree.
+ //
+ // (X0, X1, X2, X3)
+ // (X0 + X1, X2 + X3, undef, undef)
+ // ((X0 + X1) + (X2 + X3), undef, undef, undef)
+ //
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+ if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
+ RK_None)
+ return RK_None;
+
+ Opcode = RD->Opcode;
+ Ty = VecTy;
+
+ return RD->Kind;
+}
+
+static std::pair<Value *, ShuffleVectorInst *>
+getShuffleAndOtherOprd(Value *L, Value *R) {
+ ShuffleVectorInst *S = nullptr;
+
+ if ((S = dyn_cast<ShuffleVectorInst>(L)))
+ return std::make_pair(R, S);
+
+ S = dyn_cast<ShuffleVectorInst>(R);
+ return std::make_pair(L, S);
+}
+
+static ReductionKind
+matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return RK_None;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return RK_None;
+
+ auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return RK_None;
+ Optional<ReductionData> RD = getReductionData(RdxStart);
+ if (!RD)
+ return RK_None;
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return RK_None;
+
+  // We look for a sequence of shuffles and adds like the following, matching
+  // one (fadd, shufflevector) pair at a time.
+ //
+ // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+ // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+
+ unsigned MaskStart = 1;
+ Instruction *RdxOp = RdxStart;
+ SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+ unsigned NumVecElemsRemain = NumVecElems;
+ while (NumVecElemsRemain - 1) {
+ // Check for the right reduction operation.
+ if (!RdxOp)
+ return RK_None;
+ Optional<ReductionData> RDLevel = getReductionData(RdxOp);
+ if (!RDLevel || !RDLevel->hasSameData(*RD))
+ return RK_None;
+
+ Value *NextRdxOp;
+ ShuffleVectorInst *Shuffle;
+ std::tie(NextRdxOp, Shuffle) =
+ getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);
+
+    // Check that the current reduction operation and the shuffle use the
+    // same value.
+ if (Shuffle == nullptr)
+ return RK_None;
+ if (Shuffle->getOperand(0) != NextRdxOp)
+ return RK_None;
+
+    // Check that the shuffle mask matches.
+ for (unsigned j = 0; j != MaskStart; ++j)
+ ShuffleMask[j] = MaskStart + j;
+ // Fill the rest of the mask with -1 for undef.
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
+
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+ if (ShuffleMask != Mask)
+ return RK_None;
+
+ RdxOp = dyn_cast<Instruction>(NextRdxOp);
+ NumVecElemsRemain /= 2;
+ MaskStart *= 2;
+ }
+
+ Opcode = RD->Opcode;
+ Ty = VecTy;
+ return RD->Kind;
+}
+
+int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
+ switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:
+ return getUserCost(I);
+
+ case Instruction::Ret:
+ case Instruction::PHI:
+ case Instruction::Br: {
+ return getCFInstrCost(I->getOpcode());
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ TargetTransformInfo::OperandValueKind Op1VK =
+ getOperandInfo(I->getOperand(0));
+ TargetTransformInfo::OperandValueKind Op2VK =
+ getOperandInfo(I->getOperand(1));
+ SmallVector<const Value*, 2> Operands(I->operand_values());
+ return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
+ Op2VK, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None,
+ Operands);
+ }
+ case Instruction::Select: {
+ const SelectInst *SI = cast<SelectInst>(I);
+ Type *CondTy = SI->getCondition()->getType();
+ return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *ValTy = I->getOperand(0)->getType();
+ return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
+ }
+ case Instruction::Store: {
+ const StoreInst *SI = cast<StoreInst>(I);
+ Type *ValTy = SI->getValueOperand()->getType();
+ return getMemoryOpCost(I->getOpcode(), ValTy,
+ SI->getAlignment(),
+ SI->getPointerAddressSpace(), I);
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(I);
+ return getMemoryOpCost(I->getOpcode(), I->getType(),
+ LI->getAlignment(),
+ LI->getPointerAddressSpace(), I);
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast: {
+ Type *SrcTy = I->getOperand(0)->getType();
+ return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+
+    // Try to match a reduction sequence (a series of shufflevector and vector
+    // adds followed by an extractelement).
+ unsigned ReduxOpCode;
+ Type *ReduxType;
+
+ switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
+ case RK_Arithmetic:
+ return getArithmeticReductionCost(ReduxOpCode, ReduxType,
+ /*IsPairwiseForm=*/false);
+ case RK_MinMax:
+ return getMinMaxReductionCost(
+ ReduxType, CmpInst::makeCmpResultType(ReduxType),
+ /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
+ case RK_UnsignedMinMax:
+ return getMinMaxReductionCost(
+ ReduxType, CmpInst::makeCmpResultType(ReduxType),
+ /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
+ case RK_None:
+ break;
+ }
+
+ switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
+ case RK_Arithmetic:
+ return getArithmeticReductionCost(ReduxOpCode, ReduxType,
+ /*IsPairwiseForm=*/true);
+ case RK_MinMax:
+ return getMinMaxReductionCost(
+ ReduxType, CmpInst::makeCmpResultType(ReduxType),
+ /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
+ case RK_UnsignedMinMax:
+ return getMinMaxReductionCost(
+ ReduxType, CmpInst::makeCmpResultType(ReduxType),
+ /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
+ case RK_None:
+ break;
+ }
+
+ return getVectorInstrCost(I->getOpcode(),
+ EEI->getOperand(0)->getType(), Idx);
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst * IE = cast<InsertElementInst>(I);
+ ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return getVectorInstrCost(I->getOpcode(),
+ IE->getType(), Idx);
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
+ unsigned NumVecElems = VecTypOp0->getVectorNumElements();
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+
+ if (NumVecElems == Mask.size()) {
+ if (isReverseVectorMask(Mask))
+ return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
+ 0, nullptr);
+ if (isAlternateVectorMask(Mask))
+ return getShuffleCost(TargetTransformInfo::SK_Alternate,
+ VecTypOp0, 0, nullptr);
+
+ if (isZeroEltBroadcastVectorMask(Mask))
+ return getShuffleCost(TargetTransformInfo::SK_Broadcast,
+ VecTypOp0, 0, nullptr);
+
+ if (isSingleSourceVectorMask(Mask))
+ return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ VecTypOp0, 0, nullptr);
+
+ return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ VecTypOp0, 0, nullptr);
+ }
+
+ return -1;
+ }
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ SmallVector<Value *, 4> Args(II->arg_operands());
+
+ FastMathFlags FMF;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+
+ return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
+ Args, FMF);
+ }
+ return -1;
+ default:
+ // We don't have any information on this instruction.
+ return -1;
+ }
+}
+
TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp
index c7e2c0f3412a..34c998501a6c 100644
--- a/contrib/llvm/lib/Analysis/Trace.cpp
+++ b/contrib/llvm/lib/Analysis/Trace.cpp
@@ -16,9 +16,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Trace.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
Function *Trace::getFunction() const {
@@ -30,7 +33,6 @@ Module *Trace::getModule() const {
}
/// print - Write trace to output stream.
-///
void Trace::print(raw_ostream &O) const {
Function *F = getFunction();
O << "; Trace from function " << F->getName() << ", blocks:\n";
@@ -45,7 +47,6 @@ void Trace::print(raw_ostream &O) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Debugger convenience method; writes trace to standard error
/// output stream.
-///
LLVM_DUMP_METHOD void Trace::dump() const {
print(dbgs());
}
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 86c528de267a..c9ed026a1e33 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -123,27 +123,38 @@
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
// A handy option for disabling TBAA functionality. The same effect can also be
// achieved by stripping the !tbaa tags from IR, but this option is sometimes
// more convenient.
-static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true), cl::Hidden);
namespace {
+
/// This is a simple wrapper around an MDNode which provides a higher-level
/// interface by hiding the details of how alias analysis information is encoded
/// in its operands.
template<typename MDNodeTy>
class TBAANodeImpl {
- MDNodeTy *Node;
+ MDNodeTy *Node = nullptr;
public:
- TBAANodeImpl() : Node(nullptr) {}
+ TBAANodeImpl() = default;
explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {}
/// getNode - Get the MDNode for this TBAANode.
@@ -176,8 +187,8 @@ public:
/// \name Specializations of \c TBAANodeImpl for const and non const qualified
/// \c MDNode.
/// @{
-typedef TBAANodeImpl<const MDNode> TBAANode;
-typedef TBAANodeImpl<MDNode> MutableTBAANode;
+using TBAANode = TBAANodeImpl<const MDNode>;
+using MutableTBAANode = TBAANodeImpl<MDNode>;
/// @}
/// This is a simple wrapper around an MDNode which provides a
@@ -197,12 +208,15 @@ public:
MDNodeTy *getBaseType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(0));
}
+
MDNodeTy *getAccessType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
+
uint64_t getOffset() const {
return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
}
+
/// Test if this TBAAStructTagNode represents a type for objects
/// which are not modified (by any means) in the context where this
/// AliasAnalysis is relevant.
@@ -219,8 +233,8 @@ public:
/// \name Specializations of \c TBAAStructTagNodeImpl for const and non const
/// qualified \c MDNods.
/// @{
-typedef TBAAStructTagNodeImpl<const MDNode> TBAAStructTagNode;
-typedef TBAAStructTagNodeImpl<MDNode> MutableTBAAStructTagNode;
+using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>;
+using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>;
/// @}
/// This is a simple wrapper around an MDNode which provides a
@@ -228,10 +242,10 @@ typedef TBAAStructTagNodeImpl<MDNode> MutableTBAAStructTagNode;
/// information is encoded in its operands.
class TBAAStructTypeNode {
/// This node should be created with createTBAAStructTypeNode.
- const MDNode *Node;
+ const MDNode *Node = nullptr;
public:
- TBAAStructTypeNode() : Node(nullptr) {}
+ TBAAStructTypeNode() = default;
explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
/// Get the MDNode for this TBAAStructTypeNode.
@@ -283,7 +297,8 @@ public:
return TBAAStructTypeNode(P);
}
};
-}
+
+} // end anonymous namespace
/// Check the first operand of the tbaa tag node; if it is an MDNode, we treat
/// it as struct-path aware TBAA format; otherwise, we treat it as scalar TBAA
@@ -299,17 +314,8 @@ AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
if (!EnableTBAA)
return AAResultBase::alias(LocA, LocB);
- // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
- // be conservative.
- const MDNode *AM = LocA.AATags.TBAA;
- if (!AM)
- return AAResultBase::alias(LocA, LocB);
- const MDNode *BM = LocB.AATags.TBAA;
- if (!BM)
- return AAResultBase::alias(LocA, LocB);
-
- // If they may alias, chain to the next AliasAnalysis.
- if (Aliases(AM, BM))
+ // If accesses may alias, chain to the next AliasAnalysis.
+ if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA))
return AAResultBase::alias(LocA, LocB);
// Otherwise return a definitive result.
@@ -365,7 +371,7 @@ ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
if (const MDNode *M =
CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
return AAResultBase::getModRefInfo(CS, Loc);
}
@@ -380,7 +386,7 @@ ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
if (const MDNode *M2 =
CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
- return MRI_NoModRef;
+ return ModRefInfo::NoModRef;
return AAResultBase::getModRefInfo(CS1, CS2);
}
@@ -409,25 +415,24 @@ bool MDNode::isTBAAVtableAccess() const {
return false;
}
+static bool matchAccessTags(const MDNode *A, const MDNode *B,
+ const MDNode **GenericTag = nullptr);
+
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+ const MDNode *GenericTag;
+ matchAccessTags(A, B, &GenericTag);
+ return const_cast<MDNode*>(GenericTag);
+}
+
+static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) {
if (!A || !B)
return nullptr;
if (A == B)
return A;
- // For struct-path aware TBAA, we use the access type of the tag.
- assert(isStructPathTBAA(A) && isStructPathTBAA(B) &&
- "Auto upgrade should have taken care of this!");
- A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType());
- if (!A)
- return nullptr;
- B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType());
- if (!B)
- return nullptr;
-
- SmallSetVector<MDNode *, 4> PathA;
- MutableTBAANode TA(A);
+ SmallSetVector<const MDNode *, 4> PathA;
+ TBAANode TA(A);
while (TA.getNode()) {
if (PathA.count(TA.getNode()))
report_fatal_error("Cycle found in TBAA metadata.");
@@ -435,8 +440,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
TA = TA.getParent();
}
- SmallSetVector<MDNode *, 4> PathB;
- MutableTBAANode TB(B);
+ SmallSetVector<const MDNode *, 4> PathB;
+ TBAANode TB(B);
while (TB.getNode()) {
if (PathB.count(TB.getNode()))
report_fatal_error("Cycle found in TBAA metadata.");
@@ -447,7 +452,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
int IA = PathA.size() - 1;
int IB = PathB.size() - 1;
- MDNode *Ret = nullptr;
+ const MDNode *Ret = nullptr;
while (IA >= 0 && IB >= 0) {
if (PathA[IA] == PathB[IB])
Ret = PathA[IA];
@@ -457,17 +462,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
--IB;
}
- // We either did not find a match, or the only common base "type" is
- // the root node. In either case, we don't have any useful TBAA
- // metadata to attach.
- if (!Ret || Ret->getNumOperands() < 2)
- return nullptr;
-
- // We need to convert from a type node to a tag node.
- Type *Int64 = IntegerType::get(A->getContext(), 64);
- Metadata *Ops[3] = {Ret, Ret,
- ConstantAsMetadata::get(ConstantInt::get(Int64, 0))};
- return MDNode::get(A->getContext(), Ops);
+ return Ret;
}
void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
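With this hunk, getMostGenericTBAA simply forwards to matchAccessTags (declared earlier, defined further down), and getLeastCommonType walks both type paths to their deepest shared node. In struct-path terms the behavior amounts to something like the following comment-level sketch, which uses invented metadata numbering purely for illustration:

    // !0 = !{!"Simple C/C++ TBAA"}            ; root
    // !1 = !{!"omnipotent char", !0, i64 0}   ; char type node
    // !2 = !{!"int", !1, i64 0}               ; int type node
    // !3 = !{!2, !2, i64 0}                   ; access tag of an int load
    // !4 = !{!1, !1, i64 0}                   ; access tag of a char load
    //
    // getMostGenericTBAA(!3, !4) finds "omnipotent char" as the least common
    // type of the two paths and returns a tag of the shape !{!1, !1, i64 0};
    // if only the root were shared, it would return null instead.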
@@ -490,70 +485,96 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
N.NoAlias = getMetadata(LLVMContext::MD_noalias);
}
-/// Aliases - Test whether the type represented by A may alias the
-/// type represented by B.
-bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
- // Verify that both input nodes are struct-path aware. Auto-upgrade should
- // have taken care of this.
- assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
- assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
+static bool findAccessType(TBAAStructTagNode BaseTag,
+ const MDNode *AccessTypeNode,
+ uint64_t &OffsetInBase) {
+ // Start from the base type, follow the edge with the correct offset in
+ // the type DAG and adjust the offset until we reach the access type or
+ // until we reach a root node.
+ TBAAStructTypeNode BaseType(BaseTag.getBaseType());
+ OffsetInBase = BaseTag.getOffset();
+
+ while (const MDNode *BaseTypeNode = BaseType.getNode()) {
+ if (BaseTypeNode == AccessTypeNode)
+ return true;
- // Keep track of the root node for A and B.
- TBAAStructTypeNode RootA, RootB;
- TBAAStructTagNode TagA(A), TagB(B);
+ // Follow the edge with the correct offset, Offset will be adjusted to
+ // be relative to the field type.
+ BaseType = BaseType.getParent(OffsetInBase);
+ }
+ return false;
+}
- // TODO: We need to check if AccessType of TagA encloses AccessType of
- // TagB to support aggregate AccessType. If yes, return true.
+static const MDNode *createAccessTag(const MDNode *AccessType) {
+ // If there is no access type or the access type is the root node, then
+ // we don't have any useful access tag to return.
+ if (!AccessType || AccessType->getNumOperands() < 2)
+ return nullptr;
- // Start from the base type of A, follow the edge with the correct offset in
- // the type DAG and adjust the offset until we reach the base type of B or
- // until we reach the Root node.
- // Compare the adjusted offset once we have the same base.
+ Type *Int64 = IntegerType::get(AccessType->getContext(), 64);
+ auto *ImmutabilityFlag = ConstantAsMetadata::get(ConstantInt::get(Int64, 0));
+ Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
+ const_cast<MDNode*>(AccessType), ImmutabilityFlag};
+ return MDNode::get(AccessType->getContext(), Ops);
+}
- // Climb the type DAG from base type of A to see if we reach base type of B.
- const MDNode *BaseA = TagA.getBaseType();
- const MDNode *BaseB = TagB.getBaseType();
- uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
- for (TBAAStructTypeNode T(BaseA);;) {
- if (T.getNode() == BaseB)
- // Base type of A encloses base type of B, check if the offsets match.
- return OffsetA == OffsetB;
-
- RootA = T;
- // Follow the edge with the correct offset, OffsetA will be adjusted to
- // be relative to the field type.
- T = T.getParent(OffsetA);
- if (!T.getNode())
- break;
+/// matchAccessTags - Return true if the given pair of access tags are allowed to
+/// overlap. If \arg GenericTag is not null, then on return it points to the
+/// most generic access descriptor for the given two.
+static bool matchAccessTags(const MDNode *A, const MDNode *B,
+ const MDNode **GenericTag) {
+ if (A == B) {
+ if (GenericTag)
+ *GenericTag = A;
+ return true;
}
- // Reset OffsetA and climb the type DAG from base type of B to see if we reach
- // base type of A.
- OffsetA = TagA.getOffset();
- for (TBAAStructTypeNode T(BaseB);;) {
- if (T.getNode() == BaseA)
- // Base type of B encloses base type of A, check if the offsets match.
- return OffsetA == OffsetB;
-
- RootB = T;
- // Follow the edge with the correct offset, OffsetB will be adjusted to
- // be relative to the field type.
- T = T.getParent(OffsetB);
- if (!T.getNode())
- break;
+ // Accesses with no TBAA information may alias with any other accesses.
+ if (!A || !B) {
+ if (GenericTag)
+ *GenericTag = nullptr;
+ return true;
}
- // Neither node is an ancestor of the other.
+ // Verify that both input nodes are struct-path aware. Auto-upgrade should
+ // have taken care of this.
+ assert(isStructPathTBAA(A) && "Access A is not struct-path aware!");
+ assert(isStructPathTBAA(B) && "Access B is not struct-path aware!");
- // If they have different roots, they're part of different potentially
- // unrelated type systems, so we must be conservative.
- if (RootA.getNode() != RootB.getNode())
+ TBAAStructTagNode TagA(A), TagB(B);
+ const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(),
+ TagB.getAccessType());
+ if (GenericTag)
+ *GenericTag = createAccessTag(CommonType);
+
+ // TODO: We need to check if AccessType of TagA encloses AccessType of
+ // TagB to support aggregate AccessType. If yes, return true.
+
+ // Climb the type DAG from base type of A to see if we reach base type of B.
+ uint64_t OffsetA;
+ if (findAccessType(TagA, TagB.getBaseType(), OffsetA))
+ return OffsetA == TagB.getOffset();
+
+ // Climb the type DAG from base type of B to see if we reach base type of A.
+ uint64_t OffsetB;
+ if (findAccessType(TagB, TagA.getBaseType(), OffsetB))
+ return OffsetB == TagA.getOffset();
+
+ // If the final access types have different roots, they're part of different
+ // potentially unrelated type systems, so we must be conservative.
+ if (!CommonType)
return true;
// If they have the same root, then we've proved there's no alias.
return false;
}
+/// Aliases - Test whether the access represented by tag A may alias the
+/// access represented by tag B.
+bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
+ return matchAccessTags(A, B);
+}
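// An illustrative struct-path example (assumed typical C/C++ TBAA encoding for
// `struct S { int i; float f; };`, shown roughly):
//   !0 = !{!"Simple C/C++ TBAA"}
//   !1 = !{!"omnipotent char", !0, i64 0}
//   !2 = !{!"int", !1, i64 0}
//   !3 = !{!"float", !1, i64 0}
//   !4 = !{!"S", !2, i64 0, !3, i64 4}
//   !5 = !{!4, !2, i64 0}        ; access tag for S::i
//   !6 = !{!4, !3, i64 4}        ; access tag for S::f
// matchAccessTags(!5, !6) finds that both tags share base type !4 but their
// offsets (0 vs. 4) differ, so the accesses are disjoint and TBAA can report
// NoAlias. getMostGenericTBAA(!5, !6) walks the access types !2 and !3 up to
// their least common ancestor !1 and wraps it in a fresh access tag via
// createAccessTag.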
+
AnalysisKey TypeBasedAA::Key;
TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
diff --git a/contrib/llvm/lib/Analysis/ValueLattice.cpp b/contrib/llvm/lib/Analysis/ValueLattice.cpp
new file mode 100644
index 000000000000..7de437ca480e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ValueLattice.cpp
@@ -0,0 +1,26 @@
+//===- ValueLattice.cpp - Value constraint analysis -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueLattice.h"
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val) {
+ if (Val.isUndefined())
+ return OS << "undefined";
+ if (Val.isOverdefined())
+ return OS << "overdefined";
+
+ if (Val.isNotConstant())
+ return OS << "notconstant<" << *Val.getNotConstant() << ">";
+ if (Val.isConstantRange())
+ return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+ << Val.getConstantRange().getUpper() << ">";
+ return OS << "constant<" << *Val.getConstant() << ">";
+}
+} // end namespace llvm
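// The five printed forms are "undefined", "overdefined", "notconstant<...>",
// "constantrange<lo, hi>", and "constant<...>"; for example, an element holding
// the range [0, 10) prints as "constantrange<0, 10>" and a constant i32 7
// prints as "constant<i32 7>".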
diff --git a/contrib/llvm/lib/Analysis/ValueLatticeUtils.cpp b/contrib/llvm/lib/Analysis/ValueLatticeUtils.cpp
new file mode 100644
index 000000000000..22c9de4fe94d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ValueLatticeUtils.cpp
@@ -0,0 +1,44 @@
+//===-- ValueLatticeUtils.cpp - Utils for solving lattices ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common functions useful for performing data-flow
+// analyses that propagate values across function boundaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+using namespace llvm;
+
+bool llvm::canTrackArgumentsInterprocedurally(Function *F) {
+ return F->hasLocalLinkage() && !F->hasAddressTaken();
+}
+
+bool llvm::canTrackReturnsInterprocedurally(Function *F) {
+ return F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked);
+}
+
+bool llvm::canTrackGlobalVariableInterprocedurally(GlobalVariable *GV) {
+ if (GV->isConstant() || !GV->hasLocalLinkage() ||
+ !GV->hasDefinitiveInitializer())
+ return false;
+ return !any_of(GV->users(), [&](User *U) {
+ if (auto *Store = dyn_cast<StoreInst>(U)) {
+ if (Store->getValueOperand() == GV || Store->isVolatile())
+ return true;
+ } else if (auto *Load = dyn_cast<LoadInst>(U)) {
+ if (Load->isVolatile())
+ return true;
+ } else {
+ return true;
+ }
+ return false;
+ });
+}
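// Illustrative IR (assumed, typed-pointer syntax of this era): a global such as
//
//   @g = internal global i32 0
//   define internal void @bump() {
//     %v = load i32, i32* @g
//     %n = add i32 %v, 1
//     store i32 %n, i32* @g
//     ret void
//   }
//
// is trackable: it is non-constant, has local linkage and a definitive
// initializer, and is only used by non-volatile loads and stores of its value.
// Storing the global's own address somewhere (e.g. "store i32* @g, i32** @p"),
// a volatile access, or any other kind of user makes
// canTrackGlobalVariableInterprocedurally return false.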
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index cdfe74d158c9..2730daefa625 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -13,37 +13,66 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
-#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Statepoint.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <array>
-#include <cstring>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -54,12 +83,6 @@ const unsigned MaxDepth = 6;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
-// This optimization is known to cause performance regressions is some cases,
-// keep it under a temporary flag for now.
-static cl::opt<bool>
-DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits",
- cl::Hidden, cl::init(true));
-
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -70,6 +93,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
}
namespace {
+
// Simplifying using an assume can only be done in a particular control-flow
// context (the context instruction provides that context). If an assume and
// the context instruction are not in the same block then the DT helps in
@@ -79,6 +103,7 @@ struct Query {
AssumptionCache *AC;
const Instruction *CxtI;
const DominatorTree *DT;
+
// Unlike the other analyses, this may be a nullptr because not all clients
// provide it currently.
OptimizationRemarkEmitter *ORE;
@@ -92,11 +117,12 @@ struct Query {
/// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
/// (all of which can call computeKnownBits), and so on.
std::array<const Value *, MaxDepth> Excluded;
- unsigned NumExcluded;
+
+ unsigned NumExcluded = 0;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
- : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {}
+ : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {}
Query(const Query &Q, const Value *NewExcl)
: DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
@@ -113,6 +139,7 @@ struct Query {
return std::find(Excluded.begin(), End, Value) != End;
}
};
+
} // end anonymous namespace
// Given the provided Value and, potentially, a context instruction, return
@@ -171,7 +198,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
-
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
for (const User *U : CxtI->users()) {
if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -275,47 +301,7 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
computeKnownBits(Op0, LHSKnown, Depth + 1, Q);
computeKnownBits(Op1, Known2, Depth + 1, Q);
- // Carry in a 1 for a subtract, rather than a 0.
- uint64_t CarryIn = 0;
- if (!Add) {
- // Sum = LHS + ~RHS + 1
- std::swap(Known2.Zero, Known2.One);
- CarryIn = 1;
- }
-
- APInt PossibleSumZero = ~LHSKnown.Zero + ~Known2.Zero + CarryIn;
- APInt PossibleSumOne = LHSKnown.One + Known2.One + CarryIn;
-
- // Compute known bits of the carry.
- APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnown.Zero ^ Known2.Zero);
- APInt CarryKnownOne = PossibleSumOne ^ LHSKnown.One ^ Known2.One;
-
- // Compute set of known bits (where all three relevant bits are known).
- APInt LHSKnownUnion = LHSKnown.Zero | LHSKnown.One;
- APInt RHSKnownUnion = Known2.Zero | Known2.One;
- APInt CarryKnownUnion = CarryKnownZero | CarryKnownOne;
- APInt Known = LHSKnownUnion & RHSKnownUnion & CarryKnownUnion;
-
- assert((PossibleSumZero & Known) == (PossibleSumOne & Known) &&
- "known bits of sum differ");
-
- // Compute known bits of the result.
- KnownOut.Zero = ~PossibleSumOne & Known;
- KnownOut.One = PossibleSumOne & Known;
-
- // Are we still trying to solve for the sign bit?
- if (!Known.isSignBitSet()) {
- if (NSW) {
- // Adding two non-negative numbers, or subtracting a negative number from
- // a non-negative one, can't wrap into negative.
- if (LHSKnown.isNonNegative() && Known2.isNonNegative())
- KnownOut.makeNonNegative();
- // Adding two negative numbers, or subtracting a non-negative number from
- // a negative one, can't wrap into non-negative.
- else if (LHSKnown.isNegative() && Known2.isNegative())
- KnownOut.makeNegative();
- }
- }
+ KnownOut = KnownBits::computeForAddSub(Add, NSW, LHSKnown, Known2);
}
static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
@@ -350,21 +336,78 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
}
}
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conservative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- unsigned TrailZ = Known.countMinTrailingZeros() +
- Known2.countMinTrailingZeros();
+ assert(!Known.hasConflict() && !Known2.hasConflict());
+ // Compute a conservative estimate for high known-0 bits.
unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
Known2.countMinLeadingZeros(),
BitWidth) - BitWidth;
-
- TrailZ = std::min(TrailZ, BitWidth);
LeadZ = std::min(LeadZ, BitWidth);
+
+ // The result of the bottom bits of an integer multiply can be
+ // inferred by looking at the bottom bits of both operands and
+ // multiplying them together.
+ // We can infer at least the minimum number of known trailing bits
+ // of both operands. Depending on number of trailing zeros, we can
+ // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming
+ // a and b are divisible by m and n respectively.
+ // We then calculate how many of those bits are inferrable and set
+ // the output. For example, the i8 mul:
+ // a = XXXX1100 (12)
+ // b = XXXX1110 (14)
+ // We know the bottom 3 bits are zero since the first can be divided by
+ // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4).
+ // Applying the multiplication to the trimmed arguments gets:
+ // XX11 (3)
+ // X111 (7)
+ // -------
+ // XX11
+ // XX11
+ // XX11
+ // XX11
+ // -------
+ // XXXXX01
+ // Which allows us to infer the 2 LSBs. Since we're multiplying the result
+ // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits.
+ // The proof for this can be described as:
+ // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) &&
+ // (C7 == (1 << (umin(countTrailingZeros(C1), C5) +
+ // umin(countTrailingZeros(C2), C6) +
+ // umin(C5 - umin(countTrailingZeros(C1), C5),
+ // C6 - umin(countTrailingZeros(C2), C6)))) - 1)
+ // %aa = shl i8 %a, C5
+ // %bb = shl i8 %b, C6
+ // %aaa = or i8 %aa, C1
+ // %bbb = or i8 %bb, C2
+ // %mul = mul i8 %aaa, %bbb
+ // %mask = and i8 %mul, C7
+ // =>
+ // %mask = i8 ((C1*C2)&C7)
+ // Where C5, C6 describe the known bits of %a, %b
+ // C1, C2 describe the known bottom bits of %a, %b.
+ // C7 describes the mask of the known bits of the result.
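  // Concretely, for the i8 example above: a = 16*x + 12 and b = 16*y + 14 for
  // unknown x and y, so a*b = 256*x*y + 224*x + 192*y + 168. Every term except
  // 168 is a multiple of 32, hence a*b is congruent to 8 (mod 32) and the low
  // five bits of the product are always 01000. A minimal standalone check
  // (illustrative only):
  //   for (unsigned x = 0; x < 16; ++x)
  //     for (unsigned y = 0; y < 16; ++y)
  //       assert((((16 * x + 12) * (16 * y + 14)) & 31u) == 8u);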
+ APInt Bottom0 = Known.One;
+ APInt Bottom1 = Known2.One;
+
+ // How many times we'd be able to divide each argument by 2 (shr by 1).
+ // This gives us the number of trailing zeros on the multiplication result.
+ unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes();
+ unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes();
+ unsigned TrailZero0 = Known.countMinTrailingZeros();
+ unsigned TrailZero1 = Known2.countMinTrailingZeros();
+ unsigned TrailZ = TrailZero0 + TrailZero1;
+
+ // Figure out the fewest known-bits operand.
+ unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0,
+ TrailBitsKnown1 - TrailZero1);
+ unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth);
+
+ APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) *
+ Bottom1.getLoBits(TrailBitsKnown1);
+
Known.resetAll();
- Known.Zero.setLowBits(TrailZ);
Known.Zero.setHighBits(LeadZ);
+ Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
+ Known.One |= BottomKnown.getLoBits(ResultBitsKnown);
// Only make use of no-wrap flags if we failed to compute the sign bit
// directly. This matters if the multiplication always overflows, in
@@ -420,17 +463,19 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
continue;
// If all uses of this value are ephemeral, then so is this value.
- if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) {
+ if (llvm::all_of(V->users(), [&](const User *U) {
+ return EphValues.count(U);
+ })) {
if (V == E)
return true;
- EphValues.insert(V);
- if (const User *U = dyn_cast<User>(V))
- for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
- J != JE; ++J) {
- if (isSafeToSpeculativelyExecute(*J))
- WorkSet.push_back(*J);
- }
+ if (V == I || isSafeToSpeculativelyExecute(V)) {
+ EphValues.insert(V);
+ if (const User *U = dyn_cast<User>(V))
+ for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
+ J != JE; ++J)
+ WorkSet.push_back(*J);
+ }
}
}
@@ -438,13 +483,14 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
}
// Is this an intrinsic that cannot be speculated but also cannot trap?
-static bool isAssumeLikeIntrinsic(const Instruction *I) {
+bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (Function *F = CI->getCalledFunction())
switch (F->getIntrinsicID()) {
default: break;
// FIXME: This list is repeated from NoTTI::getIntrinsicCost.
case Intrinsic::assume:
+ case Intrinsic::sideeffect:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::invariant_start:
@@ -463,7 +509,6 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
bool llvm::isValidAssumeForContext(const Instruction *Inv,
const Instruction *CxtI,
const DominatorTree *DT) {
-
// There are two restrictions on the use of an assume:
// 1. The assume must dominate the context (or the control flow must
// reach the assume whenever it reaches the context).
@@ -560,7 +605,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
m_BitCast(m_Specific(V))));
CmpInst::Predicate Pred;
- ConstantInt *C;
+ uint64_t C;
// assume(v = a)
if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
@@ -662,51 +707,55 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
} else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
+ C < BitWidth) {
KnownBits RHSKnown(BitWidth);
computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
- RHSKnown.Zero.lshrInPlace(C->getZExtValue());
+ RHSKnown.Zero.lshrInPlace(C);
Known.Zero |= RHSKnown.Zero;
- RHSKnown.One.lshrInPlace(C->getZExtValue());
+ RHSKnown.One.lshrInPlace(C);
Known.One |= RHSKnown.One;
// assume(~(v << c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
+ C < BitWidth) {
KnownBits RHSKnown(BitWidth);
computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
// to known bits in V shifted to the right by C.
- RHSKnown.One.lshrInPlace(C->getZExtValue());
+ RHSKnown.One.lshrInPlace(C);
Known.Zero |= RHSKnown.One;
- RHSKnown.Zero.lshrInPlace(C->getZExtValue());
+ RHSKnown.Zero.lshrInPlace(C);
Known.One |= RHSKnown.Zero;
// assume(v >> c = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
+ C < BitWidth) {
KnownBits RHSKnown(BitWidth);
computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.Zero << C->getZExtValue();
- Known.One |= RHSKnown.One << C->getZExtValue();
+ Known.Zero |= RHSKnown.Zero << C;
+ Known.One |= RHSKnown.One << C;
// assume(~(v >> c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
+ C < BitWidth) {
KnownBits RHSKnown(BitWidth);
computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
// to known bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.One << C->getZExtValue();
- Known.One |= RHSKnown.Zero << C->getZExtValue();
+ Known.Zero |= RHSKnown.One << C;
+ Known.One |= RHSKnown.Zero << C;
// assume(v >=_s c) where c is non-negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SGE &&
@@ -784,24 +833,26 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (Known.Zero.intersects(Known.One)) {
Known.resetAll();
- if (Q.ORE) {
- auto *CxtI = const_cast<Instruction *>(Q.CxtI);
- OptimizationRemarkAnalysis ORA("value-tracking", "BadAssumption", CxtI);
- Q.ORE->emit(ORA << "Detected conflicting code assumptions. Program may "
- "have undefined behavior, or compiler may have "
- "internal error.");
- }
+ if (Q.ORE)
+ Q.ORE->emit([&]() {
+ auto *CxtI = const_cast<Instruction *>(Q.CxtI);
+ return OptimizationRemarkAnalysis("value-tracking", "BadAssumption",
+ CxtI)
+ << "Detected conflicting code assumptions. Program may "
+ "have undefined behavior, or compiler may have "
+ "internal error.";
+ });
}
}
-// Compute known bits from a shift operator, including those with a
-// non-constant shift amount. Known is the outputs of this function. Known2 is a
-// pre-allocated temporary with the/ same bit width as Known. KZF and KOF are
-// operator-specific functors that, given the known-zero or known-one bits
-// respectively, and a shift amount, compute the implied known-zero or known-one
-// bits of the shift operator's result respectively for that shift amount. The
-// results from calling KZF and KOF are conservatively combined for all
-// permitted shift amounts.
+/// Compute known bits from a shift operator, including those with a
+/// non-constant shift amount. Known is the output of this function. Known2 is a
+/// pre-allocated temporary with the same bit width as Known. KZF and KOF are
+/// operator-specific functors that, given the known-zero or known-one bits
+/// respectively, and a shift amount, compute the implied known-zero or
+/// known-one bits of the shift operator's result respectively for that shift
+/// amount. The results from calling KZF and KOF are conservatively combined for
+/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
const Operator *I, KnownBits &Known, KnownBits &Known2,
unsigned Depth, const Query &Q,
@@ -815,19 +866,20 @@ static void computeKnownBitsFromShiftOperator(
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
Known.Zero = KZF(Known.Zero, ShiftAmt);
Known.One = KOF(Known.One, ShiftAmt);
- // If there is conflict between Known.Zero and Known.One, this must be an
- // overflowing left shift, so the shift result is undefined. Clear Known
- // bits so that other code could propagate this undef.
- if ((Known.Zero & Known.One) != 0)
- Known.resetAll();
+ // If the known bits conflict, this must be an overflowing left shift, so
+ // the shift result is poison. We can return anything we want. Choose 0 for
+ // the best folding opportunity.
+ if (Known.hasConflict())
+ Known.setAllZero();
return;
}
computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
- // If the shift amount could be greater than or equal to the bit-width of the LHS, the
- // value could be undef, so we don't know anything about it.
+ // If the shift amount could be greater than or equal to the bit-width of the
+ // LHS, the value could be poison, but bail out because the check below is
+ // expensive. TODO: Should we just carry on?
if ((~Known.Zero).uge(BitWidth)) {
Known.resetAll();
return;
@@ -851,8 +903,7 @@ static void computeKnownBitsFromShiftOperator(
// Early exit if we can't constrain any well-defined shift amount.
if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
!(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
- ShifterOperandIsNonZero =
- isKnownNonZero(I->getOperand(1), Depth + 1, Q);
+ ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q);
if (!*ShifterOperandIsNonZero)
return;
}
@@ -883,13 +934,10 @@ static void computeKnownBitsFromShiftOperator(
Known.One &= KOF(Known2.One, ShiftAmt);
}
- // If there are no compatible shift amounts, then we've proven that the shift
- // amount must be >= the BitWidth, and the result is undefined. We could
- // return anything we'd like, but we need to make sure the sets of known bits
- // stay disjoint (it should be better for some other code to actually
- // propagate the undef than to pick a value here using known bits).
- if (Known.Zero.intersects(Known.One))
- Known.resetAll();
+ // If the known bits conflict, the result is poison. Return a 0 and hope the
+ // caller can further optimize that.
+ if (Known.hasConflict())
+ Known.setAllZero();
}
static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
@@ -931,7 +979,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
}
break;
}
- case Instruction::Or: {
+ case Instruction::Or:
computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
@@ -940,7 +988,6 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// Output known-1 are known to be set if set in either the LHS | RHS.
Known.One |= Known2.One;
break;
- }
case Instruction::Xor: {
computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
@@ -1103,7 +1150,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::LShr: {
- // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ // (lshr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
APInt KZResult = KnownZero.lshr(ShiftAmt);
// High bits known zero.
@@ -1298,9 +1345,6 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
Known3.countMinTrailingZeros()));
- if (DontImproveNonNegativePhiBits)
- break;
-
auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
// If initial value of recurrence is nonnegative, and we are adding
@@ -1525,9 +1569,8 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
// We know that CDS must be a vector of integers. Take the intersection of
// each element.
Known.Zero.setAllBits(); Known.One.setAllBits();
- APInt Elt(BitWidth, 0);
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- Elt = CDS->getElementAsInteger(i);
+ APInt Elt = CDS->getElementAsAPInt(i);
Known.Zero &= ~Elt;
Known.One &= Elt;
}
@@ -1538,7 +1581,6 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
// We know that CV must be a vector of integers. Take the intersection of
// each element.
Known.Zero.setAllBits(); Known.One.setAllBits();
- APInt Elt(BitWidth, 0);
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
Constant *Element = CV->getAggregateElement(i);
auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
@@ -1546,7 +1588,7 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
Known.resetAll();
return;
}
- Elt = ElementCI->getValue();
+ const APInt &Elt = ElementCI->getValue();
Known.Zero &= ~Elt;
Known.One &= Elt;
}
@@ -1602,6 +1644,8 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
/// types and vectors of integers.
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q) {
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+
if (const Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -1755,6 +1799,58 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
return false;
}
+static bool isKnownNonNullFromDominatingCondition(const Value *V,
+ const Instruction *CtxI,
+ const DominatorTree *DT) {
+ assert(V->getType()->isPointerTy() && "V must be pointer type");
+ assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
+
+ if (!CtxI || !DT)
+ return false;
+
+ unsigned NumUsesExplored = 0;
+ for (auto *U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+
+ // If the value is used as an argument to a call or invoke, then argument
+ // attributes may provide an answer about null-ness.
+ if (auto CS = ImmutableCallSite(U))
+ if (auto *CalledFunc = CS.getCalledFunction())
+ for (const Argument &Arg : CalledFunc->args())
+ if (CS.getArgOperand(Arg.getArgNo()) == V &&
+ Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
+ return true;
+
+ // Consider only compare instructions uniquely controlling a branch
+ CmpInst::Predicate Pred;
+ if (!match(const_cast<User *>(U),
+ m_c_ICmp(Pred, m_Specific(V), m_Zero())) ||
+ (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
+ continue;
+
+ for (auto *CmpU : U->users()) {
+ if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) {
+ assert(BI->isConditional() && "uses a comparison!");
+
+ BasicBlock *NonNullSuccessor =
+ BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
+ BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
+ if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ return true;
+ } else if (Pred == ICmpInst::ICMP_NE &&
+ match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) &&
+ DT->dominates(cast<Instruction>(CmpU), CtxI)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
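// Illustrative pattern (assumed IR): given
//   %c = icmp eq i32* %p, null
//   br i1 %c, label %is_null, label %not_null
// the edge to %not_null is the non-null successor, so a context instruction in
// a block dominated by that edge can conclude that %p is non-null. The same
// holds for an ICMP_NE feeding an @llvm.experimental.guard call that dominates
// the context instruction.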
+
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never Value? 'RangeType' is
/// the type of the value described by the range.
@@ -1800,7 +1896,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
return true;
}
- return false;
+ // A global variable in address space 0 is non null unless extern weak
+ // or an absolute symbol reference. Other address spaces may have null as a
+ // valid address for a global, so we can't assume anything.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
+ GV->getType()->getAddressSpace() == 0)
+ return true;
+ } else
+ return false;
}
if (auto *I = dyn_cast<Instruction>(V)) {
@@ -1815,14 +1919,36 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
}
+ // Check for pointer simplifications.
+ if (V->getType()->isPointerTy()) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0)
+ return true;
+
+ // A byval, inalloca, or nonnull argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValOrInAllocaAttr() || A->hasNonNullAttr())
+ return true;
+
+ // A Load tagged with nonnull metadata is never null.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V))
+ if (LI->getMetadata(LLVMContext::MD_nonnull))
+ return true;
+
+ if (auto CS = ImmutableCallSite(V))
+ if (CS.isReturnNonNull())
+ return true;
+ }
+
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ >= MaxDepth)
return false;
- // Check for pointer simplifications.
+ // Check for recursive pointer simplifications.
if (V->getType()->isPointerTy()) {
- if (isKnownNonNullAt(V, Q.CxtI, Q.DT))
+ if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
return true;
+
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, Depth, Q))
return true;
@@ -1949,7 +2075,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
}
// Check if all incoming values are non-zero constant.
- bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) {
+ bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) {
return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
});
if (AllNonZeroConstants)
@@ -2033,11 +2159,7 @@ static unsigned computeNumSignBitsVectorConstant(const Value *V,
if (!Elt)
return 0;
- // If the sign bit is 1, flip the bits, so we always count leading zeros.
- APInt EltVal = Elt->getValue();
- if (EltVal.isNegative())
- EltVal = ~EltVal;
- MinSignBits = std::min(MinSignBits, EltVal.countLeadingZeros());
+ MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
}
return MinSignBits;
@@ -2061,6 +2183,7 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
/// vector element with the minimum number of known sign bits.
static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
const Query &Q) {
+ assert(Depth <= MaxDepth && "Limit Search Depth");
// We return the minimum number of sign bits that are guaranteed to be present
// in V, so for undef we have to conservatively return 1. We don't have the
@@ -2236,6 +2359,17 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
+ case Instruction::Mul: {
+ // The output of the Mul can be at most twice the valid bits in the inputs.
+ unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (SignBitsOp0 == 1) return 1; // Early out.
+ unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (SignBitsOp1 == 1) return 1;
+ unsigned OutValidBits =
+ (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
+ return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
+ }
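  // Worked example for the Mul case above: with i32 operands that each have at
  // least 20 sign bits, each operand carries at most 32 - 20 + 1 = 13 valid
  // bits, so the product needs at most 26 valid bits and is guaranteed at
  // least 32 - 26 + 1 = 7 sign bits.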
+
case Instruction::PHI: {
const PHINode *PN = cast<PHINode>(U);
unsigned NumIncomingValues = PN->getNumIncomingValues();
@@ -2507,9 +2641,7 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
- if (ICS->hasNoNaNs())
- return Intrinsic::sqrt;
- return Intrinsic::not_intrinsic;
+ return Intrinsic::sqrt;
}
return Intrinsic::not_intrinsic;
@@ -2520,41 +2652,40 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
///
/// NOTE: this function will need to be revisited when we support non-default
/// rounding modes!
-///
bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
unsigned Depth) {
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ if (auto *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();
+ // Limit search depth.
if (Depth == MaxDepth)
- return false; // Limit search depth.
+ return false;
- const Operator *I = dyn_cast<Operator>(V);
- if (!I) return false;
+ auto *Op = dyn_cast<Operator>(V);
+ if (!Op)
+ return false;
- // Check if the nsz fast-math flag is set
- if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
+ // Check if the nsz fast-math flag is set.
+ if (auto *FPO = dyn_cast<FPMathOperator>(Op))
if (FPO->hasNoSignedZeros())
return true;
- // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
- if (I->getOpcode() == Instruction::FAdd)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1)))
- if (CFP->isNullValue())
- return true;
+ // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
+ if (match(Op, m_FAdd(m_Value(), m_Zero())))
+ return true;
// sitofp and uitofp turn into +0.0 for zero.
- if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+ if (isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op))
return true;
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
+ if (auto *Call = dyn_cast<CallInst>(Op)) {
+ Intrinsic::ID IID = getIntrinsicForCallSite(Call, TLI);
switch (IID) {
default:
break;
// sqrt(-0.0) = -0.0, no other negative results are possible.
case Intrinsic::sqrt:
- return CannotBeNegativeZero(CI->getArgOperand(0), TLI, Depth + 1);
+ return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
// fabs(x) != -0.0
case Intrinsic::fabs:
return true;
@@ -2690,6 +2821,41 @@ bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
}
+bool llvm::isKnownNeverNaN(const Value *V) {
+ assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
+
+ // If we're told that NaNs won't happen, assume they won't.
+ if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
+ if (FPMathOp->hasNoNaNs())
+ return true;
+
+ // TODO: Handle instructions and potentially recurse like other 'isKnown'
+ // functions. For example, the result of sitofp is never NaN.
+
+ // Handle scalar constants.
+ if (auto *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->isNaN();
+
+ // Bail out for constant expressions, but try to handle vector constants.
+ if (!V->getType()->isVectorTy() || !isa<Constant>(V))
+ return false;
+
+ // For vectors, verify that each element is not NaN.
+ unsigned NumElts = V->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
+ if (!Elt)
+ return false;
+ if (isa<UndefValue>(Elt))
+ continue;
+ auto *CElt = dyn_cast<ConstantFP>(Elt);
+ if (!CElt || CElt->isNaN())
+ return false;
+ }
+ // All elements were confirmed not-NaN or undefined.
+ return true;
+}
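// For example (illustrative): <2 x float> <float 1.0, float undef> is known
// never NaN, while a vector containing any NaN element, or any non-constant
// value that carries no nnan flag, conservatively reports false here.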
+
/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
@@ -2749,7 +2915,6 @@ Value *llvm::isBytewiseValue(Value *V) {
return nullptr;
}
-
// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
@@ -2760,7 +2925,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
SmallVectorImpl<unsigned> &Idxs,
unsigned IdxSkip,
Instruction *InsertBefore) {
- llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
+ StructType *STy = dyn_cast<StructType>(IndexedType);
if (STy) {
// Save the original To argument so we can modify it
Value *OrigTo = To;
@@ -2799,8 +2964,8 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
return nullptr;
// Insert the value in the new (sub) aggregate
- return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
- "tmp", InsertBefore);
+ return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
+ "tmp", InsertBefore);
}
// This helper takes a nested struct and extracts a part of it (which is again a
@@ -3307,7 +3472,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
/// This is a wrapper around GetUnderlyingObjects and adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
-void llvm::getUnderlyingObjectsForCodeGen(const Value *V,
+/// It returns false if unidentified object is found in GetUnderlyingObjects.
+bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
SmallVectorImpl<Value *> &Objects,
const DataLayout &DL) {
SmallPtrSet<const Value *, 16> Visited;
@@ -3333,11 +3499,12 @@ void llvm::getUnderlyingObjectsForCodeGen(const Value *V,
// getUnderlyingObjectsForCodeGen also fails for safety.
if (!isIdentifiedObject(V)) {
Objects.clear();
- return;
+ return false;
}
Objects.push_back(const_cast<Value *>(V));
}
} while (!Working.empty());
+ return true;
}
/// Return true if the only users of this pointer are lifetime markers.
@@ -3401,7 +3568,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
// Speculative load may create a race that did not exist in the source.
LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
// Speculative load may load data from dirty regions.
- LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
+ LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+ LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
@@ -3443,100 +3611,6 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
}
-/// Return true if we know that the specified value is never null.
-bool llvm::isKnownNonNull(const Value *V) {
- assert(V->getType()->isPointerTy() && "V must be pointer type");
-
- // Alloca never returns null, malloc might.
- if (isa<AllocaInst>(V)) return true;
-
- // A byval, inalloca, or nonnull argument is never null.
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
-
- // A global variable in address space 0 is non null unless extern weak
- // or an absolute symbol reference. Other address spaces may have null as a
- // valid address for a global, so we can't assume anything.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
- GV->getType()->getAddressSpace() == 0;
-
- // A Load tagged with nonnull metadata is never null.
- if (const LoadInst *LI = dyn_cast<LoadInst>(V))
- return LI->getMetadata(LLVMContext::MD_nonnull);
-
- if (auto CS = ImmutableCallSite(V))
- if (CS.isReturnNonNull())
- return true;
-
- return false;
-}
-
-static bool isKnownNonNullFromDominatingCondition(const Value *V,
- const Instruction *CtxI,
- const DominatorTree *DT) {
- assert(V->getType()->isPointerTy() && "V must be pointer type");
- assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
- assert(CtxI && "Context instruction required for analysis");
- assert(DT && "Dominator tree required for analysis");
-
- unsigned NumUsesExplored = 0;
- for (auto *U : V->users()) {
- // Avoid massive lists
- if (NumUsesExplored >= DomConditionsMaxUses)
- break;
- NumUsesExplored++;
-
- // If the value is used as an argument to a call or invoke, then argument
- // attributes may provide an answer about null-ness.
- if (auto CS = ImmutableCallSite(U))
- if (auto *CalledFunc = CS.getCalledFunction())
- for (const Argument &Arg : CalledFunc->args())
- if (CS.getArgOperand(Arg.getArgNo()) == V &&
- Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
- return true;
-
- // Consider only compare instructions uniquely controlling a branch
- CmpInst::Predicate Pred;
- if (!match(const_cast<User *>(U),
- m_c_ICmp(Pred, m_Specific(V), m_Zero())) ||
- (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
- continue;
-
- for (auto *CmpU : U->users()) {
- if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) {
- assert(BI->isConditional() && "uses a comparison!");
-
- BasicBlock *NonNullSuccessor =
- BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
- BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
- if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
- return true;
- } else if (Pred == ICmpInst::ICMP_NE &&
- match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) &&
- DT->dominates(cast<Instruction>(CmpU), CtxI)) {
- return true;
- }
- }
- }
-
- return false;
-}
-
-bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
- const DominatorTree *DT) {
- if (isa<ConstantPointerNull>(V) || isa<UndefValue>(V))
- return false;
-
- if (isKnownNonNull(V))
- return true;
-
- if (!CtxI || !DT)
- return false;
-
- return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT);
-}
-
OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
const Value *RHS,
const DataLayout &DL,
@@ -3771,7 +3845,7 @@ bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
return true;
};
- return any_of(GuardingBranches, AllUsesGuardedByBranch);
+ return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}
@@ -3846,7 +3920,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
// FIXME: This isn't aggressive enough; a call which only writes to a global
// is guaranteed to return.
return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
- match(I, m_Intrinsic<Intrinsic::assume>());
+ match(I, m_Intrinsic<Intrinsic::assume>()) ||
+ match(I, m_Intrinsic<Intrinsic::sideeffect>());
}
// Other instructions return normally.
@@ -3975,7 +4050,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
}
break;
- };
+ }
return false;
}
@@ -3994,21 +4069,75 @@ static bool isKnownNonZero(const Value *V) {
return false;
}
-/// Match non-obvious integer minimum and maximum sequences.
-static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
- Value *CmpLHS, Value *CmpRHS,
- Value *TrueVal, Value *FalseVal,
- Value *&LHS, Value *&RHS) {
- // Assume success. If there's no match, callers should not use these anyway.
+/// Match a clamp pattern for float types, without caring about NaNs or signed
+/// zeros. Given the non-min/max outer cmp/select from the clamp pattern, this
+/// function recognizes whether it can be substituted by a "canonical" min/max
+/// pattern.
+static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ Value *&LHS, Value *&RHS) {
+ // Try to match
+ // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
+ // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
+ // and return description of the outer Max/Min.
+
+ // First, check if select has inverse order:
+ if (CmpRHS == FalseVal) {
+ std::swap(TrueVal, FalseVal);
+ Pred = CmpInst::getInversePredicate(Pred);
+ }
+
+ // Assume success now. If there's no match, callers should not use these anyway.
LHS = TrueVal;
RHS = FalseVal;
- // Recognize variations of:
- // CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
+ const APFloat *FC1;
+ if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ const APFloat *FC2;
+ switch (Pred) {
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ if (match(FalseVal,
+ m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)),
+ m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
+ FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan)
+ return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
+ break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ if (match(FalseVal,
+ m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)),
+ m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
+ FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan)
+ return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
+ break;
+ default:
+ break;
+ }
+
+ return {SPF_UNKNOWN, SPNB_NA, false};
+}
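// A concrete instance of the rewrite above (illustrative): with C1 = 1.0 and
// C2 = 4.0 (so C1 < C2), "x < 1.0 ? 1.0 : min(x, 4.0)" and
// "max(1.0, min(x, 4.0))" agree for every non-NaN x, e.g. x = -3.0 -> 1.0,
// x = 2.5 -> 2.5, x = 7.0 -> 4.0. A minimal standalone check, using
// std::fmin/std::fmax as stand-ins for the matched min/max patterns:
//   #include <cassert>
//   #include <cmath>
//   int main() {
//     const double Xs[] = {-3.0, 0.5, 1.0, 2.5, 4.0, 7.0};
//     for (double X : Xs) {
//       double SelectForm = X < 1.0 ? 1.0 : std::fmin(X, 4.0);
//       double MinMaxForm = std::fmax(1.0, std::fmin(X, 4.0));
//       assert(SelectForm == MinMaxForm);
//     }
//   }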
+
+/// Recognize variations of:
+/// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
+static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal) {
+ // Swap the select operands and predicate to match the patterns below.
+ if (CmpRHS != TrueVal) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ std::swap(TrueVal, FalseVal);
+ }
const APInt *C1;
if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
const APInt *C2;
-
// (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
@@ -4029,6 +4158,21 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
return {SPF_UMIN, SPNB_NA, false};
}
+ return {SPF_UNKNOWN, SPNB_NA, false};
+}
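// For instance (illustrative), with C1 = 3 and C2 = 10 the first pattern reads
// "(X <s 3) ? 3 : smin(X, 10)": X = 1 gives 3, X = 5 gives 5, X = 12 gives 10,
// i.e. X clamped to [3, 10], which is exactly smax(smin(X, 10), 3).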
+
+/// Match non-obvious integer minimum and maximum sequences.
+static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ Value *&LHS, Value *&RHS) {
+ // Assume success. If there's no match, callers should not use these anyway.
+ LHS = TrueVal;
+ RHS = FalseVal;
+
+ SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
+ if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
+ return SPR;
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -4047,6 +4191,7 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
+ const APInt *C1;
if (!match(CmpRHS, m_APInt(C1)))
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -4057,7 +4202,8 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
// Is the sign bit set?
// (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
// (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
- if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue())
+ if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() &&
+ C2->isMaxSignedValue())
return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
// Is the sign bit clear?
@@ -4189,21 +4335,48 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
// ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
// NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
- if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) {
+ if (Pred == ICmpInst::ICMP_SGT &&
+ (C1->isNullValue() || C1->isAllOnesValue())) {
return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
// ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
// NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
- if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) {
+ if (Pred == ICmpInst::ICMP_SLT &&
+ (C1->isNullValue() || C1->isOneValue())) {
return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
}
}
- return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
+ if (CmpInst::isIntPredicate(Pred))
+ return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
+
+ // According to IEEE 754-2008 (5.3.1), minNum(0.0, -0.0) and similar may
+ // return either -0.0 or 0.0, so the fcmp/select pair has stricter semantics
+ // than minNum. Be conservative in such cases.
+ if (NaNBehavior != SPNB_RETURNS_ANY ||
+ (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+ !isKnownNonZero(CmpRHS)))
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}
+/// Helps to match a select pattern in case of a type mismatch.
+///
+/// The function processes the case when type of true and false values of a
+/// select instruction differs from type of the cmp instruction operands because
+/// of a cast instructon. The function checks if it is legal to move the cast
+/// operation after "select". If yes, it returns the new second value of
+/// "select" (with the assumption that cast is moved):
+/// 1. As the operand of the cast instruction when both values of "select" are
+///    the same cast instructions.
+/// 2. As restored constant (by applying reverse cast operation) when the first
+/// value of the "select" is a cast operation and the second value is a
+/// constant.
+/// NOTE: We return only the new second value because the first value could be
+/// accessed as operand of cast instruction.
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
Instruction::CastOps *CastOp) {
auto *Cast1 = dyn_cast<CastInst>(V1);
@@ -4234,7 +4407,34 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
break;
case Instruction::Trunc:
- CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned());
+ Constant *CmpConst;
+ if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
+ CmpConst->getType() == SrcTy) {
+ // Here we have the following case:
+ //
+ // %cond = cmp iN %x, CmpConst
+ // %tr = trunc iN %x to iK
+ // %narrowsel = select i1 %cond, iK %t, iK C
+ //
+ // We can always move trunc after select operation:
+ //
+ // %cond = cmp iN %x, CmpConst
+ // %widesel = select i1 %cond, iN %x, iN CmpConst
+ // %tr = trunc iN %widesel to iK
+ //
+ // Note that C could be extended in any way because we don't care about
+ // upper bits after truncation. It can't be the abs pattern, because it would
+ // look like:
+ //
+ // select i1 %cond, x, -x.
+ //
+ // So only the min/max pattern can be matched. Such a match requires the
+ // widened C to equal CmpConst; that is why we set the widened C to CmpConst.
+ // The condition trunc(CmpConst) == C is checked below.
+ CastedTo = CmpConst;
+ } else {
+ CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned());
+ }
break;
case Instruction::FPTrunc:
CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true);
@@ -4307,11 +4507,9 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
}
/// Return true if "icmp Pred LHS RHS" is always true.
-static bool isTruePredicate(CmpInst::Predicate Pred,
- const Value *LHS, const Value *RHS,
- const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
+ const Value *RHS, const DataLayout &DL,
+ unsigned Depth) {
assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
return true;
@@ -4348,8 +4546,8 @@ static bool isTruePredicate(CmpInst::Predicate Pred,
if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
KnownBits Known(CA->getBitWidth());
- computeKnownBits(X, Known, DL, Depth + 1, AC, CxtI, DT);
-
+ computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr,
+ /*CxtI*/ nullptr, /*DT*/ nullptr);
if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero))
return true;
}
@@ -4371,27 +4569,23 @@ static bool isTruePredicate(CmpInst::Predicate Pred,
/// ALHS ARHS" is true. Otherwise, return None.
static Optional<bool>
isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
- const Value *ARHS, const Value *BLHS,
- const Value *BRHS, const DataLayout &DL,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI, const DominatorTree *DT) {
+ const Value *ARHS, const Value *BLHS, const Value *BRHS,
+ const DataLayout &DL, unsigned Depth) {
switch (Pred) {
default:
return None;
case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SLE:
- if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI,
- DT) &&
- isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT))
+ if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) &&
+ isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth))
return true;
return None;
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:
- if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI,
- DT) &&
- isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT))
+ if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) &&
+ isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth))
return true;
return None;
}
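
The SLE/ULE cases above are a transitivity chain; the following throwaway snippet (illustrative integer values, not from the source) spells it out: if BLHS <= ALHS and ARHS <= BRHS always hold, then the premise ALHS <= ARHS forces BLHS <= BRHS.

#include <cassert>

int main() {
  int ALHS = 5, ARHS = 7; // premise: ALHS <= ARHS
  int BLHS = 3, BRHS = 9; // side conditions: BLHS <= ALHS and ARHS <= BRHS
  if (ALHS <= ARHS) {
    assert(BLHS <= ALHS && ARHS <= BRHS);
    assert(BLHS <= BRHS); // BLHS <= ALHS <= ARHS <= BRHS
  }
  return 0;
}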
@@ -4453,66 +4647,22 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
return None;
}
-Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
- const DataLayout &DL, bool LHSIsFalse,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- // Bail out when we hit the limit.
- if (Depth == MaxDepth)
- return None;
-
- // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example.
- if (LHS->getType() != RHS->getType())
- return None;
-
- Type *OpTy = LHS->getType();
- assert(OpTy->isIntOrIntVectorTy(1));
-
- // LHS ==> RHS by definition
- if (LHS == RHS)
- return !LHSIsFalse;
-
- if (OpTy->isVectorTy())
- // TODO: extending the code below to handle vectors
- return None;
- assert(OpTy->isIntegerTy(1) && "implied by above");
-
- Value *BLHS, *BRHS;
- ICmpInst::Predicate BPred;
- // We expect the RHS to be an icmp.
- if (!match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
- return None;
-
- Value *ALHS, *ARHS;
- ICmpInst::Predicate APred;
- // The LHS can be an 'or', 'and', or 'icmp'.
- if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS)))) {
- // The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth == MaxDepth)
- return None;
- // If the result of an 'or' is false, then we know both legs of the 'or' are
- // false. Similarly, if the result of an 'and' is true, then we know both
- // legs of the 'and' are true.
- if ((LHSIsFalse && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) ||
- (!LHSIsFalse && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) {
- if (Optional<bool> Implication = isImpliedCondition(
- ALHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT))
- return Implication;
- if (Optional<bool> Implication = isImpliedCondition(
- ARHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT))
- return Implication;
- return None;
- }
- return None;
- }
- // All of the below logic assumes both LHS and RHS are icmps.
- assert(isa<ICmpInst>(LHS) && isa<ICmpInst>(RHS) && "Expected icmps.");
-
+/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
+/// false. Otherwise, return None if we can't infer anything.
+static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
+ const ICmpInst *RHS,
+ const DataLayout &DL, bool LHSIsTrue,
+ unsigned Depth) {
+ Value *ALHS = LHS->getOperand(0);
+ Value *ARHS = LHS->getOperand(1);
// The rest of the logic assumes the LHS condition is true. If that's not the
// case, invert the predicate to make it so.
- if (LHSIsFalse)
- APred = CmpInst::getInversePredicate(APred);
+ ICmpInst::Predicate APred =
+ LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate();
+
+ Value *BLHS = RHS->getOperand(0);
+ Value *BRHS = RHS->getOperand(1);
+ ICmpInst::Predicate BPred = RHS->getPredicate();
// Can we infer anything when the two compares have matching operands?
bool IsSwappedOps;
@@ -4538,8 +4688,80 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
}
if (APred == BPred)
- return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC,
- CxtI, DT);
+ return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth);
+ return None;
+}
+
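The predicate-inversion step in isImpliedCondICmps rests on a simple identity: asserting that "A pred B" is false carries the same information as asserting that the inverse predicate is true. A two-line sanity check, not from the source:

#include <cassert>

int main() {
  int A = 7, B = 3;
  bool SLT = (A < B);  // original predicate
  bool SGE = (A >= B); // inverse predicate
  assert(SLT == !SGE); // "SLT is false" is the same fact as "SGE is true"
  return 0;
}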
+/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
+/// false. Otherwise, return None if we can't infer anything. We expect the
+/// RHS to be an icmp and the LHS to be an 'and' or an 'or' instruction.
+static Optional<bool> isImpliedCondAndOr(const BinaryOperator *LHS,
+ const ICmpInst *RHS,
+ const DataLayout &DL, bool LHSIsTrue,
+ unsigned Depth) {
+ // The LHS must be an 'or' or an 'and' instruction.
+ assert((LHS->getOpcode() == Instruction::And ||
+ LHS->getOpcode() == Instruction::Or) &&
+ "Expected LHS to be 'and' or 'or'.");
+
+ assert(Depth <= MaxDepth && "Hit recursion limit");
+
+ // If the result of an 'or' is false, then we know both legs of the 'or' are
+ // false. Similarly, if the result of an 'and' is true, then we know both
+ // legs of the 'and' are true.
+ Value *ALHS, *ARHS;
+ if ((!LHSIsTrue && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) ||
+ (LHSIsTrue && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) {
+ // FIXME: Make this non-recursive.
+ if (Optional<bool> Implication =
+ isImpliedCondition(ALHS, RHS, DL, LHSIsTrue, Depth + 1))
+ return Implication;
+ if (Optional<bool> Implication =
+ isImpliedCondition(ARHS, RHS, DL, LHSIsTrue, Depth + 1))
+ return Implication;
+ return None;
+ }
+ return None;
+}
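
The and/or rule used above is plain Boolean algebra: a true 'and' pins both legs to true, and a false 'or' pins both legs to false, so an implication proven from either leg carries over to the whole expression. A small sketch with made-up predicates:

#include <cassert>

static void check(int X) {
  bool A = (X > 5), B = (X > 100);
  if (A && B)         // the whole 'and' is known true...
    assert(A && B);   // ...so each leg is individually true
  if (!(A || B))      // the whole 'or' is known false...
    assert(!A && !B); // ...so each leg is individually false
}

int main() {
  check(500); // exercises the 'and' case
  check(1);   // exercises the 'or' case
  return 0;
}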
+Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
+ const DataLayout &DL, bool LHSIsTrue,
+ unsigned Depth) {
+ // Bail out when we hit the limit.
+ if (Depth == MaxDepth)
+ return None;
+
+ // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
+ // example.
+ if (LHS->getType() != RHS->getType())
+ return None;
+
+ Type *OpTy = LHS->getType();
+ assert(OpTy->isIntOrIntVectorTy(1) && "Expected integer type only!");
+
+ // LHS ==> RHS by definition
+ if (LHS == RHS)
+ return LHSIsTrue;
+
+ // FIXME: Extend the code below to handle vectors.
+ if (OpTy->isVectorTy())
+ return None;
+
+ assert(OpTy->isIntegerTy(1) && "implied by above");
+
+ // Both LHS and RHS are icmps.
+ const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
+ const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS);
+ if (LHSCmp && RHSCmp)
+ return isImpliedCondICmps(LHSCmp, RHSCmp, DL, LHSIsTrue, Depth);
+
+ // The LHS should be an 'or' or an 'and' instruction. We expect the RHS to be
+ // an icmp. FIXME: Add support for and/or on the RHS.
+ const BinaryOperator *LHSBO = dyn_cast<BinaryOperator>(LHS);
+ if (LHSBO && RHSCmp) {
+ if ((LHSBO->getOpcode() == Instruction::And ||
+ LHSBO->getOpcode() == Instruction::Or))
+ return isImpliedCondAndOr(LHSBO, RHSCmp, DL, LHSIsTrue, Depth);
+ }
return None;
}
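
For callers, the net effect of the refactoring is the slimmer entry point above. The usage sketch below is hypothetical (the function name f, the module name, and the constants are invented) and assumes the post-patch declaration of isImpliedCondition in llvm/Analysis/ValueTracking.h; it builds two icmps where x u< 10 trivially implies x u< 100 and asks the analysis to confirm it.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("implied_demo", Ctx);
  IRBuilder<> B(Ctx);

  auto *FTy = FunctionType::get(B.getVoidTy(), {B.getInt32Ty()}, false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(BB);

  Value *X = &*F->arg_begin();
  Value *LHS = B.CreateICmpULT(X, B.getInt32(10));  // %lhs = icmp ult i32 %x, 10
  Value *RHS = B.CreateICmpULT(X, B.getInt32(100)); // %rhs = icmp ult i32 %x, 100
  B.CreateRetVoid();

  // Expected to yield Optional<bool>(true): x u< 10 implies x u< 100.
  Optional<bool> Imp = isImpliedCondition(LHS, RHS, M.getDataLayout(),
                                          /*LHSIsTrue=*/true, /*Depth=*/0);
  return (Imp && *Imp) ? 0 : 1;
}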
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index 554d132c2ab7..2becfbfe8a8d 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -91,7 +91,8 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
return Intrinsic::not_intrinsic;
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
- ID == Intrinsic::lifetime_end || ID == Intrinsic::assume)
+ ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
+ ID == Intrinsic::sideeffect)
return ID;
return Intrinsic::not_intrinsic;
}