Diffstat (limited to 'contrib/llvm/lib/Analysis')
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysis.cpp | 174
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 77
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisSummary.h | 2
-rw-r--r--  contrib/llvm/lib/Analysis/AliasSetTracker.cpp | 42
-rw-r--r--  contrib/llvm/lib/Analysis/Analysis.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 145
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 107
-rw-r--r--  contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp | 213
-rw-r--r--  contrib/llvm/lib/Analysis/CFGPrinter.cpp | 11
-rw-r--r--  contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp | 19
-rw-r--r--  contrib/llvm/lib/Analysis/CFLGraph.h | 67
-rw-r--r--  contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp | 5
-rw-r--r--  contrib/llvm/lib/Analysis/CGSCCPassManager.cpp | 43
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraph.cpp | 5
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp | 62
-rw-r--r--  contrib/llvm/lib/Analysis/CaptureTracking.cpp | 55
-rw-r--r--  contrib/llvm/lib/Analysis/CodeMetrics.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/ConstantFolding.cpp | 149
-rw-r--r--  contrib/llvm/lib/Analysis/Delinearization.cpp | 12
-rw-r--r--  contrib/llvm/lib/Analysis/DemandedBits.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/DependenceAnalysis.cpp | 732
-rw-r--r--  contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp | 21
-rw-r--r--  contrib/llvm/lib/Analysis/DominanceFrontier.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/EHPersonalities.cpp | 30
-rw-r--r--  contrib/llvm/lib/Analysis/GlobalsModRef.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/IVUsers.cpp | 18
-rw-r--r--  contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp | 10
-rw-r--r--  contrib/llvm/lib/Analysis/InlineCost.cpp | 132
-rw-r--r--  contrib/llvm/lib/Analysis/InstructionSimplify.cpp | 453
-rw-r--r--  contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp | 16
-rw-r--r--  contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp | 5
-rw-r--r--  contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp | 5
-rw-r--r--  contrib/llvm/lib/Analysis/LazyCallGraph.cpp | 39
-rw-r--r--  contrib/llvm/lib/Analysis/LazyValueInfo.cpp | 121
-rw-r--r--  contrib/llvm/lib/Analysis/Lint.cpp | 14
-rw-r--r--  contrib/llvm/lib/Analysis/Loads.cpp | 12
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 345
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/LoopInfo.cpp | 64
-rw-r--r--  contrib/llvm/lib/Analysis/LoopPass.cpp | 33
-rw-r--r--  contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp | 77
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 67
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryLocation.cpp | 24
-rw-r--r--  contrib/llvm/lib/Analysis/MemorySSA.cpp | 349
-rw-r--r--  contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp | 229
-rw-r--r--  contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 62
-rw-r--r--  contrib/llvm/lib/Analysis/MustExecute.cpp | 269
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp | 19
-rw-r--r--  contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp | 4
-rw-r--r--  contrib/llvm/lib/Analysis/PHITransAddr.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/PhiValues.cpp | 196
-rw-r--r--  contrib/llvm/lib/Analysis/PostDominators.cpp | 16
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp | 22
-rw-r--r--  contrib/llvm/lib/Analysis/RegionInfo.cpp | 3
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPass.cpp | 15
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolution.cpp | 1409
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 61
-rw-r--r--  contrib/llvm/lib/Analysis/StratifiedSets.h | 20
-rw-r--r--  contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp | 113
-rw-r--r--  contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp | 199
-rw-r--r--  contrib/llvm/lib/Analysis/TargetTransformInfo.cpp | 171
-rw-r--r--  contrib/llvm/lib/Analysis/Trace.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 313
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp | 431
-rw-r--r--  contrib/llvm/lib/Analysis/VectorUtils.cpp | 20
67 files changed, 4914 insertions, 2440 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 55df66714178..a6585df949f8 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -126,7 +126,7 @@ ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
- return Result;
+ return ModRefInfo::NoModRef;
}
return Result;
@@ -162,7 +162,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
- return Result;
+ return ModRefInfo::NoModRef;
}
// Try to refine the mod-ref info further using other API entry points to the
@@ -224,7 +224,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
- return Result;
+ return ModRefInfo::NoModRef;
}
// Try to refine the mod-ref info further using other API entry points to the
@@ -254,85 +254,91 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
// information from CS1's references to the memory referenced by
// CS2's arguments.
if (onlyAccessesArgPointees(CS2B)) {
+ if (!doesAccessArgPointees(CS2B))
+ return ModRefInfo::NoModRef;
ModRefInfo R = ModRefInfo::NoModRef;
- if (doesAccessArgPointees(CS2B)) {
- bool IsMustAlias = true;
- for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
- const Value *Arg = *I;
- if (!Arg->getType()->isPointerTy())
- continue;
- unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
- auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI);
-
- // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the
- // dependence of CS1 on that location is the inverse:
- // - If CS2 modifies location, dependence exists if CS1 reads or writes.
- // - If CS2 only reads location, dependence exists if CS1 writes.
- ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx);
- ModRefInfo ArgMask = ModRefInfo::NoModRef;
- if (isModSet(ArgModRefCS2))
- ArgMask = ModRefInfo::ModRef;
- else if (isRefSet(ArgModRefCS2))
- ArgMask = ModRefInfo::Mod;
-
- // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use
- // above ArgMask to update dependence info.
- ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc);
- ArgMask = intersectModRef(ArgMask, ModRefCS1);
-
- // Conservatively clear IsMustAlias unless only MustAlias is found.
- IsMustAlias &= isMustSet(ModRefCS1);
-
- R = intersectModRef(unionModRef(R, ArgMask), Result);
- if (R == Result) {
- // On early exit, not all args were checked, cannot set Must.
- if (I + 1 != E)
- IsMustAlias = false;
- break;
- }
+ bool IsMustAlias = true;
+ for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
+ auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI);
+
+ // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the
+ // dependence of CS1 on that location is the inverse:
+ // - If CS2 modifies location, dependence exists if CS1 reads or writes.
+ // - If CS2 only reads location, dependence exists if CS1 writes.
+ ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx);
+ ModRefInfo ArgMask = ModRefInfo::NoModRef;
+ if (isModSet(ArgModRefCS2))
+ ArgMask = ModRefInfo::ModRef;
+ else if (isRefSet(ArgModRefCS2))
+ ArgMask = ModRefInfo::Mod;
+
+ // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use
+ // above ArgMask to update dependence info.
+ ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc);
+ ArgMask = intersectModRef(ArgMask, ModRefCS1);
+
+ // Conservatively clear IsMustAlias unless only MustAlias is found.
+ IsMustAlias &= isMustSet(ModRefCS1);
+
+ R = intersectModRef(unionModRef(R, ArgMask), Result);
+ if (R == Result) {
+ // On early exit, not all args were checked, cannot set Must.
+ if (I + 1 != E)
+ IsMustAlias = false;
+ break;
}
- // If Alias found and only MustAlias found above, set Must bit.
- R = IsMustAlias ? setMust(R) : clearMust(R);
}
- return R;
+
+ if (isNoModRef(R))
+ return ModRefInfo::NoModRef;
+
+ // If MustAlias found above, set Must bit.
+ return IsMustAlias ? setMust(R) : clearMust(R);
}
// If CS1 only accesses memory through arguments, check if CS2 references
// any of the memory referenced by CS1's arguments. If not, return NoModRef.
if (onlyAccessesArgPointees(CS1B)) {
+ if (!doesAccessArgPointees(CS1B))
+ return ModRefInfo::NoModRef;
ModRefInfo R = ModRefInfo::NoModRef;
- if (doesAccessArgPointees(CS1B)) {
- bool IsMustAlias = true;
- for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
- const Value *Arg = *I;
- if (!Arg->getType()->isPointerTy())
- continue;
- unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
- auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI);
-
- // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might
- // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If
- // CS1 might Ref, then we care only about a Mod by CS2.
- ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx);
- ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc);
- if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) ||
- (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2)))
- R = intersectModRef(unionModRef(R, ArgModRefCS1), Result);
-
- // Conservatively clear IsMustAlias unless only MustAlias is found.
- IsMustAlias &= isMustSet(ModRefCS2);
-
- if (R == Result) {
- // On early exit, not all args were checked, cannot set Must.
- if (I + 1 != E)
- IsMustAlias = false;
- break;
- }
+ bool IsMustAlias = true;
+ for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
+ auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI);
+
+ // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might
+ // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If
+ // CS1 might Ref, then we care only about a Mod by CS2.
+ ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx);
+ ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc);
+ if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) ||
+ (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2)))
+ R = intersectModRef(unionModRef(R, ArgModRefCS1), Result);
+
+ // Conservatively clear IsMustAlias unless only MustAlias is found.
+ IsMustAlias &= isMustSet(ModRefCS2);
+
+ if (R == Result) {
+ // On early exit, not all args were checked, cannot set Must.
+ if (I + 1 != E)
+ IsMustAlias = false;
+ break;
}
- // If Alias found and only MustAlias found above, set Must bit.
- R = IsMustAlias ? setMust(R) : clearMust(R);
}
- return R;
+
+ if (isNoModRef(R))
+ return ModRefInfo::NoModRef;
+
+ // If MustAlias found above, set Must bit.
+ return IsMustAlias ? setMust(R) : clearMust(R);
}
return Result;
@@ -366,6 +372,24 @@ FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) {
return Result;
}
+raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) {
+ switch (AR) {
+ case NoAlias:
+ OS << "NoAlias";
+ break;
+ case MustAlias:
+ OS << "MustAlias";
+ break;
+ case MayAlias:
+ OS << "MayAlias";
+ break;
+ case PartialAlias:
+ OS << "PartialAlias";
+ break;
+ }
+ return OS;
+}
+
//===----------------------------------------------------------------------===//
// Helper method implementation
//===----------------------------------------------------------------------===//
@@ -515,7 +539,7 @@ ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
return ModRefInfo::ModRef;
}
-/// \brief Return information about whether a particular call site modifies
+/// Return information about whether a particular call site modifies
/// or reads the specified memory location \p MemLoc before instruction \p I
/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
/// instruction-ordering queries inside the BasicBlock containing \p I.
@@ -548,7 +572,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
unsigned ArgNo = 0;
ModRefInfo R = ModRefInfo::NoModRef;
- bool MustAlias = true;
+ bool IsMustAlias = true;
// Set flag only if no May found and all operands processed.
for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
CI != CE; ++CI, ++ArgNo) {
@@ -566,7 +590,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
// assume that the call could touch the pointer, even though it doesn't
// escape.
if (AR != MustAlias)
- MustAlias = false;
+ IsMustAlias = false;
if (AR == NoAlias)
continue;
if (CS.doesNotAccessMemory(ArgNo))
@@ -578,7 +602,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
// Not returning MustModRef since we have not seen all the arguments.
return ModRefInfo::ModRef;
}
- return MustAlias ? setMust(R) : clearMust(R);
+ return IsMustAlias ? setMust(R) : clearMust(R);
}
/// canBasicBlockModify - Return true if it is possible for execution of the
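One of the AliasAnalysis.cpp hunks above introduces a raw_ostream printer so an AliasResult prints by name. Below is a minimal, self-contained sketch of that pattern; it uses std::ostream instead of llvm::raw_ostream, and the enum here is only a stand-in for the real AliasResult in llvm/Analysis/AliasAnalysis.h.

    #include <iostream>

    // Stand-in for llvm::AliasResult; the real definition lives in
    // llvm/Analysis/AliasAnalysis.h.
    enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

    // Same switch shape as the operator<< added in the hunk above, written
    // over std::ostream so the sketch builds without LLVM headers.
    std::ostream &operator<<(std::ostream &OS, AliasResult AR) {
      switch (AR) {
      case NoAlias:      OS << "NoAlias";      break;
      case MayAlias:     OS << "MayAlias";     break;
      case PartialAlias: OS << "PartialAlias"; break;
      case MustAlias:    OS << "MustAlias";    break;
      }
      return OS;
    }

    int main() {
      std::cout << "alias result: " << MayAlias << "\n"; // prints "MayAlias"
      return 0;
    }

Having a printer on the enum is what lets the AliasAnalysisEvaluator.cpp diff below pass the AliasResult value straight to PrintResults and PrintLoadStoreResults instead of threading hand-written message strings through every switch arm.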
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index f737cecc43d1..764ae9160350 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -41,7 +41,7 @@ static cl::opt<bool> PrintMustModRef("print-mustmodref", cl::ReallyHidden);
static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden);
-static void PrintResults(const char *Msg, bool P, const Value *V1,
+static void PrintResults(AliasResult AR, bool P, const Value *V1,
const Value *V2, const Module *M) {
if (PrintAll || P) {
std::string o1, o2;
@@ -50,18 +50,15 @@ static void PrintResults(const char *Msg, bool P, const Value *V1,
V1->printAsOperand(os1, true, M);
V2->printAsOperand(os2, true, M);
}
-
+
if (o2 < o1)
std::swap(o1, o2);
- errs() << " " << Msg << ":\t"
- << o1 << ", "
- << o2 << "\n";
+ errs() << " " << AR << ":\t" << o1 << ", " << o2 << "\n";
}
}
-static inline void
-PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
- Module *M) {
+static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I,
+ Value *Ptr, Module *M) {
if (PrintAll || P) {
errs() << " " << Msg << ": Ptr: ";
Ptr->printAsOperand(errs(), true, M);
@@ -69,21 +66,19 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
}
}
-static inline void
-PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
- Module *M) {
+static inline void PrintModRefResults(const char *Msg, bool P, CallSite CSA,
+ CallSite CSB, Module *M) {
if (PrintAll || P) {
- errs() << " " << Msg << ": " << *CSA.getInstruction()
- << " <-> " << *CSB.getInstruction() << '\n';
+ errs() << " " << Msg << ": " << *CSA.getInstruction() << " <-> "
+ << *CSB.getInstruction() << '\n';
}
}
-static inline void
-PrintLoadStoreResults(const char *Msg, bool P, const Value *V1,
- const Value *V2, const Module *M) {
+static inline void PrintLoadStoreResults(AliasResult AR, bool P,
+ const Value *V1, const Value *V2,
+ const Module *M) {
if (PrintAll || P) {
- errs() << " " << Msg << ": " << *V1
- << " <-> " << *V2 << '\n';
+ errs() << " " << AR << ": " << *V1 << " <-> " << *V2 << '\n';
}
}
@@ -155,22 +150,22 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
- switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
+ AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size);
+ switch (AR) {
case NoAlias:
- PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
+ PrintResults(AR, PrintNoAlias, *I1, *I2, F.getParent());
++NoAliasCount;
break;
case MayAlias:
- PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
+ PrintResults(AR, PrintMayAlias, *I1, *I2, F.getParent());
++MayAliasCount;
break;
case PartialAlias:
- PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
- F.getParent());
+ PrintResults(AR, PrintPartialAlias, *I1, *I2, F.getParent());
++PartialAliasCount;
break;
case MustAlias:
- PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
+ PrintResults(AR, PrintMustAlias, *I1, *I2, F.getParent());
++MustAliasCount;
break;
}
@@ -181,26 +176,23 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
// iterate over all pairs of load, store
for (Value *Load : Loads) {
for (Value *Store : Stores) {
- switch (AA.alias(MemoryLocation::get(cast<LoadInst>(Load)),
- MemoryLocation::get(cast<StoreInst>(Store)))) {
+ AliasResult AR = AA.alias(MemoryLocation::get(cast<LoadInst>(Load)),
+ MemoryLocation::get(cast<StoreInst>(Store)));
+ switch (AR) {
case NoAlias:
- PrintLoadStoreResults("NoAlias", PrintNoAlias, Load, Store,
- F.getParent());
+ PrintLoadStoreResults(AR, PrintNoAlias, Load, Store, F.getParent());
++NoAliasCount;
break;
case MayAlias:
- PrintLoadStoreResults("MayAlias", PrintMayAlias, Load, Store,
- F.getParent());
+ PrintLoadStoreResults(AR, PrintMayAlias, Load, Store, F.getParent());
++MayAliasCount;
break;
case PartialAlias:
- PrintLoadStoreResults("PartialAlias", PrintPartialAlias, Load, Store,
- F.getParent());
+ PrintLoadStoreResults(AR, PrintPartialAlias, Load, Store, F.getParent());
++PartialAliasCount;
break;
case MustAlias:
- PrintLoadStoreResults("MustAlias", PrintMustAlias, Load, Store,
- F.getParent());
+ PrintLoadStoreResults(AR, PrintMustAlias, Load, Store, F.getParent());
++MustAliasCount;
break;
}
@@ -211,26 +203,23 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
I1 != E; ++I1) {
for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
- switch (AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)),
- MemoryLocation::get(cast<StoreInst>(*I2)))) {
+ AliasResult AR = AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)),
+ MemoryLocation::get(cast<StoreInst>(*I2)));
+ switch (AR) {
case NoAlias:
- PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
- F.getParent());
+ PrintLoadStoreResults(AR, PrintNoAlias, *I1, *I2, F.getParent());
++NoAliasCount;
break;
case MayAlias:
- PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
- F.getParent());
+ PrintLoadStoreResults(AR, PrintMayAlias, *I1, *I2, F.getParent());
++MayAliasCount;
break;
case PartialAlias:
- PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
- F.getParent());
+ PrintLoadStoreResults(AR, PrintPartialAlias, *I1, *I2, F.getParent());
++PartialAliasCount;
break;
case MustAlias:
- PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
- F.getParent());
+ PrintLoadStoreResults(AR, PrintMustAlias, *I1, *I2, F.getParent());
++MustAliasCount;
break;
}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h
index 51a85f4e7061..fb93a12420f8 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h
@@ -13,7 +13,7 @@
/// Summary-based analysis, also known as bottom-up analysis, is a style of
/// interprocedrual static analysis that tries to analyze the callees before the
/// callers get analyzed. The key idea of summary-based analysis is to first
-/// process each function indepedently, outline its behavior in a condensed
+/// process each function independently, outline its behavior in a condensed
/// summary, and then instantiate the summary at the callsite when the said
/// function is called elsewhere. This is often in contrast to another style
/// called top-down analysis, in which callers are always analyzed first before
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index c88e0dd7dc44..8aee81b1f1d8 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -126,7 +127,7 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
}
void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
- uint64_t Size, const AAMDNodes &AAInfo,
+ LocationSize Size, const AAMDNodes &AAInfo,
bool KnownMustAlias) {
assert(!Entry.hasAliasSet() && "Entry already in set!");
@@ -182,7 +183,7 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
/// aliasesPointer - Return true if the specified pointer "may" (or must)
/// alias one of the members in the set.
///
-bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
const AAMDNodes &AAInfo,
AliasAnalysis &AA) const {
if (AliasAny)
@@ -262,7 +263,7 @@ void AliasSetTracker::clear() {
/// alias the pointer. Return the unified set, or nullptr if no set that aliases
/// the pointer was found.
AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
- uint64_t Size,
+ LocationSize Size,
const AAMDNodes &AAInfo) {
AliasSet *FoundSet = nullptr;
for (iterator I = begin(), E = end(); I != E;) {
@@ -302,7 +303,8 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
/// getAliasSetForPointer - Return the alias set that the specified pointer
/// lives in.
-AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
+ LocationSize Size,
const AAMDNodes &AAInfo) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
@@ -347,7 +349,8 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
return AliasSets.back();
}
-void AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
+void AliasSetTracker::add(Value *Ptr, LocationSize Size,
+ const AAMDNodes &AAInfo) {
addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess);
}
@@ -386,7 +389,7 @@ void AliasSetTracker::add(VAArgInst *VAAI) {
AliasSet::ModRefAccess);
}
-void AliasSetTracker::add(MemSetInst *MSI) {
+void AliasSetTracker::add(AnyMemSetInst *MSI) {
AAMDNodes AAInfo;
MSI->getAAMetadata(AAInfo);
@@ -399,11 +402,12 @@ void AliasSetTracker::add(MemSetInst *MSI) {
AliasSet &AS =
addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
- if (MSI->isVolatile())
+ auto *MS = dyn_cast<MemSetInst>(MSI);
+ if (MS && MS->isVolatile())
AS.setVolatile();
}
-void AliasSetTracker::add(MemTransferInst *MTI) {
+void AliasSetTracker::add(AnyMemTransferInst *MTI) {
AAMDNodes AAInfo;
MTI->getAAMetadata(AAInfo);
@@ -415,13 +419,15 @@ void AliasSetTracker::add(MemTransferInst *MTI) {
AliasSet &ASSrc =
addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess);
- if (MTI->isVolatile())
- ASSrc.setVolatile();
AliasSet &ASDst =
addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
- if (MTI->isVolatile())
+
+ auto* MT = dyn_cast<MemTransferInst>(MTI);
+ if (MT && MT->isVolatile()) {
+ ASSrc.setVolatile();
ASDst.setVolatile();
+ }
}
void AliasSetTracker::addUnknown(Instruction *Inst) {
@@ -461,9 +467,9 @@ void AliasSetTracker::add(Instruction *I) {
return add(SI);
if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return add(VAAI);
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(I))
+ if (AnyMemSetInst *MSI = dyn_cast<AnyMemSetInst>(I))
return add(MSI);
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I))
+ if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(I))
return add(MTI);
return addUnknown(I);
}
@@ -588,7 +594,7 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() {
return *AliasAnyAS;
}
-AliasSet &AliasSetTracker::addPointer(Value *P, uint64_t Size,
+AliasSet &AliasSetTracker::addPointer(Value *P, LocationSize Size,
const AAMDNodes &AAInfo,
AliasSet::AccessLattice E) {
AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo);
@@ -633,8 +639,12 @@ void AliasSet::print(raw_ostream &OS) const {
OS << "\n " << UnknownInsts.size() << " Unknown instructions: ";
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (i) OS << ", ";
- if (auto *I = getUnknownInst(i))
- I->printAsOperand(OS);
+ if (auto *I = getUnknownInst(i)) {
+ if (I->hasName())
+ I->printAsOperand(OS);
+ else
+ I->print(OS);
+ }
}
}
OS << "\n";
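The AliasSetTracker.cpp hunks above widen the memset/memtransfer handling from MemSetInst and MemTransferInst to AnyMemSetInst and AnyMemTransferInst, which also cover the element-wise atomic intrinsics, and they query isVolatile() only after a checked downcast because the atomic forms carry no volatile flag. The sketch below illustrates that shape only; the class names are invented, and plain C++ dynamic_cast stands in for llvm::dyn_cast.

    #include <iostream>

    // Hypothetical stand-ins for AnyMemSetInst and its plain/atomic subclasses.
    struct AnyMemSet { virtual ~AnyMemSet() = default; };
    struct PlainMemSet : AnyMemSet {
      bool Volatile;
      explicit PlainMemSet(bool V) : Volatile(V) {}
      bool isVolatile() const { return Volatile; }
    };
    struct AtomicMemSet : AnyMemSet {}; // element-wise atomic form: no volatile flag

    // Mirrors the updated add(AnyMemSetInst *): accept the broad type and ask
    // about volatility only when the plain (non-atomic) form matches.
    bool shouldMarkVolatile(const AnyMemSet *MSI) {
      if (const auto *MS = dynamic_cast<const PlainMemSet *>(MSI))
        return MS->isVolatile();
      return false;
    }

    int main() {
      PlainMemSet P(true);
      AtomicMemSet A;
      std::cout << shouldMarkVolatile(&P) << shouldMarkVolatile(&A) << "\n"; // prints "10"
      return 0;
    }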
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 0e0b5c92a918..30576cf1ae10 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -65,8 +65,10 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeMemoryDependenceWrapperPassPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializeModuleSummaryIndexWrapperPassPass(Registry);
+ initializeMustExecutePrinterPass(Registry);
initializeObjCARCAAWrapperPassPass(Registry);
initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
+ initializePhiValuesWrapperPassPass(Registry);
initializePostDominatorTreeWrapperPassPass(Registry);
initializeRegionInfoPassPass(Registry);
initializeRegionViewerPass(Registry);
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 537813b6b752..96326347b712 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -85,15 +85,15 @@ const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
// depth otherwise the algorithm in aliasGEP will assert.
static const unsigned MaxLookupSearchDepth = 6;
-bool BasicAAResult::invalidate(Function &F, const PreservedAnalyses &PA,
+bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// We don't care if this analysis itself is preserved, it has no state. But
// we need to check that the analyses it depends on have been. Note that we
// may be created without handles to some analyses and in that case don't
// depend on them.
- if (Inv.invalidate<AssumptionAnalysis>(F, PA) ||
- (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA)) ||
- (LI && Inv.invalidate<LoopAnalysis>(F, PA)))
+ if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
+ (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)) ||
+ (LI && Inv.invalidate<LoopAnalysis>(Fn, PA)))
return true;
// Otherwise this analysis result remains valid.
@@ -132,7 +132,10 @@ static bool isNonEscapingLocalObject(const Value *V) {
/// Returns true if the pointer is one which would have been considered an
/// escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
- if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
+ if (ImmutableCallSite(V))
+ return true;
+
+ if (isa<Argument>(V))
return true;
// The load case works because isNonEscapingLocalObject considers all
@@ -147,10 +150,12 @@ static bool isEscapeSource(const Value *V) {
/// Returns the size of the object specified by V or UnknownSize if unknown.
static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
+ bool NullIsValidLoc,
bool RoundToAlign = false) {
uint64_t Size;
ObjectSizeOpts Opts;
Opts.RoundToAlign = RoundToAlign;
+ Opts.NullIsUnknownSize = NullIsValidLoc;
if (getObjectSize(V, Size, DL, &TLI, Opts))
return Size;
return MemoryLocation::UnknownSize;
@@ -160,7 +165,8 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
/// Size.
static bool isObjectSmallerThan(const Value *V, uint64_t Size,
const DataLayout &DL,
- const TargetLibraryInfo &TLI) {
+ const TargetLibraryInfo &TLI,
+ bool NullIsValidLoc) {
// Note that the meanings of the "object" are slightly different in the
// following contexts:
// c1: llvm::getObjectSize()
@@ -192,15 +198,16 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
- uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true);
+ uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
+ /*RoundToAlign*/ true);
return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
}
/// Returns true if we can prove that the object specified by V has size Size.
static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
- const TargetLibraryInfo &TLI) {
- uint64_t ObjectSize = getObjectSize(V, DL, TLI);
+ const TargetLibraryInfo &TLI, bool NullIsValidLoc) {
+ uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc);
return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
}
@@ -285,6 +292,19 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
case Instruction::Shl:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+
+ // We're trying to linearize an expression of the kind:
+ // shl i8 -128, 36
+ // where the shift count exceeds the bitwidth of the type.
+ // We can't decompose this further (the expression would return
+ // a poison value).
+ if (Offset.getBitWidth() < RHS.getLimitedValue() ||
+ Scale.getBitWidth() < RHS.getLimitedValue()) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
Offset <<= RHS.getLimitedValue();
Scale <<= RHS.getLimitedValue();
// the semantics of nsw and nuw for left shifts don't match those of
@@ -414,11 +434,21 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
if (!GEPOp) {
- if (auto CS = ImmutableCallSite(V))
- if (const Value *RV = CS.getReturnedArgOperand()) {
- V = RV;
+ if (auto CS = ImmutableCallSite(V)) {
+ // CaptureTracking can know about special capturing properties of some
+ // intrinsics like launder.invariant.group, that can't be expressed with
+ // the attributes, but have properties like returning aliasing pointer.
+ // Because some analysis may assume that nocaptured pointer is not
+ // returned from some special intrinsic (because function would have to
+ // be marked with returns attribute), it is crucial to use this function
+ // because it should be in sync with CaptureTracking. Not using it may
+ // cause weird miscompilations where 2 aliasing pointers are assumed to
+ // noalias.
+ if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
+ V = RP;
continue;
}
+ }
// If it's not a GEP, hand it off to SimplifyInstruction to see if it
// can come up with something. This matches what GetUnderlyingObject does.
@@ -490,6 +520,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
SExtBits, DL, 0, AC, DT, NSW, NUW);
+ // All GEP math happens in the width of the pointer type,
+ // so we can truncate the value to 64-bits as we don't handle
+ // currently pointers larger than 64 bits and we would crash
+ // later. TODO: Make `Scale` an APInt to avoid this problem.
+ if (IndexScale.getBitWidth() > 64)
+ IndexScale = IndexScale.sextOrTrunc(64);
+
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale;
@@ -832,8 +869,11 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
IsMustAlias = false;
// Early return if we improved mod ref information
- if (!isModAndRefSet(Result))
+ if (!isModAndRefSet(Result)) {
+ if (isNoModRef(Result))
+ return ModRefInfo::NoModRef;
return IsMustAlias ? setMust(Result) : clearMust(Result);
+ }
}
// If the CallSite is to malloc or calloc, we can assume that it doesn't
@@ -854,7 +894,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// operands, i.e., source and destination of any given memcpy must no-alias.
// If Loc must-aliases either one of these two locations, then it necessarily
// no-aliases the other.
- if (auto *Inst = dyn_cast<MemCpyInst>(CS.getInstruction())) {
+ if (auto *Inst = dyn_cast<AnyMemCpyInst>(CS.getInstruction())) {
AliasResult SrcAA, DestAA;
if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
@@ -958,12 +998,12 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
/// both having the exact same pointer operand.
static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
- uint64_t V1Size,
+ LocationSize V1Size,
const GEPOperator *GEP2,
- uint64_t V2Size,
+ LocationSize V2Size,
const DataLayout &DL) {
- assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
- GEP2->getPointerOperand()->stripPointerCastsAndBarriers() &&
+ assert(GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
+ GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
"Expected GEPs with the same pointer operand");
@@ -1135,8 +1175,8 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// the highest %f1 can be is (%alloca + 3). This means %random can not be higher
// than (%alloca - 1), and so is not inbounds, a contradiction.
bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
- const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
- uint64_t ObjectAccessSize) {
+ const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
+ LocationSize ObjectAccessSize) {
// If the object access size is unknown, or the GEP isn't inbounds, bail.
if (ObjectAccessSize == MemoryLocation::UnknownSize || !GEPOp->isInBounds())
return false;
@@ -1153,13 +1193,13 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
DecompObject.OtherOffset;
// If the GEP has no variable indices, we know the precise offset
- // from the base, then use it. If the GEP has variable indices, we're in
- // a bit more trouble: we can't count on the constant offsets that come
- // from non-struct sources, since these can be "rewound" by a negative
- // variable offset. So use only offsets that came from structs.
+ // from the base, then use it. If the GEP has variable indices,
+ // we can't get exact GEP offset to identify pointer alias. So return
+ // false in that case.
+ if (!DecompGEP.VarIndices.empty())
+ return false;
int64_t GEPBaseOffset = DecompGEP.StructOffset;
- if (DecompGEP.VarIndices.empty())
- GEPBaseOffset += DecompGEP.OtherOffset;
+ GEPBaseOffset += DecompGEP.OtherOffset;
return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize);
}
@@ -1170,11 +1210,11 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
/// We know that V1 is a GEP, but we don't know anything about V2.
/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
/// V2.
-AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
- const AAMDNodes &V1AAInfo, const Value *V2,
- uint64_t V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1,
- const Value *UnderlyingV2) {
+AliasResult
+BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
+ const AAMDNodes &V1AAInfo, const Value *V2,
+ LocationSize V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderlyingV1, const Value *UnderlyingV2) {
DecomposedGEP DecompGEP1, DecompGEP2;
bool GEP1MaxLookupReached =
DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT);
@@ -1241,8 +1281,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// If we know the two GEPs are based off of the exact same pointer (and not
// just the same underlying object), see if that tells us anything about
// the resulting pointers.
- if (GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
- GEP2->getPointerOperand()->stripPointerCastsAndBarriers() &&
+ if (GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
+ GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) {
AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
// If we couldn't find anything interesting, don't abandon just yet.
@@ -1403,9 +1443,10 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
/// against another.
-AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
+AliasResult BasicAAResult::aliasSelect(const SelectInst *SI,
+ LocationSize SISize,
const AAMDNodes &SIAAInfo,
- const Value *V2, uint64_t V2Size,
+ const Value *V2, LocationSize V2Size,
const AAMDNodes &V2AAInfo,
const Value *UnderV2) {
// If the values are Selects with the same condition, we can do a more precise
@@ -1438,9 +1479,10 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
/// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against
/// another.
-AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
+AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
- uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ LocationSize V2Size,
+ const AAMDNodes &V2AAInfo,
const Value *UnderV2) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
@@ -1545,9 +1587,9 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as
/// array references.
-AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
+AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
AAMDNodes V1AAInfo, const Value *V2,
- uint64_t V2Size, AAMDNodes V2AAInfo,
+ LocationSize V2Size, AAMDNodes V2AAInfo,
const Value *O1, const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
@@ -1555,8 +1597,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias;
// Strip off any casts if they exist.
- V1 = V1->stripPointerCastsAndBarriers();
- V2 = V2->stripPointerCastsAndBarriers();
+ V1 = V1->stripPointerCastsAndInvariantGroups();
+ V2 = V2->stripPointerCastsAndInvariantGroups();
// If V1 or V2 is undef, the result is NoAlias because we can always pick a
// value for undef that aliases nothing in the program.
@@ -1585,10 +1627,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
- if (CPN->getType()->getAddressSpace() == 0)
+ if (!NullPointerIsDefined(&F, CPN->getType()->getAddressSpace()))
return NoAlias;
if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
- if (CPN->getType()->getAddressSpace() == 0)
+ if (!NullPointerIsDefined(&F, CPN->getType()->getAddressSpace()))
return NoAlias;
if (O1 != O2) {
@@ -1624,10 +1666,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
+ bool NullIsValidLocation = NullPointerIsDefined(&F);
if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O2, V1Size, DL, TLI)) ||
+ isObjectSmallerThan(O2, V1Size, DL, TLI, NullIsValidLocation)) ||
(V2Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O1, V2Size, DL, TLI)))
+ isObjectSmallerThan(O1, V2Size, DL, TLI, NullIsValidLocation)))
return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
@@ -1687,8 +1730,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
if (O1 == O2)
if (V1Size != MemoryLocation::UnknownSize &&
V2Size != MemoryLocation::UnknownSize &&
- (isObjectSize(O1, V1Size, DL, TLI) ||
- isObjectSize(O2, V2Size, DL, TLI)))
+ (isObjectSize(O1, V1Size, DL, TLI, NullIsValidLocation) ||
+ isObjectSize(O2, V2Size, DL, TLI, NullIsValidLocation)))
return AliasCache[Locs] = PartialAlias;
// Recurse back into the best AA results we have, potentially with refined
@@ -1771,8 +1814,8 @@ void BasicAAResult::GetIndexDifference(
}
bool BasicAAResult::constantOffsetHeuristic(
- const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size,
- uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC,
+ const SmallVectorImpl<VariableGEPIndex> &VarIndices, LocationSize V1Size,
+ LocationSize V2Size, int64_t BaseOffset, AssumptionCache *AC,
DominatorTree *DT) {
if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
V2Size == MemoryLocation::UnknownSize)
@@ -1832,6 +1875,7 @@ AnalysisKey BasicAA::Key;
BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) {
return BasicAAResult(F.getParent()->getDataLayout(),
+ F,
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
&AM.getResult<DominatorTreeAnalysis>(F),
@@ -1864,7 +1908,7 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) {
auto &DTWP = getAnalysis<DominatorTreeWrapperPass>();
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
- Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(),
+ Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(),
ACT.getAssumptionCache(F), &DTWP.getDomTree(),
LIWP ? &LIWP->getLoopInfo() : nullptr));
@@ -1881,6 +1925,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
return BasicAAResult(
F.getParent()->getDataLayout(),
+ F,
P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
}
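Among the BasicAliasAnalysis.cpp changes above, the new guard in the Instruction::Shl case of GetLinearExpression gives up on decomposition when the shift count is too large for the value's bit width, since such a shift produces poison in LLVM IR. The following is a rough sketch of that idea on a fixed-width integer; the helper name is invented, and the real code operates on APInt values and compares the count against the bit width of Offset and Scale.

    #include <cstdint>
    #include <iostream>
    #include <optional>

    // Illustrative helper, not LLVM API: shift an 8-bit offset left, or give
    // up (as the diff does) when the count would make the result poison.
    std::optional<int8_t> shiftOffset8(int8_t Offset, unsigned ShAmt) {
      if (ShAmt >= 8)
        return std::nullopt; // e.g. "shl i8 -128, 36": not decomposable
      return static_cast<int8_t>(static_cast<uint8_t>(Offset) << ShAmt);
    }

    int main() {
      std::cout << int(shiftOffset8(3, 2).value_or(0)) << "\n"; // 12: ordinary case
      std::cout << shiftOffset8(-128, 36).has_value() << "\n";  // 0: guard fires
      return 0;
    }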
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 7e323022d9ce..3d095068e7ff 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
@@ -73,7 +74,7 @@ using LoopData = BlockFrequencyInfoImplBase::LoopData;
using Weight = BlockFrequencyInfoImplBase::Weight;
using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
-/// \brief Dithering mass distributer.
+/// Dithering mass distributer.
///
/// This class splits up a single mass into portions by weight, dithering to
/// spread out error. No mass is lost. The dithering precision depends on the
@@ -155,9 +156,9 @@ static void combineWeight(Weight &W, const Weight &OtherW) {
static void combineWeightsBySorting(WeightList &Weights) {
// Sort so edges to the same node are adjacent.
- std::sort(Weights.begin(), Weights.end(),
- [](const Weight &L,
- const Weight &R) { return L.TargetNode < R.TargetNode; });
+ llvm::sort(Weights.begin(), Weights.end(),
+ [](const Weight &L,
+ const Weight &R) { return L.TargetNode < R.TargetNode; });
// Combine adjacent edges.
WeightList::iterator O = Weights.begin();
@@ -276,7 +277,7 @@ void BlockFrequencyInfoImplBase::clear() {
Loops.clear();
}
-/// \brief Clear all memory not needed downstream.
+/// Clear all memory not needed downstream.
///
/// Releases all memory not used downstream. In particular, saves Freqs.
static void cleanup(BlockFrequencyInfoImplBase &BFI) {
@@ -315,13 +316,13 @@ bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
#endif
if (isLoopHeader(Resolved)) {
- DEBUG(debugSuccessor("backedge"));
+ LLVM_DEBUG(debugSuccessor("backedge"));
Dist.addBackedge(Resolved, Weight);
return true;
}
if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
- DEBUG(debugSuccessor(" exit "));
+ LLVM_DEBUG(debugSuccessor(" exit "));
Dist.addExit(Resolved, Weight);
return true;
}
@@ -333,7 +334,7 @@ bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
"unhandled irreducible control flow");
// Irreducible backedge. Abort.
- DEBUG(debugSuccessor("abort!!!"));
+ LLVM_DEBUG(debugSuccessor("abort!!!"));
return false;
}
@@ -344,7 +345,7 @@ bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
"unhandled irreducible control flow");
}
- DEBUG(debugSuccessor(" local "));
+ LLVM_DEBUG(debugSuccessor(" local "));
Dist.addLocal(Resolved, Weight);
return true;
}
@@ -361,10 +362,10 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
return true;
}
-/// \brief Compute the loop scale for a loop.
+/// Compute the loop scale for a loop.
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
// Compute loop scale.
- DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
+ LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
// Infinite loops need special handling. If we give the back edge an infinite
// mass, they may saturate all the other scales in the function down to 1,
@@ -390,20 +391,21 @@ void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
Loop.Scale =
ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
- DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
- << " - " << TotalBackedgeMass << ")\n"
- << " - scale = " << Loop.Scale << "\n");
+ LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
+ << BlockMass::getFull() << " - " << TotalBackedgeMass
+ << ")\n"
+ << " - scale = " << Loop.Scale << "\n");
}
-/// \brief Package up a loop.
+/// Package up a loop.
void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
- DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
+ LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
// Clear the subloop exits to prevent quadratic memory usage.
for (const BlockNode &M : Loop.Nodes) {
if (auto *Loop = Working[M.Index].getPackagedLoop())
Loop->Exits.clear();
- DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
+ LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
}
Loop.IsPackaged = true;
}
@@ -425,7 +427,7 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
LoopData *OuterLoop,
Distribution &Dist) {
BlockMass Mass = Working[Source.Index].getMass();
- DEBUG(dbgs() << " => mass: " << Mass << "\n");
+ LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n");
// Distribute mass to successors as laid out in Dist.
DitheringDistributer D(Dist, Mass);
@@ -435,7 +437,7 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
BlockMass Taken = D.takeMass(W.Amount);
if (W.Type == Weight::Local) {
Working[W.TargetNode.Index].getMass() += Taken;
- DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
+ LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
continue;
}
@@ -445,14 +447,14 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
// Check for a backedge.
if (W.Type == Weight::Backedge) {
OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
- DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
+ LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
continue;
}
// This must be an exit.
assert(W.Type == Weight::Exit);
OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
- DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
+ LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
}
}
@@ -480,28 +482,28 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
}
// Translate the floats to integers.
- DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
- << ", factor = " << ScalingFactor << "\n");
+ LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
+ << ", factor = " << ScalingFactor << "\n");
for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
- DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
- << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
- << ", int = " << BFI.Freqs[Index].Integer << "\n");
+ LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
+ << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
+ << ", int = " << BFI.Freqs[Index].Integer << "\n");
}
}
-/// \brief Unwrap a loop package.
+/// Unwrap a loop package.
///
/// Visits all the members of a loop, adjusting their BlockData according to
/// the loop's pseudo-node.
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
- DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
- << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
- << "\n");
+ LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
+ << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
+ << "\n");
Loop.Scale *= Loop.Mass.toScaled();
Loop.IsPackaged = false;
- DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
+ LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
// Propagate the head scale through the loop. Since members are visited in
// RPO, the head scale will be updated by the loop scale first, and then the
@@ -511,8 +513,8 @@ static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
: BFI.Freqs[N.Index].Scaled;
Scaled64 New = Loop.Scale * F;
- DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
- << "\n");
+ LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
+ << New << "\n");
F = New;
}
}
@@ -544,7 +546,7 @@ void BlockFrequencyInfoImplBase::finalizeMetrics() {
cleanup(*this);
// Print out the final stats.
- DEBUG(dump());
+ LLVM_DEBUG(dump());
}
BlockFrequency
@@ -567,7 +569,7 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
if (!EntryCount)
return None;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
- APInt BlockCount(128, EntryCount.getValue());
+ APInt BlockCount(128, EntryCount.getCount());
APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
@@ -669,7 +671,7 @@ template <> struct GraphTraits<IrreducibleGraph> {
} // end namespace llvm
-/// \brief Find extra irreducible headers.
+/// Find extra irreducible headers.
///
/// Find entry blocks and other blocks with backedges, which exist when \c G
/// contains irreducible sub-SCCs.
@@ -694,7 +696,8 @@ static void findIrreducibleHeaders(
// This is an entry block.
I->second = true;
Headers.push_back(Irr.Node);
- DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
+ LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node)
+ << "\n");
break;
}
}
@@ -702,7 +705,7 @@ static void findIrreducibleHeaders(
"Expected irreducible CFG; -loop-info is likely invalid");
if (Headers.size() == InSCC.size()) {
// Every block is a header.
- std::sort(Headers.begin(), Headers.end());
+ llvm::sort(Headers.begin(), Headers.end());
return;
}
@@ -725,7 +728,8 @@ static void findIrreducibleHeaders(
// Store the extra header.
Headers.push_back(Irr.Node);
- DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
+ LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node)
+ << "\n");
break;
}
if (Headers.back() == Irr.Node)
@@ -734,10 +738,10 @@ static void findIrreducibleHeaders(
// This is not a header.
Others.push_back(Irr.Node);
- DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
+ LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
}
- std::sort(Headers.begin(), Headers.end());
- std::sort(Others.begin(), Others.end());
+ llvm::sort(Headers.begin(), Headers.end());
+ llvm::sort(Others.begin(), Others.end());
}
static void createIrreducibleLoop(
@@ -745,7 +749,7 @@ static void createIrreducibleLoop(
LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
// Translate the SCC into RPO.
- DEBUG(dbgs() << " - found-scc\n");
+ LLVM_DEBUG(dbgs() << " - found-scc\n");
LoopData::NodeList Headers;
LoopData::NodeList Others;
@@ -806,27 +810,28 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
BlockMass LoopMass = BlockMass::getFull();
Distribution Dist;
- DEBUG(dbgs() << "adjust-loop-header-mass:\n");
+ LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
auto &HeaderNode = Loop.Nodes[H];
auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
- DEBUG(dbgs() << " - Add back edge mass for node "
- << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
+ LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
+ << getBlockName(HeaderNode) << ": " << BackedgeMass
+ << "\n");
if (BackedgeMass.getMass() > 0)
Dist.addLocal(HeaderNode, BackedgeMass.getMass());
else
- DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
+ LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
}
DitheringDistributer D(Dist, LoopMass);
- DEBUG(dbgs() << " Distribute loop mass " << LoopMass
- << " to headers using above weights\n");
+ LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
+ << " to headers using above weights\n");
for (const Weight &W : Dist.Weights) {
BlockMass Taken = D.takeMass(W.Amount);
assert(W.Type == Weight::Local && "all weights should be local");
Working[W.TargetNode.Index].getMass() = Taken;
- DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
+ LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
}
}
@@ -837,6 +842,6 @@ void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist)
BlockMass Taken = D.takeMass(W.Amount);
assert(W.Type == Weight::Local && "all weights should be local");
Working[W.TargetNode.Index].getMass() = Taken;
- DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
+ LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
}
}
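The BlockFrequencyInfoImpl.cpp hunk is largely the mechanical rename of the DEBUG macro to LLVM_DEBUG (plus std::sort to llvm::sort). As a rough illustration of what such a macro does, here is a simplified stand-in, deliberately given a different name; the real LLVM_DEBUG in llvm/Support/Debug.h additionally honors the -debug and -debug-only options.

    #include <iostream>

    // Simplified stand-in for an LLVM_DEBUG-style macro: the wrapped statement
    // is compiled only into asserts-enabled (non-NDEBUG) builds.
    #ifndef NDEBUG
    #define SKETCH_DEBUG(X) do { X; } while (false)
    #else
    #define SKETCH_DEBUG(X) do { } while (false)
    #endif

    int main() {
      double Scale = 0.25;
      SKETCH_DEBUG(std::cout << " - scale = " << Scale << "\n"); // debug builds only
      return 0;
    }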
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 58ccad89d508..54a657073f0f 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -85,15 +86,17 @@ char BranchProbabilityInfoWrapperPass::ID = 0;
// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+// Unlikely edges within a loop are half as likely as other edges
+static const uint32_t LBH_UNLIKELY_WEIGHT = 62;
-/// \brief Unreachable-terminating branch taken probability.
+/// Unreachable-terminating branch taken probability.
///
/// This is the probability for a branch being taken to a block that terminates
/// (eventually) in unreachable. These are predicted as unlikely as possible.
/// All reachable probability will equally share the remaining part.
static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
-/// \brief Weight for a branch taken going into a cold block.
+/// Weight for a branch taken going into a cold block.
///
/// This is the weight for a branch taken toward a block marked
/// cold. A block is marked cold if it's postdominated by a
@@ -101,7 +104,7 @@ static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
/// are those marked with attribute 'cold'.
static const uint32_t CC_TAKEN_WEIGHT = 4;
-/// \brief Weight for a branch not-taken into a cold block.
+/// Weight for a branch not-taken into a cold block.
///
/// This is the weight for a branch not taken toward a block marked
/// cold.
@@ -116,20 +119,20 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
-/// \brief Invoke-terminating normal branch taken weight
+/// Invoke-terminating normal branch taken weight
///
/// This is the weight for branching to the normal destination of an invoke
/// instruction. We expect this to happen most of the time. Set the weight to an
/// absurdly high value so that nested loops subsume it.
static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
-/// \brief Invoke-terminating normal branch not-taken weight.
+/// Invoke-terminating normal branch not-taken weight.
///
/// This is the weight for branching to the unwind destination of an invoke
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
-/// \brief Add \p BB to PostDominatedByUnreachable set if applicable.
+/// Add \p BB to PostDominatedByUnreachable set if applicable.
void
BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
@@ -160,7 +163,7 @@ BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
PostDominatedByUnreachable.insert(BB);
}
-/// \brief Add \p BB to PostDominatedByColdCall set if applicable.
+/// Add \p BB to PostDominatedByColdCall set if applicable.
void
BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
assert(!PostDominatedByColdCall.count(BB));
@@ -194,18 +197,16 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
}
}
-/// \brief Calculate edge weights for successors lead to unreachable.
+/// Calculate edge weights for successors that lead to unreachable.
///
/// Predict that a successor which leads necessarily to an
/// unreachable-terminated block as extremely unlikely.
bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
+ (void) TI;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
-
- // Return false here so that edge weights for InvokeInst could be decided
- // in calcInvokeHeuristics().
- if (isa<InvokeInst>(TI))
- return false;
+ assert(!isa<InvokeInst>(TI) &&
+ "Invokes should have already been handled by calcInvokeHeuristics");
SmallVector<unsigned, 4> UnreachableEdges;
SmallVector<unsigned, 4> ReachableEdges;
@@ -338,7 +339,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
return true;
}
-/// \brief Calculate edge weights for edges leading to cold blocks.
+/// Calculate edge weights for edges leading to cold blocks.
///
/// A cold block is one post-dominated by a block with a call to a
/// cold function. Those edges are unlikely to be taken, so we give
@@ -348,12 +349,10 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
/// Return false, otherwise.
bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
+ (void) TI;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
-
- // Return false here so that edge weights for InvokeInst could be decided
- // in calcInvokeHeuristics().
- if (isa<InvokeInst>(TI))
- return false;
+ assert(!isa<InvokeInst>(TI) &&
+ "Invokes should have already been handled by calcInvokeHeuristics");
// Determine which successors are post-dominated by a cold block.
SmallVector<unsigned, 4> ColdEdges;
@@ -390,7 +389,7 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
return true;
}
-// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
+// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
// between two pointer or pointer and NULL will fail.
bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
@@ -457,6 +456,113 @@ static bool isSCCHeader(const BasicBlock *BB, int SccNum,
return HeaderMapIt->second;
}
+// Compute the unlikely successors to the block BB in the loop L, specifically
+// those that are unlikely because this is a loop, and add them to the
+// UnlikelyBlocks set.
+static void
+computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
+ SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
+ // Sometimes in a loop we have a branch whose condition is made false by
+ // taking it. This is typically something like
+ // int n = 0;
+ // while (...) {
+ // if (++n >= MAX) {
+ // n = 0;
+ // }
+ // }
+ // In this sort of situation taking the branch means that at the very least it
+ // won't be taken again in the next iteration of the loop, so we should
+ // consider it less likely than a typical branch.
+ //
+ // We detect this by looking back through the graph of PHI nodes that sets the
+ // value that the condition depends on, and seeing if we can reach a successor
+ // block which can be determined to make the condition false.
+ //
+ // FIXME: We currently consider unlikely blocks to be half as likely as other
+  // blocks, but if we consider the example above the likelihood is actually
+ // 1/MAX. We could therefore be more precise in how unlikely we consider
+ // blocks to be, but it would require more careful examination of the form
+ // of the comparison expression.
+ const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return;
+
+ // Check if the branch is based on an instruction compared with a constant
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
+ !isa<Constant>(CI->getOperand(1)))
+ return;
+
+ // Either the instruction must be a PHI, or a chain of operations involving
+ // constants that ends in a PHI which we can then collapse into a single value
+ // if the PHI value is known.
+ Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0));
+ PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
+ Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
+ // Collect the instructions until we hit a PHI
+ SmallVector<BinaryOperator *, 1> InstChain;
+ while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
+ isa<Constant>(CmpLHS->getOperand(1))) {
+ // Stop if the chain extends outside of the loop
+ if (!L->contains(CmpLHS))
+ return;
+ InstChain.push_back(cast<BinaryOperator>(CmpLHS));
+ CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
+ if (CmpLHS)
+ CmpPHI = dyn_cast<PHINode>(CmpLHS);
+ }
+ if (!CmpPHI || !L->contains(CmpPHI))
+ return;
+
+ // Trace the phi node to find all values that come from successors of BB
+ SmallPtrSet<PHINode*, 8> VisitedInsts;
+ SmallVector<PHINode*, 8> WorkList;
+ WorkList.push_back(CmpPHI);
+ VisitedInsts.insert(CmpPHI);
+ while (!WorkList.empty()) {
+ PHINode *P = WorkList.back();
+ WorkList.pop_back();
+ for (BasicBlock *B : P->blocks()) {
+ // Skip blocks that aren't part of the loop
+ if (!L->contains(B))
+ continue;
+ Value *V = P->getIncomingValueForBlock(B);
+ // If the source is a PHI add it to the work list if we haven't
+ // already visited it.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (VisitedInsts.insert(PN).second)
+ WorkList.push_back(PN);
+ continue;
+ }
+ // If this incoming value is a constant and B is a successor of BB, then
+ // we can constant-evaluate the compare to see if it makes the branch be
+ // taken or not.
+ Constant *CmpLHSConst = dyn_cast<Constant>(V);
+ if (!CmpLHSConst ||
+ std::find(succ_begin(BB), succ_end(BB), B) == succ_end(BB))
+ continue;
+ // First collapse InstChain
+ for (Instruction *I : llvm::reverse(InstChain)) {
+ CmpLHSConst = ConstantExpr::get(I->getOpcode(), CmpLHSConst,
+ cast<Constant>(I->getOperand(1)), true);
+ if (!CmpLHSConst)
+ break;
+ }
+ if (!CmpLHSConst)
+ continue;
+ // Now constant-evaluate the compare
+ Constant *Result = ConstantExpr::getCompare(CI->getPredicate(),
+ CmpLHSConst, CmpConst, true);
+ // If the result means we don't branch to the block then that block is
+ // unlikely.
+ if (Result &&
+ ((Result->isZeroValue() && B == BI->getSuccessor(0)) ||
+ (Result->isOneValue() && B == BI->getSuccessor(1))))
+ UnlikelyBlocks.insert(B);
+ }
+ }
+}
+
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
@@ -470,15 +576,22 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
return false;
}
+ SmallPtrSet<const BasicBlock*, 8> UnlikelyBlocks;
+ if (L)
+ computeUnlikelySuccessors(BB, L, UnlikelyBlocks);
+
SmallVector<unsigned, 8> BackEdges;
SmallVector<unsigned, 8> ExitingEdges;
SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
+ SmallVector<unsigned, 8> UnlikelyEdges;
for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
// Use LoopInfo if we have it, otherwise fall-back to SCC info to catch
// irreducible loops.
if (L) {
- if (!L->contains(*I))
+ if (UnlikelyBlocks.count(*I) != 0)
+ UnlikelyEdges.push_back(I.getSuccessorIndex());
+ else if (!L->contains(*I))
ExitingEdges.push_back(I.getSuccessorIndex());
else if (L->getHeader() == *I)
BackEdges.push_back(I.getSuccessorIndex());
@@ -494,42 +607,46 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
}
}
- if (BackEdges.empty() && ExitingEdges.empty())
+ if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty())
return false;
// Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
// normalize them so that they sum up to one.
- BranchProbability Probs[] = {BranchProbability::getZero(),
- BranchProbability::getZero(),
- BranchProbability::getZero()};
unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
(InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
+ (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) +
(ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
- if (!BackEdges.empty())
- Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
- if (!InEdges.empty())
- Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
- if (!ExitingEdges.empty())
- Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom);
if (uint32_t numBackEdges = BackEdges.size()) {
- auto Prob = Probs[0] / numBackEdges;
+ BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
+ auto Prob = TakenProb / numBackEdges;
for (unsigned SuccIdx : BackEdges)
setEdgeProbability(BB, SuccIdx, Prob);
}
if (uint32_t numInEdges = InEdges.size()) {
- auto Prob = Probs[1] / numInEdges;
+ BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
+ auto Prob = TakenProb / numInEdges;
for (unsigned SuccIdx : InEdges)
setEdgeProbability(BB, SuccIdx, Prob);
}
if (uint32_t numExitingEdges = ExitingEdges.size()) {
- auto Prob = Probs[2] / numExitingEdges;
+ BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT,
+ Denom);
+ auto Prob = NotTakenProb / numExitingEdges;
for (unsigned SuccIdx : ExitingEdges)
setEdgeProbability(BB, SuccIdx, Prob);
}
+ if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) {
+ BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT,
+ Denom);
+ auto Prob = UnlikelyProb / numUnlikelyEdges;
+ for (unsigned SuccIdx : UnlikelyEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
+ }
+
return true;
}
@@ -752,8 +869,7 @@ BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
if (I != Probs.end())
return I->second;
- return {1,
- static_cast<uint32_t>(std::distance(succ_begin(Src), succ_end(Src)))};
+ return {1, static_cast<uint32_t>(succ_size(Src))};
}
BranchProbability
@@ -788,8 +904,9 @@ void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src,
BranchProbability Prob) {
Probs[std::make_pair(Src, IndexInSuccessors)] = Prob;
Handles.insert(BasicBlockCallbackVH(Src, this));
- DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors
- << " successor probability to " << Prob << "\n");
+ LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
+ << IndexInSuccessors << " successor probability to " << Prob
+ << "\n");
}
raw_ostream &
@@ -814,8 +931,8 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
const TargetLibraryInfo *TLI) {
- DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
- << " ----\n\n");
+ LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
+ << " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
assert(PostDominatedByUnreachable.empty());
assert(PostDominatedByColdCall.empty());
@@ -833,18 +950,19 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
if (Scc.size() == 1)
continue;
- DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
+ LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
for (auto *BB : Scc) {
- DEBUG(dbgs() << " " << BB->getName());
+ LLVM_DEBUG(dbgs() << " " << BB->getName());
SccI.SccNums[BB] = SccNum;
}
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "\n");
}
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
for (auto BB : post_order(&F.getEntryBlock())) {
- DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
+ << "\n");
updatePostDominatedByUnreachable(BB);
updatePostDominatedByColdCall(BB);
// If there are fewer than two successors, there is no point in setting a probability.
@@ -852,6 +970,8 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
continue;
if (calcMetadataWeights(BB))
continue;
+ if (calcInvokeHeuristics(BB))
+ continue;
if (calcUnreachableHeuristics(BB))
continue;
if (calcColdCallHeuristics(BB))
@@ -864,7 +984,6 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
continue;
if (calcFloatingPointHeuristics(BB))
continue;
- calcInvokeHeuristics(BB);
}
PostDominatedByUnreachable.clear();
@@ -879,6 +998,10 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
AnalysisUsage &AU) const {
+ // We require DT so it's available when LI is available. The LI updating code
+ // asserts that DT is also present so if we don't make sure that we have DT
+ // here, that assert will trigger.
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesAll();
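
Taken together, the weight constants at the top of this file and the reworked calcLoopBranchHeuristics assign probabilities by building the denominator only from the edge classes that actually occur (back and in edges at LBH_TAKEN_WEIGHT, exiting edges at LBH_NONTAKEN_WEIGHT, and the new unlikely class at LBH_UNLIKELY_WEIGHT), then splitting each class's share evenly over its edges. A small standalone sketch of that arithmetic, using plain doubles in place of BranchProbability:

#include <cstdio>

int main() {
  // Weights defined in BranchProbabilityInfo.cpp.
  const unsigned TakenW = 124, NotTakenW = 4, UnlikelyW = 62;

  // Example block: one back edge, one exiting edge, one unlikely edge and no
  // in-edges. Only the classes that are present contribute to the denominator.
  unsigned NumBack = 1, NumIn = 0, NumExit = 1, NumUnlikely = 1;
  unsigned Denom = (NumBack ? TakenW : 0) + (NumIn ? TakenW : 0) +
                   (NumUnlikely ? UnlikelyW : 0) + (NumExit ? NotTakenW : 0);

  // Each class's probability is split evenly among its edges.
  if (NumBack)
    std::printf("back edge:     %f\n", double(TakenW) / Denom / NumBack);
  if (NumExit)
    std::printf("exiting edge:  %f\n", double(NotTakenW) / Denom / NumExit);
  if (NumUnlikely)
    std::printf("unlikely edge: %f\n", double(UnlikelyW) / Denom / NumUnlikely);
  return 0;
}

For this example the shares come out to roughly 0.653, 0.021 and 0.326, matching the comment above that an unlikely in-loop edge carries half the weight of a normal taken edge.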
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
index fb261755e5d1..fc25cef8ddca 100644
--- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -22,6 +22,11 @@
#include "llvm/Support/FileSystem.h"
using namespace llvm;
+static cl::opt<std::string> CFGFuncName(
+ "cfg-func-name", cl::Hidden,
+ cl::desc("The name of a function (or its substring)"
+ " whose CFG is viewed/printed."));
+
namespace {
struct CFGViewerLegacyPass : public FunctionPass {
static char ID; // Pass identifcation, replacement for typeid
@@ -83,6 +88,8 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
}
static void writeCFGToDotFile(Function &F, bool CFGOnly = false) {
+ if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
+ return;
std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
@@ -162,6 +169,8 @@ PreservedAnalyses CFGOnlyPrinterPass::run(Function &F,
/// being a 'dot' and 'gv' program in your path.
///
void Function::viewCFG() const {
+ if (!CFGFuncName.empty() && !getName().contains(CFGFuncName))
+ return;
ViewGraph(this, "cfg" + getName());
}
@@ -171,6 +180,8 @@ void Function::viewCFG() const {
/// this can make the graph smaller.
///
void Function::viewCFGOnly() const {
+ if (!CFGFuncName.empty() && !getName().contains(CFGFuncName))
+ return;
ViewGraph(this, "cfg" + getName(), true);
}
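
The three guards added to CFGPrinter.cpp apply one filter: skip viewing or printing unless -cfg-func-name is unset or occurs as a substring of the function's name. A minimal sketch of that predicate outside of LLVM, using std::string::find in place of StringRef::contains (same substring semantics):

#include <cassert>
#include <string>

// True when the CFG should be skipped: a filter is set and the function name
// does not contain it as a substring.
static bool skipCFG(const std::string &FuncName, const std::string &Filter) {
  return !Filter.empty() && FuncName.find(Filter) == std::string::npos;
}

int main() {
  assert(!skipCFG("foo", ""));             // no filter: print everything
  assert(!skipCFG("my_foo_impl", "foo"));  // substring match: print
  assert(skipCFG("bar", "foo"));           // no match: skip
  return 0;
}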
diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 076a2b205d00..194983418b08 100644
--- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -18,7 +18,7 @@
//
// The algorithm used here is based on recursive state machine matching scheme
// proposed in "Demand-driven alias analysis for C" by Xin Zheng and Radu
-// Rugina. The general idea is to extend the tranditional transitive closure
+// Rugina. The general idea is to extend the traditional transitive closure
// algorithm to perform CFL matching along the way: instead of recording
// "whether X is reachable from Y", we keep track of "whether X is reachable
// from Y at state Z", where the "state" field indicates where we are in the CFL
@@ -337,7 +337,7 @@ public:
FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
const ReachabilitySet &, const AliasAttrMap &);
- bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const;
+ bool mayAlias(const Value *, LocationSize, const Value *, LocationSize) const;
const AliasSummary &getAliasSummary() const { return Summary; }
};
@@ -395,7 +395,7 @@ populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
}
// Sort AliasList for faster lookup
- std::sort(AliasList.begin(), AliasList.end());
+ llvm::sort(AliasList.begin(), AliasList.end());
}
}
@@ -479,7 +479,7 @@ static void populateExternalRelations(
}
// Remove duplicates in ExtRelations
- std::sort(ExtRelations.begin(), ExtRelations.end());
+ llvm::sort(ExtRelations.begin(), ExtRelations.end());
ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
ExtRelations.end());
}
@@ -516,9 +516,9 @@ CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
}
bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
- uint64_t LHSSize,
+ LocationSize LHSSize,
const Value *RHS,
- uint64_t RHSSize) const {
+ LocationSize RHSSize) const {
assert(LHS && RHS);
// Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
@@ -645,7 +645,7 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
// relations that are symmetric, we could actually cut the storage by half by
// sorting FromNode and ToNode before insertion happens.
- // The newly added value alias pair may pontentially generate more memory
+ // The newly added value alias pair may potentially generate more memory
// alias pairs. Check for them here.
auto FromNodeBelow = getNodeBelow(Graph, FromNode);
auto ToNodeBelow = getNodeBelow(Graph, ToNode);
@@ -855,8 +855,9 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
if (!Fn) {
// The only times this is known to happen are when globals + InlineAsm are
// involved
- DEBUG(dbgs()
- << "CFLAndersAA: could not extract parent function information.\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "CFLAndersAA: could not extract parent function information.\n");
return MayAlias;
}
} else {
diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h
index e4e92864061f..86812009da7c 100644
--- a/contrib/llvm/lib/Analysis/CFLGraph.h
+++ b/contrib/llvm/lib/Analysis/CFLGraph.h
@@ -46,7 +46,7 @@
namespace llvm {
namespace cflaa {
-/// \brief The Program Expression Graph (PEG) of CFL analysis
+/// The Program Expression Graph (PEG) of CFL analysis
/// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
/// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
/// the main purpose of this graph is to abstract away unrelated facts and
@@ -154,7 +154,7 @@ public:
}
};
-///\brief A builder class used to create CFLGraph instance from a given function
+/// A builder class used to create a CFLGraph instance from a given function
/// The CFL-AA that uses this builder must provide its own type as a template
/// argument. This is necessary for interprocedural processing: CFLGraphBuilder
/// needs a way of obtaining the summary of other functions when callinsts are
@@ -423,17 +423,15 @@ template <typename CFLAA> class CFLGraphBuilder {
addNode(Inst);
// Check if Inst is a call to a library function that
- // allocates/deallocates
- // on the heap. Those kinds of functions do not introduce any aliases.
+ // allocates/deallocates on the heap. Those kinds of functions do not
+ // introduce any aliases.
// TODO: address other common library functions such as realloc(),
- // strdup(),
- // etc.
+ // strdup(), etc.
if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI))
return;
// TODO: Add support for noalias args/all the other fun function
- // attributes
- // that we can tack on.
+ // attributes that we can tack on.
SmallVector<Function *, 4> Targets;
if (getPossibleTargets(CS, Targets))
if (tryInterproceduralAnalysis(CS, Targets))
@@ -515,14 +513,16 @@ template <typename CFLAA> class CFLGraphBuilder {
visitGEP(*GEPOp);
break;
}
+
case Instruction::PtrToInt: {
- auto *Ptr = CE->getOperand(0);
- addNode(Ptr, getAttrEscaped());
+ addNode(CE->getOperand(0), getAttrEscaped());
break;
}
- case Instruction::IntToPtr:
+
+ case Instruction::IntToPtr: {
addNode(CE, getAttrUnknown());
break;
+ }
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
@@ -535,48 +535,29 @@ template <typename CFLAA> class CFLGraphBuilder {
case Instruction::SIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI: {
- auto *Src = CE->getOperand(0);
- addAssignEdge(Src, CE);
+ addAssignEdge(CE->getOperand(0), CE);
break;
}
+
case Instruction::Select: {
- auto *TrueVal = CE->getOperand(0);
- auto *FalseVal = CE->getOperand(1);
- addAssignEdge(TrueVal, CE);
- addAssignEdge(FalseVal, CE);
- break;
- }
- case Instruction::InsertElement: {
- auto *Vec = CE->getOperand(0);
- auto *Val = CE->getOperand(1);
- addAssignEdge(Vec, CE);
- addStoreEdge(Val, CE);
- break;
- }
- case Instruction::ExtractElement: {
- auto *Ptr = CE->getOperand(0);
- addLoadEdge(Ptr, CE);
+ addAssignEdge(CE->getOperand(1), CE);
+ addAssignEdge(CE->getOperand(2), CE);
break;
}
+
+ case Instruction::InsertElement:
case Instruction::InsertValue: {
- auto *Agg = CE->getOperand(0);
- auto *Val = CE->getOperand(1);
- addAssignEdge(Agg, CE);
- addStoreEdge(Val, CE);
+ addAssignEdge(CE->getOperand(0), CE);
+ addStoreEdge(CE->getOperand(1), CE);
break;
}
+
+ case Instruction::ExtractElement:
case Instruction::ExtractValue: {
- auto *Ptr = CE->getOperand(0);
- addLoadEdge(Ptr, CE);
- break;
- }
- case Instruction::ShuffleVector: {
- auto *From1 = CE->getOperand(0);
- auto *From2 = CE->getOperand(1);
- addAssignEdge(From1, CE);
- addAssignEdge(From2, CE);
+ addLoadEdge(CE->getOperand(0), CE);
break;
}
+
case Instruction::Add:
case Instruction::Sub:
case Instruction::FSub:
@@ -596,9 +577,11 @@ template <typename CFLAA> class CFLGraphBuilder {
case Instruction::AShr:
case Instruction::ICmp:
case Instruction::FCmp:
+ case Instruction::ShuffleVector: {
addAssignEdge(CE->getOperand(0), CE);
addAssignEdge(CE->getOperand(1), CE);
break;
+ }
default:
llvm_unreachable("Unknown instruction type encountered!");
diff --git a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index eee6d26ba787..30ce13578e54 100644
--- a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -276,8 +276,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
if (!MaybeFnA && !MaybeFnB) {
// The only times this is known to happen are when globals + InlineAsm are
// involved
- DEBUG(dbgs()
- << "CFLSteensAA: could not extract parent function information.\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "CFLSteensAA: could not extract parent function information.\n");
return MayAlias;
}
diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
index ceff94756fe3..b325afb8e7c5 100644
--- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -32,7 +32,7 @@
using namespace llvm;
-// Explicit template instantiations and specialization defininitions for core
+// Explicit template instantiations and specialization definitions for core
// template typedefs.
namespace llvm {
@@ -75,7 +75,7 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// If the CGSCC pass wasn't able to provide a valid updated SCC, the
// current SCC may simply need to be skipped if invalid.
if (UR.InvalidatedSCCs.count(C)) {
- DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
+ LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
break;
}
// Check that we didn't miss any update scenario.
@@ -96,7 +96,7 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// ...getContext().yield();
}
- // Invaliadtion was handled after each pass in the above loop for the current
+ // Invalidation was handled after each pass in the above loop for the current
// SCC. Therefore, the remaining analysis results in the AnalysisManager are
// preserved. We mark this with a set so that we don't need to inspect each
// one individually.
@@ -353,7 +353,8 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
// Add the current SCC to the worklist as its shape has changed.
UR.CWorklist.insert(C);
- DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n");
+ LLVM_DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist:" << *C
+ << "\n");
SCC *OldC = C;
@@ -372,7 +373,7 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
// We need to propagate an invalidation call to all but the newly current SCC
// because the outer pass manager won't do that for us after splitting them.
// FIXME: We should accept a PreservedAnalysis from the CG updater so that if
- // there are preserved ananalyses we can avoid invalidating them here for
+  // there are preserved analyses we can avoid invalidating them here for
// split-off SCCs.
// We know however that this will preserve any FAM proxy so go ahead and mark
// that.
@@ -389,7 +390,7 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
assert(C != &NewC && "No need to re-visit the current SCC!");
assert(OldC != &NewC && "Already handled the original SCC!");
UR.CWorklist.insert(&NewC);
- DEBUG(dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n");
+ LLVM_DEBUG(dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n");
// Ensure new SCCs' function analyses are updated.
if (NeedFAMProxy)
@@ -514,8 +515,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
return false;
RC->removeOutgoingEdge(N, *TargetN);
- DEBUG(dbgs() << "Deleting outgoing edge from '" << N
- << "' to '" << TargetN << "'\n");
+ LLVM_DEBUG(dbgs() << "Deleting outgoing edge from '"
+ << N << "' to '" << TargetN << "'\n");
return true;
}),
DeadTargets.end());
@@ -546,8 +547,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(NewRC != RC && "Should not encounter the current RefSCC further "
"in the postorder list of new RefSCCs.");
UR.RCWorklist.insert(NewRC);
- DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: "
- << *NewRC << "\n");
+ LLVM_DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: "
+ << *NewRC << "\n");
}
}
@@ -564,8 +565,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(RC->isAncestorOf(TargetRC) &&
"Cannot potentially form RefSCC cycles here!");
RC->switchOutgoingEdgeToRef(N, *RefTarget);
- DEBUG(dbgs() << "Switch outgoing call edge to a ref edge from '" << N
- << "' to '" << *RefTarget << "'\n");
+ LLVM_DEBUG(dbgs() << "Switch outgoing call edge to a ref edge from '" << N
+ << "' to '" << *RefTarget << "'\n");
continue;
}
@@ -593,12 +594,12 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(RC->isAncestorOf(TargetRC) &&
"Cannot potentially form RefSCC cycles here!");
RC->switchOutgoingEdgeToCall(N, *CallTarget);
- DEBUG(dbgs() << "Switch outgoing ref edge to a call edge from '" << N
- << "' to '" << *CallTarget << "'\n");
+ LLVM_DEBUG(dbgs() << "Switch outgoing ref edge to a call edge from '" << N
+ << "' to '" << *CallTarget << "'\n");
continue;
}
- DEBUG(dbgs() << "Switch an internal ref edge to a call edge from '" << N
- << "' to '" << *CallTarget << "'\n");
+ LLVM_DEBUG(dbgs() << "Switch an internal ref edge to a call edge from '"
+ << N << "' to '" << *CallTarget << "'\n");
// Otherwise we are switching an internal ref edge to a call edge. This
// may merge away some SCCs, and we add those to the UpdateResult. We also
@@ -635,7 +636,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// If one of the invalidated SCCs had a cached proxy to a function
// analysis manager, we need to create a proxy in the new current SCC as
- // the invaliadted SCCs had their functions moved.
+ // the invalidated SCCs had their functions moved.
if (HasFunctionAnalysisProxy)
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G);
@@ -661,14 +662,14 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// post-order sequence, and may end up observing more precise context to
// optimize the current SCC.
UR.CWorklist.insert(C);
- DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist: " << *C
- << "\n");
+ LLVM_DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist: " << *C
+ << "\n");
// Enqueue in reverse order as we pop off the back of the worklist.
for (SCC &MovedC : llvm::reverse(make_range(RC->begin() + InitialSCCIndex,
RC->begin() + NewSCCIndex))) {
UR.CWorklist.insert(&MovedC);
- DEBUG(dbgs() << "Enqueuing a newly earlier in post-order SCC: "
- << MovedC << "\n");
+ LLVM_DEBUG(dbgs() << "Enqueuing a newly earlier in post-order SCC: "
+ << MovedC << "\n");
}
}
}
diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
index ac3ea2b73fed..7d5d2d2e4496 100644
--- a/contrib/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
@@ -96,8 +97,8 @@ void CallGraph::print(raw_ostream &OS) const {
for (const auto &I : *this)
Nodes.push_back(I.second.get());
- std::sort(Nodes.begin(), Nodes.end(),
- [](CallGraphNode *LHS, CallGraphNode *RHS) {
+ llvm::sort(Nodes.begin(), Nodes.end(),
+ [](CallGraphNode *LHS, CallGraphNode *RHS) {
if (Function *LF = LHS->getFunction())
if (Function *RF = RHS->getFunction())
return LF->getName() < RF->getName();
diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index a2dda58a6a2f..f2211edba216 100644
--- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -120,6 +120,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
bool &DevirtualizedCall) {
bool Changed = false;
PMDataManager *PM = P->getAsPMDataManager();
+ Module &M = CG.getModule();
if (!PM) {
CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
@@ -129,8 +130,17 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
}
{
+ unsigned InstrCount = 0;
+ bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
TimeRegion PassTimer(getPassTimer(CGSP));
+ if (EmitICRemark)
+ InstrCount = initSizeRemarkInfo(M);
Changed = CGSP->runOnSCC(CurSCC);
+
+ // If the pass modified the module, it may have modified the instruction
+ // count of the module. Try emitting a remark.
+ if (EmitICRemark)
+ emitInstrCountChangedRemark(P, M, InstrCount);
}
// After the CGSCCPass is done, when assertions are enabled, use
@@ -162,8 +172,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
// The function pass(es) modified the IR, they may have clobbered the
// callgraph.
if (Changed && CallGraphUpToDate) {
- DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: "
- << P->getPassName() << '\n');
+ LLVM_DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: " << P->getPassName()
+ << '\n');
CallGraphUpToDate = false;
}
return Changed;
@@ -181,12 +191,11 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool CheckingMode) {
DenseMap<Value*, CallGraphNode*> CallSites;
-
- DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
- << " nodes:\n";
- for (CallGraphNode *CGN : CurSCC)
- CGN->dump();
- );
+
+ LLVM_DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
+ << " nodes:\n";
+    for (CallGraphNode *CGN : CurSCC)
+      CGN->dump(););
bool MadeChange = false;
bool DevirtualizedCall = false;
@@ -307,8 +316,8 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// one.
if (!ExistingNode->getFunction()) {
DevirtualizedCall = true;
- DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
- << Callee->getName() << "'\n");
+ LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
+ << Callee->getName() << "'\n");
}
} else {
CalleeNode = CG.getCallsExternalNode();
@@ -363,17 +372,15 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CallSites.clear();
}
- DEBUG(if (MadeChange) {
- dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
- for (CallGraphNode *CGN : CurSCC)
- CGN->dump();
- if (DevirtualizedCall)
- dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
-
- } else {
- dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
- }
- );
+ LLVM_DEBUG(if (MadeChange) {
+ dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
+ if (DevirtualizedCall)
+ dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
+ } else {
+ dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
+ });
(void)MadeChange;
return DevirtualizedCall;
@@ -472,16 +479,17 @@ bool CGPassManager::runOnModule(Module &M) {
unsigned Iteration = 0;
bool DevirtualizedCall = false;
do {
- DEBUG(if (Iteration)
- dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #"
- << Iteration << '\n');
+ LLVM_DEBUG(if (Iteration) dbgs()
+ << " SCCPASSMGR: Re-visiting SCC, iteration #" << Iteration
+ << '\n');
DevirtualizedCall = false;
Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
} while (Iteration++ < MaxIterations && DevirtualizedCall);
if (DevirtualizedCall)
- DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
- << " times, due to -max-cg-scc-iterations\n");
+ LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after "
+ << Iteration
+ << " times, due to -max-cg-scc-iterations\n");
MaxSCCIterations.updateMax(Iteration);
}
@@ -648,7 +656,7 @@ Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &OS,
bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const {
return !SCC.getCallGraph().getModule()
.getContext()
- .getOptBisect()
+ .getOptPassGate()
.shouldRunPass(this, SCC);
}
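
The RunPassOnSCC hunk above snapshots the module's instruction count before running the SCC pass and emits a size-change remark afterwards when the pass reported a change. A hedged sketch of the counting half against the public IR API; countModuleInstructions is an illustrative stand-in, not the initSizeRemarkInfo helper itself:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

// Count every IR instruction in the module. Comparing this value before and
// after a pass gives the delta that the size remark reports.
static unsigned countModuleInstructions(const llvm::Module &M) {
  unsigned Count = 0;
  for (const llvm::Function &F : M)
    for (const llvm::BasicBlock &BB : F)
      Count += BB.size();
  return Count;
}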
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index 3b0026ba10e9..d4f73bdb4361 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
@@ -60,7 +61,7 @@ namespace {
/// as the given instruction and the use.
struct CapturesBefore : public CaptureTracker {
- CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT,
+ CapturesBefore(bool ReturnCaptures, const Instruction *I, const DominatorTree *DT,
bool IncludeI, OrderedBasicBlock *IC)
: OrderedBB(IC), BeforeHere(I), DT(DT),
ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {}
@@ -140,7 +141,7 @@ namespace {
OrderedBasicBlock *OrderedBB;
const Instruction *BeforeHere;
- DominatorTree *DT;
+ const DominatorTree *DT;
bool ReturnCaptures;
bool IncludeI;
@@ -184,7 +185,7 @@ bool llvm::PointerMayBeCaptured(const Value *V,
/// queries about relative order among instructions in the same basic block.
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
- DominatorTree *DT, bool IncludeI,
+ const DominatorTree *DT, bool IncludeI,
OrderedBasicBlock *OBB) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
@@ -215,18 +216,22 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
SmallVector<const Use *, Threshold> Worklist;
SmallSet<const Use *, Threshold> Visited;
- int Count = 0;
- for (const Use &U : V->uses()) {
- // If there are lots of uses, conservatively say that the value
- // is captured to avoid taking too much compile time.
- if (Count++ >= Threshold)
- return Tracker->tooManyUses();
-
- if (!Tracker->shouldExplore(&U)) continue;
- Visited.insert(&U);
- Worklist.push_back(&U);
- }
+ auto AddUses = [&](const Value *V) {
+ int Count = 0;
+ for (const Use &U : V->uses()) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return Tracker->tooManyUses();
+ if (!Visited.insert(&U).second)
+ continue;
+ if (!Tracker->shouldExplore(&U))
+ continue;
+ Worklist.push_back(&U);
+ }
+ };
+ AddUses(V);
while (!Worklist.empty()) {
const Use *U = Worklist.pop_back_val();
@@ -243,6 +248,16 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
break;
+  // The pointer is not captured if the returned pointer is not captured.
+ // NOTE: CaptureTracking users should not assume that only functions
+ // marked with nocapture do not capture. This means that places like
+ // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression
+ // in BasicAA also need to know about this property.
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS)) {
+ AddUses(I);
+ break;
+ }
+
// Volatile operations effectively capture the memory location that they
// load and store to.
if (auto *MI = dyn_cast<MemIntrinsic>(I))
@@ -313,17 +328,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
case Instruction::Select:
case Instruction::AddrSpaceCast:
// The original value is not captured via this if the new value isn't.
- Count = 0;
- for (Use &UU : I->uses()) {
- // If there are lots of uses, conservatively say that the value
- // is captured to avoid taking too much compile time.
- if (Count++ >= Threshold)
- return Tracker->tooManyUses();
-
- if (Visited.insert(&UU).second)
- if (Tracker->shouldExplore(&UU))
- Worklist.push_back(&UU);
- }
+ AddUses(I);
break;
case Instruction::ICmp: {
// Don't count comparisons of a no-alias return value against null as
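
The CaptureTracking change above folds the two duplicated use-scanning loops into a single AddUses helper: push each unvisited, explorable use onto the worklist and give up conservatively once more than Threshold uses are seen at one value. A generic sketch of that worklist pattern with a toy Node graph and a callback in place of CaptureTracker; unlike the real code, the sketch re-adds the uses of every visited node rather than only of value-propagating instructions:

#include <functional>
#include <set>
#include <vector>

struct Node { std::vector<Node *> Users; };

// Visit the transitive users of Root at most once each. Returns false (treat
// as captured) as soon as any single node has more than Threshold users.
bool exploreUses(Node *Root, unsigned Threshold,
                 const std::function<void(Node *)> &Visit) {
  std::vector<Node *> Worklist;
  std::set<Node *> Visited;
  auto AddUses = [&](Node *N) -> bool {
    unsigned Count = 0;
    for (Node *U : N->Users) {
      if (Count++ >= Threshold)
        return false;                    // too many uses: bail out
      if (Visited.insert(U).second)      // skip already-seen uses
        Worklist.push_back(U);
    }
    return true;
  };
  if (!AddUses(Root))
    return false;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    Visit(N);
    if (!AddUses(N))
      return false;
  }
  return true;
}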
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index ac7d14ebdaea..46cc87d2b178 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -61,7 +61,7 @@ static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited,
continue;
EphValues.insert(V);
- DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
// Append any more operands to consider.
appendSpeculatableOperands(V, Visited, Worklist);
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index e88b8f14d54e..c5281c57bc19 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -286,7 +286,7 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt &Offset, const DataLayout &DL) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType());
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
Offset = APInt(BitWidth, 0);
return true;
}
@@ -305,7 +305,7 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
if (!GEP)
return false;
- unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
APInt TmpOffset(BitWidth, 0);
// If the base isn't a global+constant, we aren't either.
@@ -320,6 +320,41 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return true;
}
+Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
+ const DataLayout &DL) {
+ do {
+ Type *SrcTy = C->getType();
+
+ // If the type sizes are the same and a cast is legal, just directly
+ // cast the constant.
+ if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
+ Instruction::CastOps Cast = Instruction::BitCast;
+ // If we are going from a pointer to int or vice versa, we spell the cast
+ // differently.
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ Cast = Instruction::IntToPtr;
+ else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ Cast = Instruction::PtrToInt;
+
+ if (CastInst::castIsValid(Cast, C, DestTy))
+ return ConstantExpr::getCast(Cast, C, DestTy);
+ }
+
+ // If this isn't an aggregate type, there is nothing we can do to drill down
+ // and find a bitcastable constant.
+ if (!SrcTy->isAggregateType())
+ return nullptr;
+
+ // We're simulating a load through a pointer that was bitcast to point to
+ // a different type, so we can try to walk down through the initial
+  // elements of an aggregate to see if some part of the aggregate is
+ // castable to implement the "load" semantic model.
+ C = C->getAggregateElement(0u);
+ } while (C);
+
+ return nullptr;
+}
+
namespace {
/// Recursive helper to read bits out of global. C is the constant being copied
@@ -537,8 +572,8 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
return ConstantInt::get(IntType->getContext(), ResultVal);
}
-Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, Type *DestTy,
- const DataLayout &DL) {
+Constant *ConstantFoldLoadThroughBitcastExpr(ConstantExpr *CE, Type *DestTy,
+ const DataLayout &DL) {
auto *SrcPtr = CE->getOperand(0);
auto *SrcPtrTy = dyn_cast<PointerType>(SrcPtr->getType());
if (!SrcPtrTy)
@@ -549,37 +584,7 @@ Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, Type *DestTy,
if (!C)
return nullptr;
- do {
- Type *SrcTy = C->getType();
-
- // If the type sizes are the same and a cast is legal, just directly
- // cast the constant.
- if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
- Instruction::CastOps Cast = Instruction::BitCast;
- // If we are going from a pointer to int or vice versa, we spell the cast
- // differently.
- if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
- Cast = Instruction::IntToPtr;
- else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
- Cast = Instruction::PtrToInt;
-
- if (CastInst::castIsValid(Cast, C, DestTy))
- return ConstantExpr::getCast(Cast, C, DestTy);
- }
-
- // If this isn't an aggregate type, there is nothing we can do to drill down
- // and find a bitcastable constant.
- if (!SrcTy->isAggregateType())
- return nullptr;
-
- // We're simulating a load through a pointer that was bitcast to point to
- // a different type, so we can try to walk down through the initial
-  // elements of an aggregate to see if some part of the aggregate is
- // castable to implement the "load" semantic model.
- C = C->getAggregateElement(0u);
- } while (C);
-
- return nullptr;
+ return llvm::ConstantFoldLoadThroughBitcast(C, DestTy, DL);
}
} // end anonymous namespace
@@ -611,7 +616,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
}
if (CE->getOpcode() == Instruction::BitCast)
- if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, Ty, DL))
+ if (Constant *LoadedC = ConstantFoldLoadThroughBitcastExpr(CE, Ty, DL))
return LoadedC;
// Instead of loading constant c string, use corresponding integer value
@@ -808,26 +813,26 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- if (!isa<ConstantInt>(Ops[i])) {
-
- // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
- // "inttoptr (sub (ptrtoint Ptr), V)"
- if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
- auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
- assert((!CE || CE->getType() == IntPtrTy) &&
- "CastGEPIndices didn't canonicalize index types!");
- if (CE && CE->getOpcode() == Instruction::Sub &&
- CE->getOperand(0)->isNullValue()) {
- Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
- Res = ConstantExpr::getSub(Res, CE->getOperand(1));
- Res = ConstantExpr::getIntToPtr(Res, ResTy);
- if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
- Res = FoldedRes;
- return Res;
+ if (!isa<ConstantInt>(Ops[i])) {
+
+ // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+ // "inttoptr (sub (ptrtoint Ptr), V)"
+ if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
+ auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
+ assert((!CE || CE->getType() == IntPtrTy) &&
+ "CastGEPIndices didn't canonicalize index types!");
+ if (CE && CE->getOpcode() == Instruction::Sub &&
+ CE->getOperand(0)->isNullValue()) {
+ Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+ Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+ Res = ConstantExpr::getIntToPtr(Res, ResTy);
+ if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
+ Res = FoldedRes;
+ return Res;
+ }
}
+ return nullptr;
}
- return nullptr;
- }
unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
APInt Offset =
@@ -1387,6 +1392,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::copysign:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
case Intrinsic::round:
case Intrinsic::masked_load:
case Intrinsic::sadd_with_overflow:
@@ -1582,16 +1589,37 @@ double getValueAsDouble(ConstantFP *Op) {
Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
ArrayRef<Constant *> Operands,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ ImmutableCallSite CS) {
if (Operands.size() == 1) {
if (isa<UndefValue>(Operands[0])) {
// cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN
if (IntrinsicID == Intrinsic::cos)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::bswap ||
- IntrinsicID == Intrinsic::bitreverse)
+ IntrinsicID == Intrinsic::bitreverse ||
+ IntrinsicID == Intrinsic::launder_invariant_group ||
+ IntrinsicID == Intrinsic::strip_invariant_group)
return Operands[0];
}
+
+ if (isa<ConstantPointerNull>(Operands[0])) {
+ // launder(null) == null == strip(null) iff in addrspace 0
+ if (IntrinsicID == Intrinsic::launder_invariant_group ||
+ IntrinsicID == Intrinsic::strip_invariant_group) {
+ // If instruction is not yet put in a basic block (e.g. when cloning
+ // a function during inlining), CS caller may not be available.
+ // So check CS's BB first before querying CS.getCaller.
+ const Function *Caller = CS.getParent() ? CS.getCaller() : nullptr;
+ if (Caller &&
+ !NullPointerIsDefined(
+ Caller, Operands[0]->getType()->getPointerAddressSpace())) {
+ return Operands[0];
+ }
+ return nullptr;
+ }
+ }
+
if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
if (IntrinsicID == Intrinsic::convert_to_fp16) {
APFloat Val(Op->getValueAPF());
@@ -1988,7 +2016,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
VectorType *VTy, ArrayRef<Constant *> Operands,
const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ ImmutableCallSite CS) {
SmallVector<Constant *, 4> Result(VTy->getNumElements());
SmallVector<Constant *, 4> Lane(Operands.size());
Type *Ty = VTy->getElementType();
@@ -2051,7 +2080,7 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
}
// Use the regular scalar folding to simplify this column.
- Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI);
+ Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, CS);
if (!Folded)
return nullptr;
Result[I] = Folded;
@@ -2076,9 +2105,9 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
if (auto *VTy = dyn_cast<VectorType>(Ty))
return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands,
- F->getParent()->getDataLayout(), TLI);
+ F->getParent()->getDataLayout(), TLI, CS);
- return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI);
+ return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI, CS);
}
bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
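
Inside the newly exported ConstantFoldLoadThroughBitcast, once the source and destination types have the same bit width the cast opcode is chosen purely from whether each side is a pointer or an integer (and the result is still checked with CastInst::castIsValid). A standalone sketch of just that selection, with toy enums standing in for LLVM's types and Instruction::CastOps:

#include <cassert>

enum class Kind { Int, Ptr, Other };
enum class CastOp { BitCast, IntToPtr, PtrToInt };

// Same-sized types are bitcast unless exactly one side is a pointer, in which
// case the int<->pointer casts are used instead.
static CastOp pickCast(Kind Src, Kind Dest) {
  if (Src == Kind::Int && Dest == Kind::Ptr)
    return CastOp::IntToPtr;
  if (Src == Kind::Ptr && Dest == Kind::Int)
    return CastOp::PtrToInt;
  return CastOp::BitCast;
}

int main() {
  assert(pickCast(Kind::Int, Kind::Ptr) == CastOp::IntToPtr);
  assert(pickCast(Kind::Ptr, Kind::Int) == CastOp::PtrToInt);
  assert(pickCast(Kind::Other, Kind::Other) == CastOp::BitCast);
  return 0;
}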
diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp
index dd5af9d43ef8..4cafb7da16d3 100644
--- a/contrib/llvm/lib/Analysis/Delinearization.cpp
+++ b/contrib/llvm/lib/Analysis/Delinearization.cpp
@@ -69,16 +69,6 @@ bool Delinearization::runOnFunction(Function &F) {
return false;
}
-static Value *getPointerOperand(Instruction &Inst) {
- if (LoadInst *Load = dyn_cast<LoadInst>(&Inst))
- return Load->getPointerOperand();
- else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst))
- return Store->getPointerOperand();
- else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst))
- return Gep->getPointerOperand();
- return nullptr;
-}
-
void Delinearization::print(raw_ostream &O, const Module *) const {
O << "Delinearization on function " << F->getName() << ":\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
@@ -93,7 +83,7 @@ void Delinearization::print(raw_ostream &O, const Module *) const {
// Delinearize the memory access as analyzed in all the surrounding loops.
// Do not analyze memory accesses outside loops.
for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
- const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
+ const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(Inst), L);
const SCEVUnknown *BasePointer =
dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
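
Delinearization.cpp now calls a shared getPointerOperand(Inst) in place of the file-local helper deleted above. The print loop only depends on the behaviour that helper had; a sketch restating it with early returns (against the IR API, independent of wherever the shared utility actually lives):

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Return the pointer operand of a load, store or GEP, and null otherwise;
// this is the contract the delinearization printer relies on.
static Value *pointerOperandOf(Instruction &Inst) {
  if (auto *Load = dyn_cast<LoadInst>(&Inst))
    return Load->getPointerOperand();
  if (auto *Store = dyn_cast<StoreInst>(&Inst))
    return Store->getPointerOperand();
  if (auto *GEP = dyn_cast<GetElementPtrInst>(&Inst))
    return GEP->getPointerOperand();
  return nullptr;
}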
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
index de7d21f9f133..58c5bccff65d 100644
--- a/contrib/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -283,7 +283,7 @@ void DemandedBits::performAnalysis() {
if (!isAlwaysLive(&I))
continue;
- DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
+ LLVM_DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
// For integer-valued instructions, set up an initial empty set of alive
// bits and add the instruction to the work list. For other instructions
// add their operands to the work list (for integer values operands, mark
@@ -313,13 +313,13 @@ void DemandedBits::performAnalysis() {
while (!Worklist.empty()) {
Instruction *UserI = Worklist.pop_back_val();
- DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
+ LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
APInt AOut;
if (UserI->getType()->isIntegerTy()) {
AOut = AliveBits[UserI];
- DEBUG(dbgs() << " Alive Out: " << AOut);
+ LLVM_DEBUG(dbgs() << " Alive Out: " << AOut);
}
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "\n");
if (!UserI->getType()->isIntegerTy())
Visited.insert(UserI);
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index 34eccc07f265..79c2728d5620 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -24,8 +24,7 @@
// Both of these are conservative weaknesses;
// that is, not a source of correctness problems.
//
-// The implementation depends on the GEP instruction to differentiate
-// subscripts. Since Clang linearizes some array subscripts, the dependence
+// Since Clang linearizes some array subscripts, the dependence
// analysis is using SCEV->delinearize to recover the representation of multiple
// subscripts, and thus avoid the more expensive and less precise MIV tests. The
// delinearization is controlled by the flag -da-delinearize.
@@ -59,6 +58,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -108,8 +108,8 @@ STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
static cl::opt<bool>
-Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Try to delinearize array references."));
+ Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Try to delinearize array references."));
//===----------------------------------------------------------------------===//
// basics
@@ -415,9 +415,9 @@ LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
// PLDI 1991
bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
++DeltaApplications;
- DEBUG(dbgs() << "\tintersect constraints\n");
- DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
- DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
+ LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
+ LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
+ LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
assert(!Y->isPoint() && "Y must not be a Point");
if (X->isAny()) {
if (Y->isAny())
@@ -433,7 +433,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
}
if (X->isDistance() && Y->isDistance()) {
- DEBUG(dbgs() << "\t intersect 2 distances\n");
+ LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
return false;
if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
@@ -460,12 +460,12 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
"We shouldn't ever see X->isPoint() && Y->isPoint()");
if (X->isLine() && Y->isLine()) {
- DEBUG(dbgs() << "\t intersect 2 lines\n");
+ LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n");
const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
// slopes are equal, so lines are parallel
- DEBUG(dbgs() << "\t\tsame slope\n");
+ LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
Prod1 = SE->getMulExpr(X->getC(), Y->getB());
Prod2 = SE->getMulExpr(X->getB(), Y->getC());
if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
@@ -479,7 +479,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
}
if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
// slopes differ, so lines intersect
- DEBUG(dbgs() << "\t\tdifferent slopes\n");
+ LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
@@ -501,10 +501,10 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
APInt Xbot = A1B2_A2B1->getAPInt();
APInt Ytop = C1A2_C2A1->getAPInt();
APInt Ybot = A2B1_A1B2->getAPInt();
- DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
- DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
- DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
- DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
APInt Xq = Xtop; // these need to be initialized, even
APInt Xr = Xtop; // though they're just going to be overwritten
APInt::sdivrem(Xtop, Xbot, Xq, Xr);
@@ -516,7 +516,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
++DeltaSuccesses;
return true;
}
- DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
if (Xq.slt(0) || Yq.slt(0)) {
X->setEmpty();
++DeltaSuccesses;
@@ -525,7 +525,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
if (const SCEVConstant *CUB =
collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
const APInt &UpperBound = CUB->getAPInt();
- DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
X->setEmpty();
++DeltaSuccesses;
@@ -545,7 +545,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
if (X->isPoint() && Y->isLine()) {
- DEBUG(dbgs() << "\t intersect Point and Line\n");
+ LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n");
const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
@@ -622,13 +622,38 @@ void Dependence::dump(raw_ostream &OS) const {
OS << "!\n";
}
+// Returns NoAlias/MayAlias/MustAlias for two memory locations based upon their
+// underlying objects. If LocA and LocB are known not to alias (for any reason:
+// TBAA, non-overlapping regions, etc.), then there is no dependency.
+// Otherwise the underlying objects are checked to see if they point to
+// different identifiable objects.
static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
- const DataLayout &DL, const Value *A,
- const Value *B) {
- const Value *AObj = GetUnderlyingObject(A, DL);
- const Value *BObj = GetUnderlyingObject(B, DL);
- return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()),
- BObj, DL.getTypeStoreSize(BObj->getType()));
+ const DataLayout &DL,
+ const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ // Check the original locations (ignoring the access size) for noalias, which
+ // can be proven by TBAA, incompatible underlying object locations, etc.
+ MemoryLocation LocAS(LocA.Ptr, MemoryLocation::UnknownSize, LocA.AATags);
+ MemoryLocation LocBS(LocB.Ptr, MemoryLocation::UnknownSize, LocB.AATags);
+ if (AA->alias(LocAS, LocBS) == NoAlias)
+ return NoAlias;
+
+ // Check the underlying objects are the same
+ const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL);
+ const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL);
+
+ // If the underlying objects are the same, they must alias
+ if (AObj == BObj)
+ return MustAlias;
+
+ // We may have hit the recursion limit for underlying objects, or have
+ // underlying objects for which we cannot tell whether they alias.
+ if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj))
+ return MayAlias;
+
+ // Otherwise both are identified objects and they are different, so they
+ // must not alias.
+ return NoAlias;
}
@@ -644,17 +669,6 @@ bool isLoadOrStore(const Instruction *I) {
}
-static
-Value *getPointerOperand(Instruction *I) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->getPointerOperand();
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->getPointerOperand();
- llvm_unreachable("Value is not load or store instruction");
- return nullptr;
-}
-
-
// Examines the loop nesting of the Src and Dst
// instructions and establishes their shared loops. Sets the variables
// CommonLevels, SrcLevels, and MaxLevels.
@@ -980,6 +994,57 @@ bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
}
}
+/// Check whether S is known to be less than Size, using
+/// isKnownNegative(S - smax(Size, 1)), with some extra checking when S is an
+/// AddRec and the less-than can be proven from the loop bounds.
+bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
+ // First unify to the same type
+ auto *SType = dyn_cast<IntegerType>(S->getType());
+ auto *SizeType = dyn_cast<IntegerType>(Size->getType());
+ if (!SType || !SizeType)
+ return false;
+ Type *MaxType =
+ (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
+ S = SE->getTruncateOrZeroExtend(S, MaxType);
+ Size = SE->getTruncateOrZeroExtend(Size, MaxType);
+
+ // Special check for addrecs using BE taken count
+ const SCEV *Bound = SE->getMinusSCEV(S, Size);
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
+ if (AddRec->isAffine()) {
+ const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(BECount)) {
+ const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
+ if (SE->isKnownNegative(Limit))
+ return true;
+ }
+ }
+ }
+
+ // Check using normal isKnownNegative
+ const SCEV *LimitedBound =
+ SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
+ return SE->isKnownNegative(LimitedBound);
+}
+
+bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const {
+ bool Inbounds = false;
+ if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))
+ Inbounds = SrcGEP->isInBounds();
+ if (Inbounds) {
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (AddRec->isAffine()) {
+ // S belongs to Ptr, the pointer operand of a load/store, so it does not
+ // wrap. If both the start and the step are non-negative, the result is
+ // non-negative.
+ if (SE->isKnownNonNegative(AddRec->getStart()) &&
+ SE->isKnownNonNegative(AddRec->getOperand(1)))
+ return true;
+ }
+ }
+ }
+
+ return SE->isKnownNonNegative(S);
+}
// All subscripts are all the same type.
// Loop bound may be smaller (e.g., a char).
@@ -1019,19 +1084,19 @@ const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
// Return true if dependence disproved.
bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
FullDependence &Result) const {
- DEBUG(dbgs() << " src = " << *Src << "\n");
- DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
++ZIVapplications;
if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
- DEBUG(dbgs() << " provably dependent\n");
+ LLVM_DEBUG(dbgs() << " provably dependent\n");
return false; // provably dependent
}
if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
- DEBUG(dbgs() << " provably independent\n");
+ LLVM_DEBUG(dbgs() << " provably independent\n");
++ZIVindependence;
return true; // provably independent
}
- DEBUG(dbgs() << " possibly dependent\n");
+ LLVM_DEBUG(dbgs() << " possibly dependent\n");
Result.Consistent = false;
return false; // possibly dependent
}
@@ -1068,25 +1133,25 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
const SCEV *DstConst, const Loop *CurLoop,
unsigned Level, FullDependence &Result,
Constraint &NewConstraint) const {
- DEBUG(dbgs() << "\tStrong SIV test\n");
- DEBUG(dbgs() << "\t Coeff = " << *Coeff);
- DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
- DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
- DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
- DEBUG(dbgs() << "\t DstConst = " << *DstConst);
- DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
+ LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
+ LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
+ LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
+ LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
+ LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
+ LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
+ LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
++StrongSIVapplications;
assert(0 < Level && Level <= CommonLevels && "level out of range");
Level--;
const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
- DEBUG(dbgs() << "\t Delta = " << *Delta);
- DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
+ LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
+ LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
// check that |Delta| < iteration count
if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
- DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
- DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
+ LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
+ LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
const SCEV *AbsDelta =
SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
const SCEV *AbsCoeff =
@@ -1107,8 +1172,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
APInt Distance = ConstDelta; // these need to be initialized
APInt Remainder = ConstDelta;
APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
- DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
- DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+ LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
// Make sure Coeff divides Delta exactly
if (Remainder != 0) {
// Coeff doesn't divide Distance, no dependence
@@ -1135,7 +1200,7 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
}
else {
if (Coeff->isOne()) {
- DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
+ LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
Result.DV[Level].Distance = Delta; // since X/1 == X
NewConstraint.setDistance(Delta, CurLoop);
}
@@ -1204,16 +1269,16 @@ bool DependenceInfo::weakCrossingSIVtest(
const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
const Loop *CurLoop, unsigned Level, FullDependence &Result,
Constraint &NewConstraint, const SCEV *&SplitIter) const {
- DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
- DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
- DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
- DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
+ LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
+ LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++WeakCrossingSIVapplications;
assert(0 < Level && Level <= CommonLevels && "Level out of range");
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
- DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
if (Delta->isZero()) {
Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
@@ -1243,7 +1308,7 @@ bool DependenceInfo::weakCrossingSIVtest(
SplitIter = SE->getUDivExpr(
SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
- DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
+ LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
if (!ConstDelta)
@@ -1251,8 +1316,8 @@ bool DependenceInfo::weakCrossingSIVtest(
// We're certain that ConstCoeff > 0; therefore,
// if Delta < 0, then no dependence.
- DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
- DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
+ LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
if (SE->isKnownNegative(Delta)) {
// No dependence, Delta < 0
++WeakCrossingSIVindependence;
@@ -1263,11 +1328,11 @@ bool DependenceInfo::weakCrossingSIVtest(
// We're certain that Delta > 0 and ConstCoeff > 0.
// Check Delta/(2*ConstCoeff) against upper loop bound
if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
- DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
ConstantTwo);
- DEBUG(dbgs() << "\t ML = " << *ML << "\n");
+ LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
// Delta too big, no dependence
++WeakCrossingSIVindependence;
@@ -1295,19 +1360,19 @@ bool DependenceInfo::weakCrossingSIVtest(
APInt Distance = APDelta; // these need to be initialized
APInt Remainder = APDelta;
APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
- DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
if (Remainder != 0) {
// Coeff doesn't divide Delta, no dependence
++WeakCrossingSIVindependence;
++WeakCrossingSIVsuccesses;
return true;
}
- DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+ LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
// if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
APInt Two = APInt(Distance.getBitWidth(), 2, true);
Remainder = Distance.srem(Two);
- DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
if (Remainder != 0) {
// Equal direction isn't possible
Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
@@ -1343,7 +1408,7 @@ static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
APInt::sdivrem(G0, G1, Q, R);
}
G = G1;
- DEBUG(dbgs() << "\t GCD = " << G << "\n");
+ LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
X = AM.slt(0) ? -A1 : A1;
Y = BM.slt(0) ? B1 : -B1;
@@ -1416,17 +1481,17 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const Loop *CurLoop, unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const {
- DEBUG(dbgs() << "\tExact SIV test\n");
- DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
- DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
- DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
- DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
+ LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++ExactSIVapplications;
assert(0 < Level && Level <= CommonLevels && "Level out of range");
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
- DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
Delta, CurLoop);
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
@@ -1447,7 +1512,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
return true;
}
- DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+ LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
// since SCEV construction normalizes, LM = 0
APInt UM(Bits, 1, true);
@@ -1456,7 +1521,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
if (const SCEVConstant *CUB =
collectConstantUpperBound(CurLoop, Delta->getType())) {
UM = CUB->getAPInt();
- DEBUG(dbgs() << "\t UM = " << UM << "\n");
+ LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n");
UMvalid = true;
}
@@ -1467,18 +1532,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
APInt TMUL = BM.sdiv(G);
if (TMUL.sgt(0)) {
TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
- DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
if (UMvalid) {
TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
- DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
}
}
else {
TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
- DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (UMvalid) {
TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
- DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
}
}
@@ -1486,18 +1551,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
TMUL = AM.sdiv(G);
if (TMUL.sgt(0)) {
TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
- DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
if (UMvalid) {
TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
- DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
}
}
else {
TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
- DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (UMvalid) {
TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
- DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
}
}
if (TL.sgt(TU)) {
@@ -1512,15 +1577,15 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
// less than
APInt SaveTU(TU); // save these
APInt SaveTL(TL);
- DEBUG(dbgs() << "\t exploring LT direction\n");
+ LLVM_DEBUG(dbgs() << "\t exploring LT direction\n");
TMUL = AM - BM;
if (TMUL.sgt(0)) {
TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
- DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
}
else {
TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
- DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
}
if (TL.sle(TU)) {
NewDirection |= Dependence::DVEntry::LT;
@@ -1530,23 +1595,23 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
// equal
TU = SaveTU; // restore
TL = SaveTL;
- DEBUG(dbgs() << "\t exploring EQ direction\n");
+ LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n");
if (TMUL.sgt(0)) {
TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
- DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
}
else {
TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
- DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
}
TMUL = BM - AM;
if (TMUL.sgt(0)) {
TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
- DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
}
else {
TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
- DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
}
if (TL.sle(TU)) {
NewDirection |= Dependence::DVEntry::EQ;
@@ -1556,14 +1621,14 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
// greater than
TU = SaveTU; // restore
TL = SaveTL;
- DEBUG(dbgs() << "\t exploring GT direction\n");
+ LLVM_DEBUG(dbgs() << "\t exploring GT direction\n");
if (TMUL.sgt(0)) {
TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
- DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
}
else {
TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
- DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
}
if (TL.sle(TU)) {
NewDirection |= Dependence::DVEntry::GT;
@@ -1607,9 +1672,9 @@ bool isRemainderZero(const SCEVConstant *Dividend,
//
// If i is not an integer, there's no dependence.
// If i < 0 or > UB, there's no dependence.
-// If i = 0, the direction is <= and peeling the
+// If i = 0, the direction is >= and peeling the
// 1st iteration will break the dependence.
-// If i = UB, the direction is >= and peeling the
+// If i = UB, the direction is <= and peeling the
// last iteration will break the dependence.
// Otherwise, the direction is *.
//
@@ -1629,10 +1694,10 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
// For the WeakSIV test, it's possible the loop isn't common to
// the Src and Dst loops. If it isn't, then there's no need to
// record a direction.
- DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
- DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
- DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
- DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
+ LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
+ LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++WeakZeroSIVapplications;
assert(0 < Level && Level <= MaxLevels && "Level out of range");
Level--;
@@ -1640,10 +1705,10 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
CurLoop);
- DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
if (Level < CommonLevels) {
- Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
Result.DV[Level].PeelFirst = true;
++WeakZeroSIVsuccesses;
}
@@ -1661,7 +1726,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
// check that Delta/SrcCoeff < iteration count
// really check NewDelta < count*AbsCoeff
if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
- DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
++WeakZeroSIVindependence;
@@ -1671,7 +1736,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
// dependences caused by last iteration
if (Level < CommonLevels) {
- Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
Result.DV[Level].PeelLast = true;
++WeakZeroSIVsuccesses;
}
@@ -1738,10 +1803,10 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
Constraint &NewConstraint) const {
// For the WeakSIV test, it's possible the loop isn't common to the
// Src and Dst loops. If it isn't, then there's no need to record a direction.
- DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
- DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
- DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
- DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
+ LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
+ LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++WeakZeroSIVapplications;
assert(0 < Level && Level <= SrcLevels && "Level out of range");
Level--;
@@ -1749,7 +1814,7 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
CurLoop);
- DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
if (Level < CommonLevels) {
Result.DV[Level].Direction &= Dependence::DVEntry::LE;
@@ -1770,7 +1835,7 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
// check that Delta/SrcCoeff < iteration count
// really check NewDelta < count*AbsCoeff
if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
- DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
++WeakZeroSIVindependence;
@@ -1819,15 +1884,15 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const SCEV *SrcConst, const SCEV *DstConst,
const Loop *SrcLoop, const Loop *DstLoop,
FullDependence &Result) const {
- DEBUG(dbgs() << "\tExact RDIV test\n");
- DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
- DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
- DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
- DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
+ LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++ExactRDIVapplications;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
- DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
@@ -1845,7 +1910,7 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
return true;
}
- DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+ LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
// since SCEV construction seems to normalize, LM = 0
APInt SrcUM(Bits, 1, true);
@@ -1854,7 +1919,7 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
if (const SCEVConstant *UpperBound =
collectConstantUpperBound(SrcLoop, Delta->getType())) {
SrcUM = UpperBound->getAPInt();
- DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
+ LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
SrcUMvalid = true;
}
@@ -1864,7 +1929,7 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
if (const SCEVConstant *UpperBound =
collectConstantUpperBound(DstLoop, Delta->getType())) {
DstUM = UpperBound->getAPInt();
- DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
+ LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
DstUMvalid = true;
}
@@ -1875,18 +1940,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
APInt TMUL = BM.sdiv(G);
if (TMUL.sgt(0)) {
TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
- DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
if (SrcUMvalid) {
TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
- DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
}
}
else {
TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
- DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (SrcUMvalid) {
TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
- DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
}
}
@@ -1894,18 +1959,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
TMUL = AM.sdiv(G);
if (TMUL.sgt(0)) {
TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
- DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
if (DstUMvalid) {
TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
- DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
}
}
else {
TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
- DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (DstUMvalid) {
TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
- DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
}
}
if (TL.sgt(TU))
@@ -1961,27 +2026,27 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
const Loop *Loop1,
const Loop *Loop2) const {
++SymbolicRDIVapplications;
- DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
- DEBUG(dbgs() << "\t A1 = " << *A1);
- DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
- DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
- DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
- DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
+ LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
+ LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
+ LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
+ LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
+ LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
+ LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
- DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
- DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
+ LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
+ LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
- DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
- DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
+ LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
+ LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
if (SE->isKnownNonNegative(A1)) {
if (SE->isKnownNonNegative(A2)) {
// A1 >= 0 && A2 >= 0
if (N1) {
// make sure that c2 - c1 <= a1*N1
const SCEV *A1N1 = SE->getMulExpr(A1, N1);
- DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
++SymbolicRDIVindependence;
return true;
@@ -1990,7 +2055,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
if (N2) {
// make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
const SCEV *A2N2 = SE->getMulExpr(A2, N2);
- DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
++SymbolicRDIVindependence;
return true;
@@ -2004,7 +2069,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
const SCEV *A1N1 = SE->getMulExpr(A1, N1);
const SCEV *A2N2 = SE->getMulExpr(A2, N2);
const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
- DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
++SymbolicRDIVindependence;
return true;
@@ -2025,7 +2090,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
const SCEV *A1N1 = SE->getMulExpr(A1, N1);
const SCEV *A2N2 = SE->getMulExpr(A2, N2);
const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
- DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
++SymbolicRDIVindependence;
return true;
@@ -2042,7 +2107,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
if (N1) {
// make sure that a1*N1 <= c2 - c1
const SCEV *A1N1 = SE->getMulExpr(A1, N1);
- DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
++SymbolicRDIVindependence;
return true;
@@ -2051,7 +2116,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
if (N2) {
// make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
const SCEV *A2N2 = SE->getMulExpr(A2, N2);
- DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
++SymbolicRDIVindependence;
return true;
@@ -2074,8 +2139,8 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
FullDependence &Result, Constraint &NewConstraint,
const SCEV *&SplitIter) const {
- DEBUG(dbgs() << " src = " << *Src << "\n");
- DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
if (SrcAddRec && DstAddRec) {
@@ -2151,8 +2216,8 @@ bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
const SCEV *SrcCoeff, *DstCoeff;
const Loop *SrcLoop, *DstLoop;
- DEBUG(dbgs() << " src = " << *Src << "\n");
- DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
if (SrcAddRec && DstAddRec) {
@@ -2208,8 +2273,8 @@ bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
const SmallBitVector &Loops,
FullDependence &Result) const {
- DEBUG(dbgs() << " src = " << *Src << "\n");
- DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
Result.Consistent = false;
return gcdMIVtest(Src, Dst, Result) ||
banerjeeMIVtest(Src, Dst, Loops, Result);
@@ -2249,7 +2314,7 @@ const SCEVConstant *getConstantPart(const SCEV *Expr) {
// to "a common divisor".
bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
FullDependence &Result) const {
- DEBUG(dbgs() << "starting gcd\n");
+ LLVM_DEBUG(dbgs() << "starting gcd\n");
++GCDapplications;
unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
APInt RunningGCD = APInt::getNullValue(BitWidth);
@@ -2294,7 +2359,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
APInt ExtraGCD = APInt::getNullValue(BitWidth);
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
- DEBUG(dbgs() << " Delta = " << *Delta << "\n");
+ LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
// If Delta is a sum of products, we may be able to make further progress.
@@ -2321,11 +2386,11 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
if (!Constant)
return false;
APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
- DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
+ LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
if (ConstDelta == 0)
return false;
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
- DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
+ LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
APInt Remainder = ConstDelta.srem(RunningGCD);
if (Remainder != 0) {
++GCDindependence;
@@ -2344,7 +2409,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
// Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
// we need to remember that the constant part is 5 and the RunningGCD should
// be initialized to ExtraGCD = 30.
- DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
+ LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
bool Improved = false;
Coefficients = Src;
@@ -2399,10 +2464,10 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
continue;
APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
- DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
+ LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
if (RunningGCD != 0) {
Remainder = ConstDelta.srem(RunningGCD);
- DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
+ LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
if (Remainder != 0) {
unsigned Level = mapSrcLoop(CurLoop);
Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
@@ -2412,7 +2477,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
}
if (Improved)
++GCDsuccesses;
- DEBUG(dbgs() << "all done\n");
+ LLVM_DEBUG(dbgs() << "all done\n");
return false;
}
@@ -2453,35 +2518,35 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
const SmallBitVector &Loops,
FullDependence &Result) const {
- DEBUG(dbgs() << "starting Banerjee\n");
+ LLVM_DEBUG(dbgs() << "starting Banerjee\n");
++BanerjeeApplications;
- DEBUG(dbgs() << " Src = " << *Src << '\n');
+ LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
const SCEV *A0;
CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
- DEBUG(dbgs() << " Dst = " << *Dst << '\n');
+ LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
const SCEV *B0;
CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
const SCEV *Delta = SE->getMinusSCEV(B0, A0);
- DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
+ LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
// Compute bounds for all the * directions.
- DEBUG(dbgs() << "\tBounds[*]\n");
+ LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
for (unsigned K = 1; K <= MaxLevels; ++K) {
Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
Bound[K].Direction = Dependence::DVEntry::ALL;
Bound[K].DirSet = Dependence::DVEntry::NONE;
findBoundsALL(A, B, Bound, K);
#ifndef NDEBUG
- DEBUG(dbgs() << "\t " << K << '\t');
+ LLVM_DEBUG(dbgs() << "\t " << K << '\t');
if (Bound[K].Lower[Dependence::DVEntry::ALL])
- DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
+ LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
else
- DEBUG(dbgs() << "-inf\t");
+ LLVM_DEBUG(dbgs() << "-inf\t");
if (Bound[K].Upper[Dependence::DVEntry::ALL])
- DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
+ LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
else
- DEBUG(dbgs() << "+inf\n");
+ LLVM_DEBUG(dbgs() << "+inf\n");
#endif
}
@@ -2537,23 +2602,23 @@ unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
const SCEV *Delta) const {
if (Level > CommonLevels) {
// record result
- DEBUG(dbgs() << "\t[");
+ LLVM_DEBUG(dbgs() << "\t[");
for (unsigned K = 1; K <= CommonLevels; ++K) {
if (Loops[K]) {
Bound[K].DirSet |= Bound[K].Direction;
#ifndef NDEBUG
switch (Bound[K].Direction) {
case Dependence::DVEntry::LT:
- DEBUG(dbgs() << " <");
+ LLVM_DEBUG(dbgs() << " <");
break;
case Dependence::DVEntry::EQ:
- DEBUG(dbgs() << " =");
+ LLVM_DEBUG(dbgs() << " =");
break;
case Dependence::DVEntry::GT:
- DEBUG(dbgs() << " >");
+ LLVM_DEBUG(dbgs() << " >");
break;
case Dependence::DVEntry::ALL:
- DEBUG(dbgs() << " *");
+ LLVM_DEBUG(dbgs() << " *");
break;
default:
llvm_unreachable("unexpected Bound[K].Direction");
@@ -2561,7 +2626,7 @@ unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
#endif
}
}
- DEBUG(dbgs() << " ]\n");
+ LLVM_DEBUG(dbgs() << " ]\n");
return 1;
}
if (Loops[Level]) {
@@ -2572,34 +2637,40 @@ unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
findBoundsGT(A, B, Bound, Level);
findBoundsEQ(A, B, Bound, Level);
#ifndef NDEBUG
- DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
- DEBUG(dbgs() << "\t <\t");
+ LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
+ LLVM_DEBUG(dbgs() << "\t <\t");
if (Bound[Level].Lower[Dependence::DVEntry::LT])
- DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
+ LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
+ << '\t');
else
- DEBUG(dbgs() << "-inf\t");
+ LLVM_DEBUG(dbgs() << "-inf\t");
if (Bound[Level].Upper[Dependence::DVEntry::LT])
- DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
+ LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
+ << '\n');
else
- DEBUG(dbgs() << "+inf\n");
- DEBUG(dbgs() << "\t =\t");
+ LLVM_DEBUG(dbgs() << "+inf\n");
+ LLVM_DEBUG(dbgs() << "\t =\t");
if (Bound[Level].Lower[Dependence::DVEntry::EQ])
- DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
+ LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
+ << '\t');
else
- DEBUG(dbgs() << "-inf\t");
+ LLVM_DEBUG(dbgs() << "-inf\t");
if (Bound[Level].Upper[Dependence::DVEntry::EQ])
- DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
+ LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
+ << '\n');
else
- DEBUG(dbgs() << "+inf\n");
- DEBUG(dbgs() << "\t >\t");
+ LLVM_DEBUG(dbgs() << "+inf\n");
+ LLVM_DEBUG(dbgs() << "\t >\t");
if (Bound[Level].Lower[Dependence::DVEntry::GT])
- DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
+ LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
+ << '\t');
else
- DEBUG(dbgs() << "-inf\t");
+ LLVM_DEBUG(dbgs() << "-inf\t");
if (Bound[Level].Upper[Dependence::DVEntry::GT])
- DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
+ LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
+ << '\n');
else
- DEBUG(dbgs() << "+inf\n");
+ LLVM_DEBUG(dbgs() << "+inf\n");
#endif
}
@@ -2846,21 +2917,21 @@ DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
}
Constant = Subscript;
#ifndef NDEBUG
- DEBUG(dbgs() << "\tCoefficient Info\n");
+ LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
for (unsigned K = 1; K <= MaxLevels; ++K) {
- DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
- DEBUG(dbgs() << "\tPos Part = ");
- DEBUG(dbgs() << *CI[K].PosPart);
- DEBUG(dbgs() << "\tNeg Part = ");
- DEBUG(dbgs() << *CI[K].NegPart);
- DEBUG(dbgs() << "\tUpper Bound = ");
+ LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
+ LLVM_DEBUG(dbgs() << "\tPos Part = ");
+ LLVM_DEBUG(dbgs() << *CI[K].PosPart);
+ LLVM_DEBUG(dbgs() << "\tNeg Part = ");
+ LLVM_DEBUG(dbgs() << *CI[K].NegPart);
+ LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
if (CI[K].Iterations)
- DEBUG(dbgs() << *CI[K].Iterations);
+ LLVM_DEBUG(dbgs() << *CI[K].Iterations);
else
- DEBUG(dbgs() << "+inf");
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "+inf");
+ LLVM_DEBUG(dbgs() << '\n');
}
- DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
+ LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
#endif
return CI;
}
@@ -2985,8 +3056,8 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
bool &Consistent) {
bool Result = false;
for (unsigned LI : Loops.set_bits()) {
- DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
- DEBUG(Constraints[LI].dump(dbgs()));
+ LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
+ LLVM_DEBUG(Constraints[LI].dump(dbgs()));
if (Constraints[LI].isDistance())
Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
else if (Constraints[LI].isLine())
@@ -3007,17 +3078,17 @@ bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
Constraint &CurConstraint,
bool &Consistent) {
const Loop *CurLoop = CurConstraint.getAssociatedLoop();
- DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
const SCEV *A_K = findCoefficient(Src, CurLoop);
if (A_K->isZero())
return false;
const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
Src = SE->getMinusSCEV(Src, DA_K);
Src = zeroCoefficient(Src, CurLoop);
- DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
- DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
- DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
if (!findCoefficient(Dst, CurLoop)->isZero())
Consistent = false;
return true;
@@ -3036,9 +3107,10 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
const SCEV *A = CurConstraint.getA();
const SCEV *B = CurConstraint.getB();
const SCEV *C = CurConstraint.getC();
- DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
- DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
- DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C
+ << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
if (A->isZero()) {
const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
@@ -3094,8 +3166,8 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
if (!findCoefficient(Dst, CurLoop)->isZero())
Consistent = false;
}
- DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
- DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
return true;
}
@@ -3110,13 +3182,13 @@ bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
const SCEV *AP_K = findCoefficient(Dst, CurLoop);
const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
- DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
Src = zeroCoefficient(Src, CurLoop);
- DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
- DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
Dst = zeroCoefficient(Dst, CurLoop);
- DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
return true;
}
@@ -3124,8 +3196,8 @@ bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
// Update direction vector entry based on the current constraint.
void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
const Constraint &CurConstraint) const {
- DEBUG(dbgs() << "\tUpdate direction, constraint =");
- DEBUG(CurConstraint.dump(dbgs()));
+ LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
+ LLVM_DEBUG(CurConstraint.dump(dbgs()));
if (CurConstraint.isAny())
; // use defaults
else if (CurConstraint.isDistance()) {
@@ -3177,8 +3249,10 @@ void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
/// for each loop level.
bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SmallVectorImpl<Subscript> &Pair) {
- Value *SrcPtr = getPointerOperand(Src);
- Value *DstPtr = getPointerOperand(Dst);
+ assert(isLoadOrStore(Src) && "instruction is not load or store");
+ assert(isLoadOrStore(Dst) && "instruction is not load or store");
+ Value *SrcPtr = getLoadStorePointerOperand(Src);
+ Value *DstPtr = getLoadStorePointerOperand(Dst);
Loop *SrcLoop = LI->getLoopFor(Src->getParent());
Loop *DstLoop = LI->getLoopFor(Dst->getParent());
@@ -3230,14 +3304,34 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
int size = SrcSubscripts.size();
- DEBUG({
- dbgs() << "\nSrcSubscripts: ";
+ // Statically check that the array subscripts are in range. We have no size
+ // for the first subscript, and it cannot overflow into another subscript,
+ // so it is always safe. Each remaining subscript must satisfy
+ // 0 <= subscript[i] < bound, for both src and dst.
+ // FIXME: It may be better to record these sizes and add them as constraints
+ // to the dependency checks.
+ for (int i = 1; i < size; ++i) {
+ if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
+ return false;
+
+ if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+ return false;
+
+ if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
+ return false;
+
+ if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+ return false;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "\nSrcSubscripts: ";
for (int i = 0; i < size; i++)
dbgs() << *SrcSubscripts[i];
dbgs() << "\nDstSubscripts: ";
for (int i = 0; i < size; i++)
dbgs() << *DstSubscripts[i];
- });
+ });
// The delinearization transforms a single-subscript MIV dependence test into
// a multi-subscript SIV dependence test that is easier to compute. So we
@@ -3248,13 +3342,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
Pair[i].Src = SrcSubscripts[i];
Pair[i].Dst = DstSubscripts[i];
unifySubscriptType(&Pair[i]);
-
- // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
- // delinearization has found, and add these constraints to the dependence
- // check to avoid memory accesses overflow from one dimension into another.
- // This is related to the problem of determining the existence of data
- // dependences in array accesses using a different number of subscripts: in
- // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc.
}
return true;
@@ -3299,23 +3386,26 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
- DEBUG(dbgs() << "can only handle simple loads and stores\n");
+ LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
return make_unique<Dependence>(Src, Dst);
}
- Value *SrcPtr = getPointerOperand(Src);
- Value *DstPtr = getPointerOperand(Dst);
+ assert(isLoadOrStore(Src) && "instruction is not load or store");
+ assert(isLoadOrStore(Dst) && "instruction is not load or store");
+ Value *SrcPtr = getLoadStorePointerOperand(Src);
+ Value *DstPtr = getLoadStorePointerOperand(Dst);
- switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
- SrcPtr)) {
+ switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(),
+ MemoryLocation::get(Dst),
+ MemoryLocation::get(Src))) {
case MayAlias:
case PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
- DEBUG(dbgs() << "can't analyze may or partial alias\n");
+ LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
return make_unique<Dependence>(Src, Dst);
case NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
- DEBUG(dbgs() << "no alias\n");
+ LLVM_DEBUG(dbgs() << "no alias\n");
return nullptr;
case MustAlias:
break; // The underlying objects alias; test accesses for dependence.
@@ -3323,56 +3413,24 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// establish loop nesting levels
establishNestingLevels(Src, Dst);
- DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
- DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
+ LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
+ LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;
- // See if there are GEPs we can use.
- bool UsefulGEP = false;
- GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
- GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
- if (SrcGEP && DstGEP &&
- SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
- const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
- const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
- DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
- DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n");
-
- UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
- isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
- (SrcGEP->getNumOperands() == DstGEP->getNumOperands()) &&
- isKnownPredicate(CmpInst::ICMP_EQ, SrcPtrSCEV, DstPtrSCEV);
- }
- unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
- SmallVector<Subscript, 4> Pair(Pairs);
- if (UsefulGEP) {
- DEBUG(dbgs() << " using GEPs\n");
- unsigned P = 0;
- for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
- SrcEnd = SrcGEP->idx_end(),
- DstIdx = DstGEP->idx_begin();
- SrcIdx != SrcEnd;
- ++SrcIdx, ++DstIdx, ++P) {
- Pair[P].Src = SE->getSCEV(*SrcIdx);
- Pair[P].Dst = SE->getSCEV(*DstIdx);
- unifySubscriptType(&Pair[P]);
- }
- }
- else {
- DEBUG(dbgs() << " ignoring GEPs\n");
- const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
- const SCEV *DstSCEV = SE->getSCEV(DstPtr);
- DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
- DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
- Pair[0].Src = SrcSCEV;
- Pair[0].Dst = DstSCEV;
- }
+ unsigned Pairs = 1;
+ SmallVector<Subscript, 2> Pair(Pairs);
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
+ LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
- if (Delinearize && CommonLevels > 1) {
+ if (Delinearize) {
if (tryDelinearize(Src, Dst, Pair)) {
- DEBUG(dbgs() << " delinearized GEP\n");
+ LLVM_DEBUG(dbgs() << " delinearized\n");
Pairs = Pair.size();
}
}
@@ -3388,12 +3446,12 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
Pair[P].Loops);
Pair[P].GroupLoops = Pair[P].Loops;
Pair[P].Group.set(P);
- DEBUG(dbgs() << " subscript " << P << "\n");
- DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
- DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
- DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
- DEBUG(dbgs() << "\tloops = ");
- DEBUG(dumpSmallBitVector(Pair[P].Loops));
+ LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
+ LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
+ LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
+ LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
+ LLVM_DEBUG(dbgs() << "\tloops = ");
+ LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
}
SmallBitVector Separable(Pairs);
@@ -3498,25 +3556,25 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
}
}
- DEBUG(dbgs() << " Separable = ");
- DEBUG(dumpSmallBitVector(Separable));
- DEBUG(dbgs() << " Coupled = ");
- DEBUG(dumpSmallBitVector(Coupled));
+ LLVM_DEBUG(dbgs() << " Separable = ");
+ LLVM_DEBUG(dumpSmallBitVector(Separable));
+ LLVM_DEBUG(dbgs() << " Coupled = ");
+ LLVM_DEBUG(dumpSmallBitVector(Coupled));
Constraint NewConstraint;
NewConstraint.setAny(SE);
// test separable subscripts
for (unsigned SI : Separable.set_bits()) {
- DEBUG(dbgs() << "testing subscript " << SI);
+ LLVM_DEBUG(dbgs() << "testing subscript " << SI);
switch (Pair[SI].Classification) {
case Subscript::ZIV:
- DEBUG(dbgs() << ", ZIV\n");
+ LLVM_DEBUG(dbgs() << ", ZIV\n");
if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
return nullptr;
break;
case Subscript::SIV: {
- DEBUG(dbgs() << ", SIV\n");
+ LLVM_DEBUG(dbgs() << ", SIV\n");
unsigned Level;
const SCEV *SplitIter = nullptr;
if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
@@ -3525,12 +3583,12 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
break;
}
case Subscript::RDIV:
- DEBUG(dbgs() << ", RDIV\n");
+ LLVM_DEBUG(dbgs() << ", RDIV\n");
if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
return nullptr;
break;
case Subscript::MIV:
- DEBUG(dbgs() << ", MIV\n");
+ LLVM_DEBUG(dbgs() << ", MIV\n");
if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
return nullptr;
break;
@@ -3541,20 +3599,20 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (Coupled.count()) {
// test coupled subscript groups
- DEBUG(dbgs() << "starting on coupled subscripts\n");
- DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
+ LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
+ LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
for (unsigned II = 0; II <= MaxLevels; ++II)
Constraints[II].setAny(SE);
for (unsigned SI : Coupled.set_bits()) {
- DEBUG(dbgs() << "testing subscript group " << SI << " { ");
+ LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");
SmallBitVector Group(Pair[SI].Group);
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
SmallVector<Subscript *, 4> PairsInGroup;
for (unsigned SJ : Group.set_bits()) {
- DEBUG(dbgs() << SJ << " ");
+ LLVM_DEBUG(dbgs() << SJ << " ");
if (Pair[SJ].Classification == Subscript::SIV)
Sivs.set(SJ);
else
@@ -3562,15 +3620,15 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
PairsInGroup.push_back(&Pair[SJ]);
}
unifySubscriptType(PairsInGroup);
- DEBUG(dbgs() << "}\n");
+ LLVM_DEBUG(dbgs() << "}\n");
while (Sivs.any()) {
bool Changed = false;
for (unsigned SJ : Sivs.set_bits()) {
- DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
+ LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
const SCEV *SplitIter = nullptr;
- DEBUG(dbgs() << "SIV\n");
+ LLVM_DEBUG(dbgs() << "SIV\n");
if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
SplitIter))
return nullptr;
@@ -3586,15 +3644,15 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
}
if (Changed) {
// propagate, possibly creating new SIVs and ZIVs
- DEBUG(dbgs() << " propagating\n");
- DEBUG(dbgs() << "\tMivs = ");
- DEBUG(dumpSmallBitVector(Mivs));
+ LLVM_DEBUG(dbgs() << " propagating\n");
+ LLVM_DEBUG(dbgs() << "\tMivs = ");
+ LLVM_DEBUG(dumpSmallBitVector(Mivs));
for (unsigned SJ : Mivs.set_bits()) {
// SJ is an MIV subscript that's part of the current coupled group
- DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
+ LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
Constraints, Result.Consistent)) {
- DEBUG(dbgs() << "\t Changed\n");
+ LLVM_DEBUG(dbgs() << "\t Changed\n");
++DeltaPropagations;
Pair[SJ].Classification =
classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
@@ -3602,7 +3660,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
Pair[SJ].Loops);
switch (Pair[SJ].Classification) {
case Subscript::ZIV:
- DEBUG(dbgs() << "ZIV\n");
+ LLVM_DEBUG(dbgs() << "ZIV\n");
if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
return nullptr;
Mivs.reset(SJ);
@@ -3625,7 +3683,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// test & propagate remaining RDIVs
for (unsigned SJ : Mivs.set_bits()) {
if (Pair[SJ].Classification == Subscript::RDIV) {
- DEBUG(dbgs() << "RDIV test\n");
+ LLVM_DEBUG(dbgs() << "RDIV test\n");
if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
return nullptr;
// I don't yet understand how to propagate RDIV results
@@ -3638,7 +3696,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// Better to somehow test all remaining subscripts simultaneously.
for (unsigned SJ : Mivs.set_bits()) {
if (Pair[SJ].Classification == Subscript::MIV) {
- DEBUG(dbgs() << "MIV test\n");
+ LLVM_DEBUG(dbgs() << "MIV test\n");
if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
return nullptr;
}
@@ -3647,7 +3705,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
}
// update Result.DV from constraint vector
- DEBUG(dbgs() << " updating\n");
+ LLVM_DEBUG(dbgs() << " updating\n");
for (unsigned SJ : ConstrainedLevels.set_bits()) {
if (SJ > CommonLevels)
break;
@@ -3753,51 +3811,27 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
assert(isLoadOrStore(Src));
assert(isLoadOrStore(Dst));
- Value *SrcPtr = getPointerOperand(Src);
- Value *DstPtr = getPointerOperand(Dst);
- assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
- SrcPtr) == MustAlias);
+ Value *SrcPtr = getLoadStorePointerOperand(Src);
+ Value *DstPtr = getLoadStorePointerOperand(Dst);
+ assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(),
+ MemoryLocation::get(Dst),
+ MemoryLocation::get(Src)) == MustAlias);
// establish loop nesting levels
establishNestingLevels(Src, Dst);
FullDependence Result(Src, Dst, false, CommonLevels);
- // See if there are GEPs we can use.
- bool UsefulGEP = false;
- GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
- GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
- if (SrcGEP && DstGEP &&
- SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
- const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
- const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
- UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
- isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
- (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
- }
- unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
- SmallVector<Subscript, 4> Pair(Pairs);
- if (UsefulGEP) {
- unsigned P = 0;
- for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
- SrcEnd = SrcGEP->idx_end(),
- DstIdx = DstGEP->idx_begin();
- SrcIdx != SrcEnd;
- ++SrcIdx, ++DstIdx, ++P) {
- Pair[P].Src = SE->getSCEV(*SrcIdx);
- Pair[P].Dst = SE->getSCEV(*DstIdx);
- }
- }
- else {
- const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
- const SCEV *DstSCEV = SE->getSCEV(DstPtr);
- Pair[0].Src = SrcSCEV;
- Pair[0].Dst = DstSCEV;
- }
+ unsigned Pairs = 1;
+ SmallVector<Subscript, 2> Pair(Pairs);
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
- if (Delinearize && CommonLevels > 1) {
+ if (Delinearize) {
if (tryDelinearize(Src, Dst, Pair)) {
- DEBUG(dbgs() << " delinearized GEP\n");
+ LLVM_DEBUG(dbgs() << " delinearized\n");
Pairs = Pair.size();
}
}
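
With the GEP-walking path gone, getSplitIteration() builds exactly one subscript pair straight from the two pointer SCEVs and leaves any splitting into per-dimension subscripts to tryDelinearize(). A minimal sketch of that setup, assuming Src and Dst are a load/store pair (as the asserts above require) and a ScalarEvolution instance is at hand; the helper and struct names below are illustrative, not LLVM API:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Local stand-in for getLoadStorePointerOperand(): the only two cases the
// dependence tester accepts are plain loads and stores.
static Value *loadStorePtr(Instruction *I) {
  if (auto *LD = dyn_cast<LoadInst>(I))
    return LD->getPointerOperand();
  if (auto *ST = dyn_cast<StoreInst>(I))
    return ST->getPointerOperand();
  return nullptr;
}

struct SubscriptPair {
  const SCEV *Src;
  const SCEV *Dst;
};

// One coupled subscript per memory-access pair; delinearization may later
// replace it with several smaller, per-dimension pairs.
static SubscriptPair makeInitialPair(Instruction *Src, Instruction *Dst,
                                     ScalarEvolution &SE) {
  return {SE.getSCEV(loadStorePtr(Src)), SE.getSCEV(loadStorePtr(Dst))};
}
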
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
index ac684ec18466..f5f1874c9303 100644
--- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -77,6 +77,8 @@
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "divergence"
+
namespace {
class DivergencePropagator {
@@ -299,6 +301,10 @@ bool DivergenceAnalysis::runOnFunction(Function &F) {
PDT, DivergentValues);
DP.populateWithSourcesOfDivergence();
DP.propagate();
+ LLVM_DEBUG(
+ dbgs() << "\nAfter divergence analysis on " << F.getName() << ":\n";
+ print(dbgs(), F.getParent())
+ );
return false;
}
@@ -318,12 +324,17 @@ void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
// Dumps all divergent values in F, arguments and then instructions.
for (auto &Arg : F->args()) {
- if (DivergentValues.count(&Arg))
- OS << "DIVERGENT: " << Arg << "\n";
+ OS << (DivergentValues.count(&Arg) ? "DIVERGENT: " : " ");
+ OS << Arg << "\n";
}
// Iterate instructions using instructions() to ensure a deterministic order.
- for (auto &I : instructions(F)) {
- if (DivergentValues.count(&I))
- OS << "DIVERGENT:" << I << "\n";
+ for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
+ auto &BB = *BI;
+ OS << "\n " << BB.getName() << ":\n";
+ for (auto &I : BB.instructionsWithoutDebug()) {
+ OS << (DivergentValues.count(&I) ? "DIVERGENT: " : " ");
+ OS << I << "\n";
+ }
}
+ OS << "\n";
}
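
The new print() output is meant to be easy to diff: every argument and every non-debug instruction is listed, prefixed with either "DIVERGENT: " or blank padding, and grouped under its basic block. A standalone sketch of that dump pattern over a precomputed set of divergent values (assumed here to be a DenseSet; the real pass keeps its own container):

#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void dumpDivergence(Function &F, const DenseSet<const Value *> &Div,
                           raw_ostream &OS) {
  for (Argument &Arg : F.args())
    OS << (Div.count(&Arg) ? "DIVERGENT: " : "           ") << Arg << "\n";
  for (BasicBlock &BB : F) {
    OS << "\n           " << BB.getName() << ":\n";
    // Skipping debug intrinsics keeps the dump identical with and without -g.
    for (Instruction &I : BB.instructionsWithoutDebug())
      OS << (Div.count(&I) ? "DIVERGENT: " : "           ") << I << "\n";
  }
  OS << "\n";
}
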
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
index bb8caf4a5174..de7f62cf4ecd 100644
--- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -9,6 +9,7 @@
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/DominanceFrontierImpl.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
index b12ae9884e3d..2d35a3fa9118 100644
--- a/contrib/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
@@ -25,20 +25,21 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
if (!F)
return EHPersonality::Unknown;
return StringSwitch<EHPersonality>(F->getName())
- .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
- .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
- .Case("__gxx_personality_seh0",EHPersonality::GNU_CXX)
- .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj)
- .Case("__gcc_personality_v0", EHPersonality::GNU_C)
- .Case("__gcc_personality_seh0",EHPersonality::GNU_C)
- .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj)
- .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
- .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
- .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
- .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
- .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
- .Case("ProcessCLRException", EHPersonality::CoreCLR)
- .Case("rust_eh_personality", EHPersonality::Rust)
+ .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
+ .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gxx_personality_seh0", EHPersonality::GNU_CXX)
+ .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj)
+ .Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__gcc_personality_seh0", EHPersonality::GNU_C)
+ .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj)
+ .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
+ .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
+ .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
+ .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
+ .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
+ .Case("ProcessCLRException", EHPersonality::CoreCLR)
+ .Case("rust_eh_personality", EHPersonality::Rust)
+ .Case("__gxx_wasm_personality_v0", EHPersonality::Wasm_CXX)
.Default(EHPersonality::Unknown);
}
@@ -55,6 +56,7 @@ StringRef llvm::getEHPersonalityName(EHPersonality Pers) {
case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3";
case EHPersonality::CoreCLR: return "ProcessCLRException";
case EHPersonality::Rust: return "rust_eh_personality";
+ case EHPersonality::Wasm_CXX: return "__gxx_wasm_personality_v0";
case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!");
}
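
Keeping classifyEHPersonality() and getEHPersonalityName() in sync means the new WebAssembly personality still round-trips through both tables. A small usage sketch (the function name below is illustrative):

#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/Function.h"

using namespace llvm;

static bool usesWasmCxxPersonality(const Function &F) {
  if (!F.hasPersonalityFn())
    return false;
  // "__gxx_wasm_personality_v0" classifies as Wasm_CXX, and
  // getEHPersonalityName(Wasm_CXX) maps back to the same symbol.
  return classifyEHPersonality(F.getPersonalityFn()) == EHPersonality::Wasm_CXX;
}
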
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
index 94306d0f54ad..197aee9dacb7 100644
--- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -65,7 +65,7 @@ class GlobalsAAResult::FunctionInfo {
/// Build a wrapper struct that has 8-byte alignment. All heap allocations
/// should provide this much alignment at least, but this makes it clear we
/// specifically rely on this amount of alignment.
- struct LLVM_ALIGNAS(8) AlignedMap {
+ struct alignas(8) AlignedMap {
AlignedMap() {}
AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {}
GlobalInfoMapType Map;
@@ -584,6 +584,10 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
} else if (Function *Callee = CS.getCalledFunction()) {
// The callgraph doesn't include intrinsic calls.
if (Callee->isIntrinsic()) {
+ if (isa<DbgInfoIntrinsic>(I))
+ // Don't let dbg intrinsics affect alias info.
+ continue;
+
FunctionModRefBehavior Behaviour =
AAResultBase::getModRefBehavior(Callee);
FI.addModRefInfo(createModRefInfo(Behaviour));
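
The intent of the new early continue is that debug-info intrinsics never feed the per-function mod/ref summary, so building with -g cannot change alias results. A generic sketch of the same skip pattern (the Summary type and its addCall hook are assumptions for illustration, not GlobalsAA code):

#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

template <typename Summary>
static void collectCalls(Function &F, Summary &S) {
  for (Instruction &I : instructions(F)) {
    auto *CI = dyn_cast<CallInst>(&I);
    if (!CI)
      continue;
    // llvm.dbg.* calls only describe source variables; feeding them to the
    // summary would make optimization results depend on debug info.
    if (isa<DbgInfoIntrinsic>(CI))
      continue;
    S.addCall(CI);
  }
}
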
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
index c30feb973e60..609e5e3a1448 100644
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -234,13 +235,13 @@ bool IVUsers::AddUsersImpl(Instruction *I,
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
!AddUsersImpl(User, SimpleLoopNests)) {
- DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
- << " OF SCEV: " << *ISE << '\n');
+ LLVM_DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
} else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
- DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
- << " OF SCEV: " << *ISE << '\n');
+ LLVM_DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
@@ -273,14 +274,15 @@ bool IVUsers::AddUsersImpl(Instruction *I,
// If we normalized the expression, but denormalization doesn't give the
// original one, discard this user.
if (OriginalISE != DenormalizedISE) {
- DEBUG(dbgs() << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
- << *ISE << '\n');
+ LLVM_DEBUG(dbgs()
+ << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
+ << *ISE << '\n');
IVUses.pop_back();
return false;
}
}
- DEBUG(if (SE->getSCEV(I) != ISE)
- dbgs() << " NORMALIZED TO: " << *ISE << '\n');
+ LLVM_DEBUG(if (SE->getSCEV(I) != ISE) dbgs()
+ << " NORMALIZED TO: " << *ISE << '\n');
}
}
return true;
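
The IVUsers changes are a mechanical DEBUG -> LLVM_DEBUG migration; the macro still wraps an arbitrary statement, so conditional prints like the "NORMALIZED TO" line keep working unchanged. A tiny sketch of the idiom (DEBUG_TYPE and the -debug-only name below are placeholders):

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "my-pass"

using namespace llvm;

static void traceChange(unsigned Before, unsigned After) {
  // The macro argument is a full statement. It is compiled out in NDEBUG
  // builds and only executes under -debug or -debug-only=my-pass otherwise.
  LLVM_DEBUG(if (Before != After) dbgs()
                 << "  value changed: " << Before << " -> " << After << '\n');
}
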
diff --git a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index c11176bbb9c8..4659c0a00629 100644
--- a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -71,19 +71,19 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
const Instruction *Inst, uint32_t NumVals, uint64_t TotalCount) {
ArrayRef<InstrProfValueData> ValueDataRef(ValueDataArray.get(), NumVals);
- DEBUG(dbgs() << " \nWork on callsite " << *Inst << " Num_targets: " << NumVals
- << "\n");
+ LLVM_DEBUG(dbgs() << " \nWork on callsite " << *Inst
+ << " Num_targets: " << NumVals << "\n");
uint32_t I = 0;
uint64_t RemainingCount = TotalCount;
for (; I < MaxNumPromotions && I < NumVals; I++) {
uint64_t Count = ValueDataRef[I].Count;
assert(Count <= RemainingCount);
- DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
- << " Target_func: " << ValueDataRef[I].Value << "\n");
+ LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
+ << " Target_func: " << ValueDataRef[I].Value << "\n");
if (!isPromotionProfitable(Count, TotalCount, RemainingCount)) {
- DEBUG(dbgs() << " Not promote: Cold target.\n");
+ LLVM_DEBUG(dbgs() << " Not promote: Cold target.\n");
return I;
}
RemainingCount -= Count;
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index b0cb29203a5a..a6cccc3b5910 100644
--- a/contrib/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -135,7 +136,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ContainsNoDuplicateCall;
bool HasReturn;
bool HasIndirectBr;
- bool HasFrameEscape;
+ bool HasUninlineableIntrinsic;
+ bool UsesVarArgs;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -280,12 +282,13 @@ public:
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0),
- EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ HasUninlineableIntrinsic(false), UsesVarArgs(false), AllocatedSize(0),
+ NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
+ SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -308,12 +311,12 @@ public:
} // namespace
-/// \brief Test whether the given value is an Alloca-derived function argument.
+/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
return SROAArgValues.count(V);
}
-/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
@@ -329,7 +332,7 @@ bool CallAnalyzer::lookupSROAArgAndCost(
return CostIt != SROAArgCosts.end();
}
-/// \brief Disable SROA for the candidate marked by this cost iterator.
+/// Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
@@ -343,7 +346,7 @@ void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
disableLoadElimination();
}
-/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
Value *SROAArg;
DenseMap<Value *, int>::iterator CostIt;
@@ -351,7 +354,7 @@ void CallAnalyzer::disableSROA(Value *V) {
disableSROA(CostIt);
}
-/// \brief Accumulate the given cost for a particular SROA candidate.
+/// Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost) {
CostIt->second += InstructionCost;
@@ -366,12 +369,12 @@ void CallAnalyzer::disableLoadElimination() {
}
}
-/// \brief Accumulate a constant GEP offset into an APInt if possible.
+/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
- unsigned IntPtrWidth = DL.getPointerSizeInBits();
+ unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
assert(IntPtrWidth == Offset.getBitWidth());
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -399,7 +402,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
return true;
}
-/// \brief Use TTI to check whether a GEP is free.
+/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
@@ -450,8 +453,12 @@ bool CallAnalyzer::visitPHI(PHINode &I) {
// SROA if it *might* be used in an inappropriate manner.
// Phi nodes are always zero-cost.
-
- APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits());
+  // FIXME: Pointer sizes may differ between address spaces, so should the
+  // getPointerSizeInBits call here use the correct address space? Or could the
+  // call be skipped entirely? As far as I can see, ZeroOffset is only used as
+  // a dummy value, so probably any bit width would do.
+ APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
bool CheckSROA = I.getType()->isPointerTy();
// Track the constant or pointer with constant offset we've seen so far.
@@ -536,7 +543,7 @@ bool CallAnalyzer::visitPHI(PHINode &I) {
return true;
}
-/// \brief Check we can fold GEPs of constant-offset call site argument pointers.
+/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
@@ -641,7 +648,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- if (IntegerSize >= DL.getPointerSizeInBits()) {
+ unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
+ if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
std::pair<Value *, APInt> BaseAndOffset =
ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
@@ -674,7 +682,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- if (IntegerSize <= DL.getPointerSizeInBits()) {
+ if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -913,14 +921,14 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
if (!Caller->optForSize() && HotCallSiteThreshold) {
- DEBUG(dbgs() << "Hot callsite.\n");
+ LLVM_DEBUG(dbgs() << "Hot callsite.\n");
// FIXME: This should update the threshold only if it exceeds the
// current threshold, but AutoFDO + ThinLTO currently relies on this
// behavior to prevent inlining of hot callsites during ThinLTO
// compile phase.
Threshold = HotCallSiteThreshold.getValue();
} else if (isColdCallSite(CS, CallerBFI)) {
- DEBUG(dbgs() << "Cold callsite.\n");
+ LLVM_DEBUG(dbgs() << "Cold callsite.\n");
// Do not apply bonuses for a cold callsite including the
// LastCallToStatic bonus. While this bonus might result in code size
// reduction, it can cause the size of a non-cold caller to increase
@@ -931,13 +939,13 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// Use callee's global profile information only if we have no way of
// determining this via callsite information.
if (PSI->isFunctionEntryHot(&Callee)) {
- DEBUG(dbgs() << "Hot callee.\n");
+ LLVM_DEBUG(dbgs() << "Hot callee.\n");
// If callsite hotness can not be determined, we may still know
// that the callee is hot and treat it as a weaker hint for threshold
// increase.
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
} else if (PSI->isFunctionEntryCold(&Callee)) {
- DEBUG(dbgs() << "Cold callee.\n");
+ LLVM_DEBUG(dbgs() << "Cold callee.\n");
// Do not apply bonuses for a cold callee including the
// LastCallToStatic bonus. While this bonus might result in code size
// reduction, it can cause the size of a non-cold caller to increase
@@ -1155,7 +1163,7 @@ bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
return false;
}
-/// \brief Try to simplify a call site.
+/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
@@ -1225,8 +1233,13 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
disableLoadElimination();
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
+ case Intrinsic::icall_branch_funnel:
case Intrinsic::localescape:
- HasFrameEscape = true;
+ HasUninlineableIntrinsic = true;
+ return false;
+ case Intrinsic::vastart:
+ case Intrinsic::vaend:
+ UsesVarArgs = true;
return false;
}
}
@@ -1521,7 +1534,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
return false;
}
-/// \brief Analyze a basic block for its contribution to the inline cost.
+/// Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
@@ -1562,7 +1575,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr || HasFrameEscape) {
+ HasIndirectBr || HasUninlineableIntrinsic || UsesVarArgs) {
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
@@ -1598,7 +1611,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
return true;
}
-/// \brief Compute the base pointer and cumulative constant offsets for V.
+/// Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
@@ -1608,7 +1621,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
if (!V->getType()->isPointerTy())
return nullptr;
- unsigned IntPtrWidth = DL.getPointerSizeInBits();
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -1632,11 +1646,11 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
- Type *IntPtrTy = DL.getIntPtrType(V->getContext());
+ Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
-/// \brief Find dead blocks due to deleted CFG edges during inlining.
+/// Find dead blocks due to deleted CFG edges during inlining.
///
/// If we know the successor of the current block, \p CurrBB, has to be \p
/// NextBB, the other successors of \p CurrBB are dead if these successors have
@@ -1674,7 +1688,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
}
}
-/// \brief Analyze a call site for potential inlining.
+/// Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
@@ -1867,7 +1881,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-/// \brief Dump stats about this call's analysis.
+/// Dump stats about this call's analysis.
LLVM_DUMP_METHOD void CallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
DEBUG_PRINT_STAT(NumConstantArgs);
@@ -1887,7 +1901,7 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump() {
}
#endif
-/// \brief Test that there are no attribute conflicts between Caller and Callee
+/// Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
Function *Callee,
@@ -1904,7 +1918,8 @@ int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
// size of the byval type by the target's pointer size.
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = DL.getPointerSizeInBits();
+ unsigned AS = PTy->getAddressSpace();
+ unsigned PointerSize = DL.getPointerSizeInBits(AS);
// Ceiling division.
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
@@ -1948,6 +1963,19 @@ InlineCost llvm::getInlineCost(
if (!Callee)
return llvm::InlineCost::getNever();
+  // Never inline calls with byval arguments that do not have the alloca
+ // address space. Since byval arguments can be replaced with a copy to an
+ // alloca, the inlined code would need to be adjusted to handle that the
+ // argument is in the alloca address space (so it is a little bit complicated
+ // to solve).
+ unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
+ for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
+ if (CS.isByValArgument(I)) {
+ PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ if (PTy->getAddressSpace() != AllocaAS)
+ return llvm::InlineCost::getNever();
+ }
+
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
@@ -1966,6 +1994,11 @@ InlineCost llvm::getInlineCost(
if (Caller->hasFnAttribute(Attribute::OptimizeNone))
return llvm::InlineCost::getNever();
+ // Don't inline a function that treats null pointer as valid into a caller
+ // that does not have this attribute.
+ if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
+ return llvm::InlineCost::getNever();
+
// Don't inline functions which can be interposed at link-time. Don't inline
// functions marked noinline or call sites marked noinline.
// Note: inlining non-exact non-interposable functions is fine, since we know
@@ -1974,14 +2007,14 @@ InlineCost llvm::getInlineCost(
CS.isNoInline())
return llvm::InlineCost::getNever();
- DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
- << "... (caller:" << Caller->getName() << ")\n");
+ LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
+ << "... (caller:" << Caller->getName() << ")\n");
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
Params);
bool ShouldInline = CA.analyzeCall(CS);
- DEBUG(CA.dump());
+ LLVM_DEBUG(CA.dump());
// Check if there was a reason to force inlining or no inlining.
if (!ShouldInline && CA.getCost() < CA.getThreshold())
@@ -2015,12 +2048,21 @@ bool llvm::isInlineViable(Function &F) {
cast<CallInst>(CS.getInstruction())->canReturnTwice())
return false;
- // Disallow inlining functions that call @llvm.localescape. Doing this
- // correctly would require major changes to the inliner.
- if (CS.getCalledFunction() &&
- CS.getCalledFunction()->getIntrinsicID() ==
- llvm::Intrinsic::localescape)
- return false;
+ if (CS.getCalledFunction())
+ switch (CS.getCalledFunction()->getIntrinsicID()) {
+ default:
+ break;
+      // Disallow inlining of @llvm.icall.branch.funnel because the current
+      // backend can't separate call targets from call arguments.
+ case llvm::Intrinsic::icall_branch_funnel:
+ // Disallow inlining functions that call @llvm.localescape. Doing this
+ // correctly would require major changes to the inliner.
+ case llvm::Intrinsic::localescape:
+ // Disallow inlining of functions that access VarArgs.
+ case llvm::Intrinsic::vastart:
+ case llvm::Intrinsic::vaend:
+ return false;
+ }
}
}
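
Several of the InlineCost hunks replace default-address-space queries (DL.getPointerSizeInBits(), DL.getIntPtrType()) with forms that take the pointer's actual address space or index width, and getInlineCost() now gives up before even running the analyzer when a byval argument lives outside the alloca address space. A hedged sketch of that byval check, written against the callee's formal arguments rather than a CallSite (the function name is illustrative):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static bool byValArgsAreInlinable(const Function &Callee) {
  const DataLayout &DL = Callee.getParent()->getDataLayout();
  unsigned AllocaAS = DL.getAllocaAddrSpace();
  for (const Argument &Arg : Callee.args()) {
    if (!Arg.hasByValAttr())
      continue;
    // Inlining turns a byval argument into a copy to a local alloca; if the
    // incoming pointer is in a different address space, that rewrite is not
    // a simple substitution, so give up.
    auto *PTy = cast<PointerType>(Arg.getType());
    if (PTy->getAddressSpace() != AllocaAS)
      return false;
  }
  return true;
}
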
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index c814ff122e44..519d6d67be51 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -62,6 +62,8 @@ static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyCastInst(unsigned, Value *, Type *,
const SimplifyQuery &, unsigned);
+static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, const SimplifyQuery &,
+ unsigned);
/// For a boolean type or a vector of boolean type, return false or a vector
/// with every element false.
@@ -90,7 +92,7 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
}
/// Does the given value dominate the specified phi node?
-static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
// Arguments and constants dominate all instructions.
@@ -99,7 +101,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
// If we are processing instructions (and/or basic blocks) that have not been
// fully added to a function, the parent nodes may still be null. Simply
// return the conservative answer in these cases.
- if (!I->getParent() || !P->getParent() || !I->getParent()->getParent())
+ if (!I->getParent() || !P->getParent() || !I->getFunction())
return false;
// If we have a DominatorTree then do a precise test.
@@ -108,7 +110,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
// Otherwise, if the instruction is in the entry block and is not an invoke,
// then it obviously dominates all phi nodes.
- if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+ if (I->getParent() == &I->getFunction()->getEntryBlock() &&
!isa<InvokeInst>(I))
return true;
@@ -443,13 +445,13 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
if (isa<PHINode>(LHS)) {
PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(RHS, PI, Q.DT))
+ if (!valueDominatesPHI(RHS, PI, Q.DT))
return nullptr;
} else {
assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
PI = cast<PHINode>(RHS);
// Bail out if LHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(LHS, PI, Q.DT))
+ if (!valueDominatesPHI(LHS, PI, Q.DT))
return nullptr;
}
@@ -490,7 +492,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
PHINode *PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(RHS, PI, Q.DT))
+ if (!valueDominatesPHI(RHS, PI, Q.DT))
return nullptr;
// Evaluate the BinOp on the incoming phi values.
@@ -525,7 +527,7 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode,
/// Given operands for an Add, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q))
return C;
@@ -538,6 +540,10 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (match(Op1, m_Zero()))
return Op0;
+  // If the two operands are negations of each other, return 0.
+ if (isKnownNegation(Op0, Op1))
+ return Constant::getNullValue(Op0->getType());
+
// X + (Y - X) -> Y
// (Y - X) + X -> Y
// Eg: X + -X -> 0
@@ -555,10 +561,14 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// add nsw/nuw (xor Y, signmask), signmask --> Y
// The no-wrapping add guarantees that the top bit will be set by the add.
// Therefore, the xor must be clearing the already set sign bit of Y.
- if ((isNSW || isNUW) && match(Op1, m_SignMask()) &&
+ if ((IsNSW || IsNUW) && match(Op1, m_SignMask()) &&
match(Op0, m_Xor(m_Value(Y), m_SignMask())))
return Y;
+ // add nuw %x, -1 -> -1, because %x can only be 0.
+ if (IsNUW && match(Op1, m_AllOnes()))
+ return Op1; // Which is -1.
+
/// i1 add -> xor.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
@@ -581,12 +591,12 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return nullptr;
}
-Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
const SimplifyQuery &Query) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit);
+ return ::SimplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit);
}
-/// \brief Compute the base pointer and cumulative constant offsets for V.
+/// Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
@@ -637,7 +647,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
return OffsetIntPtr;
}
-/// \brief Compute the constant difference between two pointer values.
+/// Compute the constant difference between two pointer values.
/// If the difference is not a constant, returns zero.
static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
Value *RHS) {
@@ -680,14 +690,14 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (match(Op0, m_Zero())) {
// 0 - X -> 0 if the sub is NUW.
if (isNUW)
- return Op0;
+ return Constant::getNullValue(Op0->getType());
KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (Known.Zero.isMaxSignedValue()) {
// Op1 is either 0 or the minimum signed value. If the sub is NSW, then
// Op1 must be 0 because negating the minimum signed value is undefined.
if (isNSW)
- return Op0;
+ return Constant::getNullValue(Op0->getType());
// 0 - X -> X if X is 0 or the minimum signed value.
return Op1;
@@ -799,12 +809,9 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return C;
// X * undef -> 0
- if (match(Op1, m_Undef()))
- return Constant::getNullValue(Op0->getType());
-
// X * 0 -> 0
- if (match(Op1, m_Zero()))
- return Op1;
+ if (match(Op1, m_CombineOr(m_Undef(), m_Zero())))
+ return Constant::getNullValue(Op0->getType());
// X * 1 -> X
if (match(Op1, m_One()))
@@ -868,13 +875,14 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
if (match(Op1, m_Zero()))
return UndefValue::get(Ty);
- // If any element of a constant divisor vector is zero, the whole op is undef.
+ // If any element of a constant divisor vector is zero or undef, the whole op
+ // is undef.
auto *Op1C = dyn_cast<Constant>(Op1);
if (Op1C && Ty->isVectorTy()) {
unsigned NumElts = Ty->getVectorNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = Op1C->getAggregateElement(i);
- if (Elt && Elt->isNullValue())
+ if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt)))
return UndefValue::get(Ty);
}
}
@@ -887,7 +895,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
// 0 / X -> 0
// 0 % X -> 0
if (match(Op0, m_Zero()))
- return Op0;
+ return Constant::getNullValue(Op0->getType());
// X / X -> 1
// X % X -> 0
@@ -898,7 +906,10 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
// X % 1 -> 0
// If this is a boolean op (single-bit element type), we can't have
// division-by-zero or remainder-by-zero, so assume the divisor is 1.
- if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1))
+ // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
+ Value *X;
+ if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1) ||
+ (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
return IsDiv ? Op0 : Constant::getNullValue(Ty);
return nullptr;
@@ -978,18 +989,17 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
bool IsSigned = Opcode == Instruction::SDiv;
// (X * Y) / Y -> X if the multiplication does not overflow.
- Value *X = nullptr, *Y = nullptr;
- if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
- if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
- OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0);
- // If the Mul knows it does not overflow, then we are good to go.
+ Value *X;
+ if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) {
+ auto *Mul = cast<OverflowingBinaryOperator>(Op0);
+ // If the Mul does not overflow, then we are good to go.
if ((IsSigned && Mul->hasNoSignedWrap()) ||
(!IsSigned && Mul->hasNoUnsignedWrap()))
return X;
- // If X has the form X = A / Y then X * Y cannot overflow.
- if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
- if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
- return X;
+ // If X has the form X = A / Y, then X * Y cannot overflow.
+ if ((IsSigned && match(X, m_SDiv(m_Value(), m_Specific(Op1)))) ||
+ (!IsSigned && match(X, m_UDiv(m_Value(), m_Specific(Op1)))))
+ return X;
}
// (X rem Y) / Y -> 0
@@ -1041,6 +1051,13 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
return Op0;
+ // (X << Y) % X -> 0
+ if ((Opcode == Instruction::SRem &&
+ match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
+ (Opcode == Instruction::URem &&
+ match(Op0, m_NUWShl(m_Specific(Op1), m_Value()))))
+ return Constant::getNullValue(Op0->getType());
+
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
@@ -1064,6 +1081,10 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// If not, this returns null.
static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
+  // If the two operands are negations of each other (with nsw), return -1.
+ if (isKnownNegation(Op0, Op1, /*NeedNSW=*/true))
+ return Constant::getAllOnesValue(Op0->getType());
+
return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse);
}
@@ -1086,6 +1107,16 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
/// If not, this returns null.
static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
+ // If the divisor is 0, the result is undefined, so assume the divisor is -1.
+ // srem Op0, (sext i1 X) --> srem Op0, -1 --> 0
+ Value *X;
+ if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
+ return ConstantInt::getNullValue(Op0->getType());
+
+  // If the two operands are negations of each other, return 0.
+ if (isKnownNegation(Op0, Op1))
+ return ConstantInt::getNullValue(Op0->getType());
+
return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse);
}
@@ -1140,10 +1171,14 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
// 0 shift by X -> 0
if (match(Op0, m_Zero()))
- return Op0;
+ return Constant::getNullValue(Op0->getType());
// X shift by 0 -> X
- if (match(Op1, m_Zero()))
+ // Shift-by-sign-extended bool must be shift-by-0 because shift-by-all-ones
+ // would be poison.
+ Value *X;
+ if (match(Op1, m_Zero()) ||
+ (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
return Op0;
// Fold undefined shifts.
@@ -1177,7 +1212,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
return nullptr;
}
-/// \brief Given operands for an Shl, LShr or AShr, see if we can
+/// Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
Value *Op1, bool isExact, const SimplifyQuery &Q,
@@ -1220,6 +1255,13 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *X;
if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
return X;
+
+ // shl nuw i8 C, %x -> C iff C has sign bit set.
+ if (isNUW && match(Op0, m_Negative()))
+ return Op0;
+ // NOTE: could use computeKnownBits() / LazyValueInfo,
+ // but the cost-benefit analysis suggests it isn't worth it.
+
return nullptr;
}
@@ -1257,9 +1299,10 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
MaxRecurse))
return V;
- // all ones >>a X -> all ones
+ // all ones >>a X -> -1
+ // Do not return Op0 because it may contain undef elements if it's a vector.
if (match(Op0, m_AllOnes()))
- return Op0;
+ return Constant::getAllOnesValue(Op0->getType());
// (X << A) >> A -> X
Value *X;
@@ -1295,7 +1338,7 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
ICmpInst::isUnsigned(UnsignedPred))
;
else if (match(UnsignedICmp,
- m_ICmp(UnsignedPred, m_Value(Y), m_Specific(X))) &&
+ m_ICmp(UnsignedPred, m_Specific(Y), m_Value(X))) &&
ICmpInst::isUnsigned(UnsignedPred))
UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
else
@@ -1413,6 +1456,43 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
}
+static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1,
+ bool IsAnd) {
+ ICmpInst::Predicate P0 = Cmp0->getPredicate(), P1 = Cmp1->getPredicate();
+ if (!match(Cmp0->getOperand(1), m_Zero()) ||
+ !match(Cmp1->getOperand(1), m_Zero()) || P0 != P1)
+ return nullptr;
+
+ if ((IsAnd && P0 != ICmpInst::ICMP_NE) || (!IsAnd && P1 != ICmpInst::ICMP_EQ))
+ return nullptr;
+
+ // We have either "(X == 0 || Y == 0)" or "(X != 0 && Y != 0)".
+ Value *X = Cmp0->getOperand(0);
+ Value *Y = Cmp1->getOperand(0);
+
+ // If one of the compares is a masked version of a (not) null check, then
+ // that compare implies the other, so we eliminate the other. Optionally, look
+ // through a pointer-to-int cast to match a null check of a pointer type.
+
+ // (X == 0) || (([ptrtoint] X & ?) == 0) --> ([ptrtoint] X & ?) == 0
+ // (X == 0) || ((? & [ptrtoint] X) == 0) --> (? & [ptrtoint] X) == 0
+ // (X != 0) && (([ptrtoint] X & ?) != 0) --> ([ptrtoint] X & ?) != 0
+ // (X != 0) && ((? & [ptrtoint] X) != 0) --> (? & [ptrtoint] X) != 0
+ if (match(Y, m_c_And(m_Specific(X), m_Value())) ||
+ match(Y, m_c_And(m_PtrToInt(m_Specific(X)), m_Value())))
+ return Cmp1;
+
+ // (([ptrtoint] Y & ?) == 0) || (Y == 0) --> ([ptrtoint] Y & ?) == 0
+ // ((? & [ptrtoint] Y) == 0) || (Y == 0) --> (? & [ptrtoint] Y) == 0
+ // (([ptrtoint] Y & ?) != 0) && (Y != 0) --> ([ptrtoint] Y & ?) != 0
+ // ((? & [ptrtoint] Y) != 0) && (Y != 0) --> (? & [ptrtoint] Y) != 0
+ if (match(X, m_c_And(m_Specific(Y), m_Value())) ||
+ match(X, m_c_And(m_PtrToInt(m_Specific(Y)), m_Value())))
+ return Cmp0;
+
+ return nullptr;
+}
+
static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
// (icmp (add V, C0), C1) & (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
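
The new simplifyAndOrOfICmpsWithZero helper above keeps only the stronger compare because "(X & M) != 0" implies "X != 0" (and, dually, "X == 0" implies "(X & M) == 0"). A standalone 8-bit exhaustive check of that implication, in plain C++ rather than LLVM code:

#include <cstdio>

int main() {
  int Counterexamples = 0;
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned M = 0; M < 256; ++M)
      if ((X & M) != 0 && X == 0) // masked value nonzero but value zero?
        ++Counterexamples;
  std::printf("counterexamples: %d\n", Counterexamples); // prints 0
  return 0;
}
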
@@ -1473,6 +1553,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true))
return X;
+ if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
+ return X;
+
if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1))
return X;
if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0))
@@ -1541,6 +1624,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false))
return X;
+ if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
+ return X;
+
if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1))
return X;
if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0))
@@ -1638,7 +1724,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// X & 0 = 0
if (match(Op1, m_Zero()))
- return Op1;
+ return Constant::getNullValue(Op0->getType());
// X & -1 = X
if (match(Op1, m_AllOnes()))
@@ -1733,21 +1819,16 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return C;
// X | undef -> -1
- if (match(Op1, m_Undef()))
+ // X | -1 = -1
+ // Do not return Op1 because it may contain undef elements if it's a vector.
+ if (match(Op1, m_Undef()) || match(Op1, m_AllOnes()))
return Constant::getAllOnesValue(Op0->getType());
// X | X = X
- if (Op0 == Op1)
- return Op0;
-
// X | 0 = X
- if (match(Op1, m_Zero()))
+ if (Op0 == Op1 || match(Op1, m_Zero()))
return Op0;
- // X | -1 = -1
- if (match(Op1, m_AllOnes()))
- return Op1;
-
// A | ~A = ~A | A = -1
if (match(Op0, m_Not(m_Specific(Op1))) ||
match(Op1, m_Not(m_Specific(Op0))))
@@ -2051,9 +2132,12 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
uint64_t LHSSize, RHSSize;
+ ObjectSizeOpts Opts;
+ Opts.NullIsUnknownSize =
+ NullPointerIsDefined(cast<AllocaInst>(LHS)->getFunction());
if (LHSOffsetCI && RHSOffsetCI &&
- getObjectSize(LHS, LHSSize, DL, TLI) &&
- getObjectSize(RHS, RHSSize, DL, TLI)) {
+ getObjectSize(LHS, LHSSize, DL, TLI, Opts) &&
+ getObjectSize(RHS, RHSSize, DL, TLI, Opts)) {
const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
if (!LHSOffsetValue.isNegative() &&
@@ -2442,6 +2526,20 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
Value *RHS) {
+ Type *ITy = GetCompareTy(RHS); // The return type.
+
+ Value *X;
+ // Sign-bit checks can be optimized to true/false after unsigned
+ // floating-point casts:
+ // icmp slt (bitcast (uitofp X)), 0 --> false
+ // icmp sgt (bitcast (uitofp X)), -1 --> true
+ if (match(LHS, m_BitCast(m_UIToFP(m_Value(X))))) {
+ if (Pred == ICmpInst::ICMP_SLT && match(RHS, m_Zero()))
+ return ConstantInt::getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_SGT && match(RHS, m_AllOnes()))
+ return ConstantInt::getTrue(ITy);
+ }
+
const APInt *C;
if (!match(RHS, m_APInt(C)))
return nullptr;
@@ -2449,9 +2547,9 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
// Rule out tautological comparisons (eg., ult 0 or uge 0).
ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C);
if (RHS_CR.isEmptySet())
- return ConstantInt::getFalse(GetCompareTy(RHS));
+ return ConstantInt::getFalse(ITy);
if (RHS_CR.isFullSet())
- return ConstantInt::getTrue(GetCompareTy(RHS));
+ return ConstantInt::getTrue(ITy);
// Find the range of possible values for binary operators.
unsigned Width = C->getBitWidth();
@@ -2469,9 +2567,9 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
if (!LHS_CR.isFullSet()) {
if (RHS_CR.contains(LHS_CR))
- return ConstantInt::getTrue(GetCompareTy(RHS));
+ return ConstantInt::getTrue(ITy);
if (RHS_CR.inverse().contains(LHS_CR))
- return ConstantInt::getFalse(GetCompareTy(RHS));
+ return ConstantInt::getFalse(ITy);
}
return nullptr;
@@ -3008,8 +3106,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Type *ITy = GetCompareTy(LHS); // The return type.
// icmp X, X -> true/false
- // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
- // because X could be 0.
+ // icmp X, undef -> true/false because undef could be X.
if (LHS == RHS || isa<UndefValue>(RHS))
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
@@ -3309,6 +3406,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(RetTy);
}
+ // NaN is unordered; NaN is not ordered.
+ assert((FCmpInst::isOrdered(Pred) || FCmpInst::isUnordered(Pred)) &&
+ "Comparison must be either ordered or unordered");
+ if (match(RHS, m_NaN()))
+ return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
+
// fcmp pred x, undef and fcmp pred undef, x
// fold to true if unordered, false if ordered
if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
@@ -3328,15 +3431,6 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Handle fcmp with constant RHS.
const APFloat *C;
if (match(RHS, m_APFloat(C))) {
- // If the constant is a nan, see if we can fold the comparison based on it.
- if (C->isNaN()) {
- if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
- return getFalse(RetTy);
- assert(FCmpInst::isUnordered(Pred) &&
- "Comparison must be either ordered or unordered!");
- // True if unordered.
- return getTrue(RetTy);
- }
// Check whether the constant is an infinity.
if (C->isInfinity()) {
if (C->isNegative()) {
@@ -3475,6 +3569,17 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
}
}
+ // Same for GEPs.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (MaxRecurse) {
+ SmallVector<Value *, 8> NewOps(GEP->getNumOperands());
+ transform(GEP->operands(), NewOps.begin(),
+ [&](Value *V) { return V == Op ? RepOp : V; });
+ return SimplifyGEPInst(GEP->getSourceElementType(), NewOps, Q,
+ MaxRecurse - 1);
+ }
+ }
+
// TODO: We could hand off more cases to instsimplify here.
// If all operands are constant after substituting Op for RepOp then we can
@@ -3581,24 +3686,6 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
TrueVal, FalseVal))
return V;
- if (CondVal->hasOneUse()) {
- const APInt *C;
- if (match(CmpRHS, m_APInt(C))) {
- // X < MIN ? T : F --> F
- if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
- return FalseVal;
- // X < MIN ? T : F --> F
- if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
- return FalseVal;
- // X > MAX ? T : F --> F
- if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
- return FalseVal;
- // X > MAX ? T : F --> F
- if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
- return FalseVal;
- }
- }
-
// If we have an equality comparison, then we know the value in one of the
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
@@ -3631,37 +3718,38 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
- Value *FalseVal, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
- // select true, X, Y -> X
- // select false, X, Y -> Y
- if (Constant *CB = dyn_cast<Constant>(CondVal)) {
- if (Constant *CT = dyn_cast<Constant>(TrueVal))
- if (Constant *CF = dyn_cast<Constant>(FalseVal))
- return ConstantFoldSelectInstruction(CB, CT, CF);
- if (CB->isAllOnesValue())
+static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (auto *CondC = dyn_cast<Constant>(Cond)) {
+ if (auto *TrueC = dyn_cast<Constant>(TrueVal))
+ if (auto *FalseC = dyn_cast<Constant>(FalseVal))
+ return ConstantFoldSelectInstruction(CondC, TrueC, FalseC);
+
+ // select undef, X, Y -> X or Y
+ if (isa<UndefValue>(CondC))
+ return isa<Constant>(FalseVal) ? FalseVal : TrueVal;
+
+ // TODO: Vector constants with undef elements don't simplify.
+
+ // select true, X, Y -> X
+ if (CondC->isAllOnesValue())
return TrueVal;
- if (CB->isNullValue())
+ // select false, X, Y -> Y
+ if (CondC->isNullValue())
return FalseVal;
}
- // select C, X, X -> X
+ // select ?, X, X -> X
if (TrueVal == FalseVal)
return TrueVal;
- if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
- if (isa<Constant>(FalseVal))
- return FalseVal;
- return TrueVal;
- }
- if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ if (isa<UndefValue>(TrueVal)) // select ?, undef, X -> X
return FalseVal;
- if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ if (isa<UndefValue>(FalseVal)) // select ?, X, undef -> X
return TrueVal;
if (Value *V =
- simplifySelectWithICmpCond(CondVal, TrueVal, FalseVal, Q, MaxRecurse))
+ simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
return V;
return nullptr;
@@ -3712,7 +3800,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
// The following transforms are only safe if the ptrtoint cast
// doesn't truncate the pointers.
if (Ops[1]->getType()->getScalarSizeInBits() ==
- Q.DL.getPointerSizeInBits(AS)) {
+ Q.DL.getIndexSizeInBits(AS)) {
auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
if (match(P, m_Zero()))
return Constant::getNullValue(GEPTy);
@@ -3752,10 +3840,10 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
if (Q.DL.getTypeAllocSize(LastType) == 1 &&
all_of(Ops.slice(1).drop_back(1),
[](Value *Idx) { return match(Idx, m_Zero()); })) {
- unsigned PtrWidth =
- Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace());
- if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth) {
- APInt BasePtrOffset(PtrWidth, 0);
+ unsigned IdxWidth =
+ Q.DL.getIndexSizeInBits(Ops[0]->getType()->getPointerAddressSpace());
+ if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == IdxWidth) {
+ APInt BasePtrOffset(IdxWidth, 0);
Value *StrippedBasePtr =
Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL,
BasePtrOffset);
@@ -3946,7 +4034,7 @@ static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) {
// instruction, we cannot return X as the result of the PHI node unless it
// dominates the PHI block.
if (HasUndefInput)
- return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
+ return valueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
return CommonValue;
}
@@ -4123,6 +4211,28 @@ Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
}
+static Constant *propagateNaN(Constant *In) {
+ // If the input is a vector with undef elements, just return a default NaN.
+ if (!In->isNaN())
+ return ConstantFP::getNaN(In->getType());
+
+ // Propagate the existing NaN constant when possible.
+ // TODO: Should we quiet a signaling NaN?
+ return In;
+}
+
+static Constant *simplifyFPBinop(Value *Op0, Value *Op1) {
+ if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
+ return ConstantFP::getNaN(Op0->getType());
+
+ if (match(Op0, m_NaN()))
+ return propagateNaN(cast<Constant>(Op0));
+ if (match(Op1, m_NaN()))
+ return propagateNaN(cast<Constant>(Op1));
+
+ return nullptr;
+}
+
/// Given operands for an FAdd, see if we can fold the result. If not, this
/// returns null.
static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
@@ -4130,29 +4240,28 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
return C;
+ if (Constant *C = simplifyFPBinop(Op0, Op1))
+ return C;
+
// fadd X, -0 ==> X
- if (match(Op1, m_NegZero()))
+ if (match(Op1, m_NegZeroFP()))
return Op0;
// fadd X, 0 ==> X, when we know X is not -0
- if (match(Op1, m_Zero()) &&
+ if (match(Op1, m_PosZeroFP()) &&
(FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
return Op0;
- // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
- // where nnan and ninf have to occur at least once somewhere in this
- // expression
- Value *SubOp = nullptr;
- if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
- SubOp = Op1;
- else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
- SubOp = Op0;
- if (SubOp) {
- Instruction *FSub = cast<Instruction>(SubOp);
- if ((FMF.noNaNs() || FSub->hasNoNaNs()) &&
- (FMF.noInfs() || FSub->hasNoInfs()))
- return Constant::getNullValue(Op0->getType());
- }
+ // With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant)
+ // We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN.
+ // Negative zeros are allowed because we always end up with positive zero:
+ // X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
+ // X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
+ // X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0
+ // X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0
+ if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
+ match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))))
+ return ConstantFP::getNullValue(Op0->getType());
return nullptr;
}
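Editor's note: the zero-sign case analysis in the comment above can be checked numerically; every combination of zero signs lands on positive zero, so only the nnan flag is required for this fold. A small sketch (plain C++, assuming ordinary IEEE-754 doubles):

#include <cassert>
#include <cmath>

// Checks the case analysis for (+/-0.0 - X) + X when X is itself a zero:
// every combination folds to +0.0, so no nsz flag is needed.
int main() {
  const double zeros[] = {0.0, -0.0};
  for (double z : zeros)      // the zero used in the subtraction
    for (double x : zeros) {  // the value of X
      double r = (z - x) + x;
      assert(r == 0.0 && !std::signbit(r)); // always positive zero
    }
  return 0;
}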
@@ -4164,23 +4273,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
return C;
- // fsub X, 0 ==> X
- if (match(Op1, m_Zero()))
+ if (Constant *C = simplifyFPBinop(Op0, Op1))
+ return C;
+
+ // fsub X, +0 ==> X
+ if (match(Op1, m_PosZeroFP()))
return Op0;
// fsub X, -0 ==> X, when we know X is not -0
- if (match(Op1, m_NegZero()) &&
+ if (match(Op1, m_NegZeroFP()) &&
(FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
return Op0;
// fsub -0.0, (fsub -0.0, X) ==> X
Value *X;
- if (match(Op0, m_NegZero()) && match(Op1, m_FSub(m_NegZero(), m_Value(X))))
+ if (match(Op0, m_NegZeroFP()) &&
+ match(Op1, m_FSub(m_NegZeroFP(), m_Value(X))))
return X;
// fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
- if (FMF.noSignedZeros() && match(Op0, m_AnyZero()) &&
- match(Op1, m_FSub(m_AnyZero(), m_Value(X))))
+ if (FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()) &&
+ match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))))
return X;
// fsub nnan x, x ==> 0.0
@@ -4196,13 +4309,25 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
return C;
+ if (Constant *C = simplifyFPBinop(Op0, Op1))
+ return C;
+
// fmul X, 1.0 ==> X
if (match(Op1, m_FPOne()))
return Op0;
// fmul nnan nsz X, 0 ==> 0
- if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
- return Op1;
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP()))
+ return ConstantFP::getNullValue(Op0->getType());
+
+ // sqrt(X) * sqrt(X) --> X, if we can:
+ // 1. Remove the intermediate rounding (reassociate).
+ // 2. Ignore non-zero negative numbers because sqrt would produce NAN.
+ // 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0.
+ Value *X;
+ if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) &&
+ FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros())
+ return X;
return nullptr;
}
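Editor's note: the reassoc requirement for sqrt(X) * sqrt(X) --> X exists because the intermediate sqrt result is rounded, so the product is usually not bit-identical to X. A quick check (plain C++, assuming a correctly rounded IEEE-754 sqrt):

#include <cmath>
#include <cstdio>

int main() {
  double x = 2.0;
  double s = std::sqrt(x);  // rounded to the nearest double
  double y = s * s;         // second rounding step
  // Without reassoc the fold would change the observable result; on a
  // typical IEEE-754 target y is 2.0000000000000004 rather than 2.0.
  std::printf("sqrt(2)*sqrt(2) = %.17g (== 2.0? %d)\n", y, y == x);
  return 0;
}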
@@ -4228,13 +4353,8 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
return C;
- // undef / X -> undef (the undef could be a snan).
- if (match(Op0, m_Undef()))
- return Op0;
-
- // X / undef -> undef
- if (match(Op1, m_Undef()))
- return Op1;
+ if (Constant *C = simplifyFPBinop(Op0, Op1))
+ return C;
// X / 1.0 -> X
if (match(Op1, m_FPOne()))
@@ -4243,14 +4363,20 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// 0 / X -> 0
// Requires that NaNs are off (X could be zero) and signed zeroes are
// ignored (X could be positive or negative, so the output sign is unknown).
- if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
- return Op0;
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()))
+ return ConstantFP::getNullValue(Op0->getType());
if (FMF.noNaNs()) {
// X / X -> 1.0 is legal when NaNs are ignored.
+ // We can ignore infinities because INF/INF is NaN.
if (Op0 == Op1)
return ConstantFP::get(Op0->getType(), 1.0);
+ // (X * Y) / Y --> X if we can reassociate to the above form.
+ Value *X;
+ if (FMF.allowReassoc() && match(Op0, m_c_FMul(m_Value(X), m_Specific(Op1))))
+ return X;
+
// -X / X -> -1.0 and
// X / -X -> -1.0 are legal when NaNs are ignored.
// We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
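Editor's note: the flag requirements spelled out above can be demonstrated directly; 0/X needs nsz as well as nnan because the result's sign depends on X, while nnan alone covers X/X. A minimal sketch in plain C++ (not LLVM code):

#include <cassert>
#include <cmath>

int main() {
  // 0.0 / X needs nsz in addition to nnan: the sign of the result depends
  // on the (unknown) sign of X, and X == 0.0 would give NaN.
  assert(std::signbit(0.0 / -2.0));  // -0.0, not +0.0
  assert(std::isnan(0.0 / 0.0));
  // X / X is 1.0 except when X is a zero or an infinity (both give NaN),
  // so nnan alone is enough for that fold.
  double inf = INFINITY;
  assert(std::isnan(inf / inf));
  assert(3.5 / 3.5 == 1.0);
  return 0;
}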
@@ -4274,19 +4400,20 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
return C;
- // undef % X -> undef (the undef could be a snan).
- if (match(Op0, m_Undef()))
- return Op0;
-
- // X % undef -> undef
- if (match(Op1, m_Undef()))
- return Op1;
+ if (Constant *C = simplifyFPBinop(Op0, Op1))
+ return C;
- // 0 % X -> 0
- // Requires that NaNs are off (X could be zero) and signed zeroes are
- // ignored (X could be positive or negative, so the output sign is unknown).
- if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
- return Op0;
+ // Unlike fdiv, the result of frem always matches the sign of the dividend.
+ // The constant match may include undef elements in a vector, so return a full
+ // zero constant as the result.
+ if (FMF.noNaNs()) {
+ // +0 % X -> 0
+ if (match(Op0, m_PosZeroFP()))
+ return ConstantFP::getNullValue(Op0->getType());
+ // -0 % X -> -0
+ if (match(Op0, m_NegZeroFP()))
+ return ConstantFP::getNegativeZero(Op0->getType());
+ }
return nullptr;
}
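Editor's note: unlike fdiv, the frem result takes the sign of the dividend, which is the same rule libm's fmod follows; that is why +0 % X and -0 % X can be folded to signed zeros once nnan rules out X being zero or NaN. A tiny check in plain C++:

#include <cassert>
#include <cmath>

int main() {
  // fmod (and LLVM's frem) returns a result with the sign of the dividend,
  // independent of the sign of the divisor.
  assert(!std::signbit(std::fmod(0.0, -7.0)));  // +0.0
  assert(std::signbit(std::fmod(-0.0, 7.0)));   // -0.0
  return 0;
}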
@@ -4515,28 +4642,28 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
case Intrinsic::exp: {
// exp(log(x)) -> x
- if (Q.CxtI->isFast() &&
+ if (Q.CxtI->hasAllowReassoc() &&
match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::exp2: {
// exp2(log2(x)) -> x
- if (Q.CxtI->isFast() &&
+ if (Q.CxtI->hasAllowReassoc() &&
match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::log: {
// log(exp(x)) -> x
- if (Q.CxtI->isFast() &&
+ if (Q.CxtI->hasAllowReassoc() &&
match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::log2: {
// log2(exp2(x)) -> x
- if (Q.CxtI->isFast() &&
+ if (Q.CxtI->hasAllowReassoc() &&
match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) {
return X;
}
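Editor's note: the hunks above relax the guard on exp/log round-trip folds from the full fast-math predicate to just the reassoc flag. The fold only removes intermediate rounding, which a small check illustrates (plain C++; the exact round-trip error depends on the libm in use):

#include <cmath>
#include <cstdio>

int main() {
  double x = 10.0;
  double roundTrip = std::exp(std::log(x));  // two rounded library calls
  // The round trip is usually off by an ulp or two, so folding
  // exp(log(x)) -> x perturbs the result slightly; permitting that kind
  // of rewrite is what the reassoc flag is being used to signal here.
  std::printf("x = %.17g, exp(log(x)) = %.17g, equal? %d\n",
              x, roundTrip, roundTrip == x);
  return 0;
}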
@@ -4606,6 +4733,14 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
return LHS;
}
return nullptr;
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ // If one argument is NaN, return the other argument.
+ if (match(LHS, m_NaN()))
+ return RHS;
+ if (match(RHS, m_NaN()))
+ return LHS;
+ return nullptr;
default:
return nullptr;
}
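Editor's note: the new maxnum/minnum fold relies on the libm fmax/fmin rule that a NaN argument is treated as missing data and the other argument is returned. A short check in plain C++:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double nan = std::numeric_limits<double>::quiet_NaN();
  // fmax/fmin return the non-NaN operand when exactly one operand is NaN,
  // which is what the constant fold above relies on.
  assert(std::fmax(nan, 3.0) == 3.0);
  assert(std::fmin(-1.0, nan) == -1.0);
  assert(std::isnan(std::fmax(nan, nan)));
  return 0;
}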
@@ -4843,7 +4978,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
return Result == I ? UndefValue::get(I->getType()) : Result;
}
-/// \brief Implementation of recursive simplification through an instruction's
+/// Implementation of recursive simplification through an instruction's
/// uses.
///
/// This is the common implementation of the recursive simplification routines.
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
index 3992657417c5..e7751d32aab3 100644
--- a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -21,15 +21,20 @@ template <class NodeTy, bool IsPostDom>
void IDFCalculator<NodeTy, IsPostDom>::calculate(
SmallVectorImpl<BasicBlock *> &PHIBlocks) {
// Use a priority queue keyed on dominator tree level so that inserted nodes
- // are handled from the bottom of the dominator tree upwards.
- typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+ // are handled from the bottom of the dominator tree upwards. We also augment
+ // the level with a DFS number to ensure that the blocks are ordered in a
+ // deterministic way.
+ typedef std::pair<DomTreeNode *, std::pair<unsigned, unsigned>>
+ DomTreeNodePair;
typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
less_second> IDFPriorityQueue;
IDFPriorityQueue PQ;
+ DT.updateDFSNumbers();
+
for (BasicBlock *BB : *DefBlocks) {
if (DomTreeNode *Node = DT.getNode(BB))
- PQ.push({Node, Node->getLevel()});
+ PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())});
}
SmallVector<DomTreeNode *, 32> Worklist;
@@ -40,7 +45,7 @@ void IDFCalculator<NodeTy, IsPostDom>::calculate(
DomTreeNodePair RootPair = PQ.top();
PQ.pop();
DomTreeNode *Root = RootPair.first;
- unsigned RootLevel = RootPair.second;
+ unsigned RootLevel = RootPair.second.first;
// Walk all dominator tree children of Root, inspecting their CFG edges with
// targets elsewhere on the dominator tree. Only targets whose level is at
@@ -77,7 +82,8 @@ void IDFCalculator<NodeTy, IsPostDom>::calculate(
PHIBlocks.emplace_back(SuccBB);
if (!DefBlocks->count(SuccBB))
- PQ.push(std::make_pair(SuccNode, SuccLevel));
+ PQ.push(std::make_pair(
+ SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
}
for (auto DomChild : *Node) {
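Editor's note: the IDF change above augments the priority key with a DFS-in number so that nodes on the same dominator-tree level pop in a deterministic order. A standard-C++ sketch of the same pattern (the names and numbers below are illustrative, not the LLVM types):

#include <cstdio>
#include <queue>
#include <utility>
#include <vector>

// Key = (tree level, DFS-in number). Ordering by the full pair makes the
// pop order deterministic even when several nodes share a level.
using Key = std::pair<unsigned, unsigned>;
using Entry = std::pair<const char *, Key>;

struct LessSecond {
  bool operator()(const Entry &A, const Entry &B) const {
    return A.second < B.second;  // max-heap: larger key pops first
  }
};

int main() {
  std::priority_queue<Entry, std::vector<Entry>, LessSecond> pq;
  pq.push({"bb1", {2, 7}});
  pq.push({"bb2", {2, 4}});  // same level as bb1, earlier DFS number
  pq.push({"bb3", {3, 1}});  // deeper level, handled first
  while (!pq.empty()) {
    std::printf("%s level=%u dfs=%u\n", pq.top().first,
                pq.top().second.first, pq.top().second.second);
    pq.pop();
  }
  return 0;
}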
diff --git a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
index a8178ecc0a24..93c23bca96af 100644
--- a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
using namespace llvm;
@@ -41,6 +42,10 @@ void LazyBlockFrequencyInfoPass::print(raw_ostream &OS, const Module *) const {
void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU);
+ // We require DT so it's available when LI is available. The LI updating code
+ // asserts that DT is also present so if we don't make sure that we have DT
+ // here, that assert will trigger.
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
diff --git a/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
index e2884d0a4564..429b78c3a47e 100644
--- a/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Dominators.h"
using namespace llvm;
@@ -42,6 +43,10 @@ void LazyBranchProbabilityInfoPass::print(raw_ostream &OS,
}
void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We require DT so it's available when LI is available. The LI updating code
+ // asserts that DT is also present so if we don't make sure that we have DT
+ // here, that assert will trigger.
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesAll();
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
index 54299d078be5..b1d585bfc683 100644
--- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -65,15 +66,15 @@ static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges,
if (!EdgeIndexMap.insert({&N, Edges.size()}).second)
return;
- DEBUG(dbgs() << " Added callable function: " << N.getName() << "\n");
+ LLVM_DEBUG(dbgs() << " Added callable function: " << N.getName() << "\n");
Edges.emplace_back(LazyCallGraph::Edge(N, EK));
}
LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() {
assert(!Edges && "Must not have already populated the edges for this node!");
- DEBUG(dbgs() << " Adding functions called by '" << getName()
- << "' to the graph.\n");
+ LLVM_DEBUG(dbgs() << " Adding functions called by '" << getName()
+ << "' to the graph.\n");
Edges = EdgeSequence();
@@ -151,8 +152,8 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
}
LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
- DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
- << "\n");
+ LLVM_DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
+ << "\n");
for (Function &F : M) {
if (F.isDeclaration())
continue;
@@ -167,8 +168,8 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
// External linkage defined functions have edges to them from other
// modules.
- DEBUG(dbgs() << " Adding '" << F.getName()
- << "' to entry set of the graph.\n");
+ LLVM_DEBUG(dbgs() << " Adding '" << F.getName()
+ << "' to entry set of the graph.\n");
addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref);
}
@@ -180,8 +181,9 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
if (Visited.insert(GV.getInitializer()).second)
Worklist.push_back(GV.getInitializer());
- DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
- "entry set.\n");
+ LLVM_DEBUG(
+ dbgs() << " Adding functions referenced by global initializers to the "
+ "entry set.\n");
visitReferences(Worklist, Visited, [&](Function &F) {
addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F),
LazyCallGraph::Edge::Ref);
@@ -427,7 +429,7 @@ bool LazyCallGraph::RefSCC::isAncestorOf(const RefSCC &RC) const {
/// source to target.
///
/// This helper routine, in addition to updating the postorder sequence itself
-/// will also update a map from SCCs to indices within that sequecne.
+/// will also update a map from SCCs to indices within that sequence.
///
/// The sequence and the map must operate on pointers to the SCC type.
///
@@ -713,7 +715,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
//
// However, we specially handle the target node. The target node is known to
// reach all other nodes in the original SCC by definition. This means that
- // we want the old SCC to be replaced with an SCC contaning that node as it
+ // we want the old SCC to be replaced with an SCC containing that node as it
// will be the root of whatever SCC DAG results from the DFS. Assumptions
// about an SCC such as the set of functions called will continue to hold,
// etc.
@@ -822,7 +824,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
// Cleared the DFS early, start another round.
break;
- // We've finished processing N and its descendents, put it on our pending
+ // We've finished processing N and its descendants, put it on our pending
// SCC stack to eventually get merged into an SCC of nodes.
PendingSCCStack.push_back(N);
@@ -1234,7 +1236,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN,
++I;
}
- // We've finished processing N and its descendents, put it on our pending
+ // We've finished processing N and its descendants, put it on our pending
// stack to eventually get merged into a RefSCC.
PendingRefSCCStack.push_back(N);
@@ -1271,8 +1273,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN,
// the removal hasn't changed the structure at all. This is an important
// special case and we can directly exit the entire routine more
// efficiently as soon as we discover it.
- if (std::distance(RefSCCNodes.begin(), RefSCCNodes.end()) ==
- NumRefSCCNodes) {
+ if (llvm::size(RefSCCNodes) == NumRefSCCNodes) {
// Clear out the low link field as we won't need it.
for (Node *N : RefSCCNodes)
N->LowLink = -1;
@@ -1294,7 +1295,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN,
// Otherwise we create a collection of new RefSCC nodes and build
// a radix-sort style map from postorder number to these new RefSCCs. We then
- // append SCCs to each of these RefSCCs in the order they occured in the
+ // append SCCs to each of these RefSCCs in the order they occurred in the
// original SCCs container.
for (int i = 0; i < PostOrderNumber; ++i)
Result.push_back(G->createRefSCC(*G));
@@ -1617,7 +1618,7 @@ void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
++I;
}
- // We've finished processing N and its descendents, put it on our pending
+ // We've finished processing N and its descendants, put it on our pending
// SCC stack to eventually get merged into an SCC of nodes.
PendingSCCStack.push_back(N);
@@ -1738,7 +1739,7 @@ static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) {
}
static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) {
- ptrdiff_t Size = std::distance(C.begin(), C.end());
+ ptrdiff_t Size = size(C);
OS << " SCC with " << Size << " functions:\n";
for (LazyCallGraph::Node &N : C)
@@ -1746,7 +1747,7 @@ static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) {
}
static void printRefSCC(raw_ostream &OS, LazyCallGraph::RefSCC &C) {
- ptrdiff_t Size = std::distance(C.begin(), C.end());
+ ptrdiff_t Size = size(C);
OS << " RefSCC with " << Size << " call SCCs:\n";
for (LazyCallGraph::SCC &InnerC : C)
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index d7da669f6e79..435b6f205199 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -392,8 +392,8 @@ namespace {
if (!BlockValueSet.insert(BV).second)
return false; // It's already in the stack.
- DEBUG(dbgs() << "PUSH: " << *BV.second << " in " << BV.first->getName()
- << "\n");
+ LLVM_DEBUG(dbgs() << "PUSH: " << *BV.second << " in "
+ << BV.first->getName() << "\n");
BlockValueStack.push_back(BV);
return true;
}
@@ -401,6 +401,7 @@ namespace {
AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls.
const DataLayout &DL; ///< A mandatory DataLayout
DominatorTree *DT; ///< An optional DT pointer.
+ DominatorTree *DisabledDT; ///< Stores DT if it's disabled.
ValueLatticeElement getBlockValue(Value *Val, BasicBlock *BB);
bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
@@ -463,13 +464,30 @@ namespace {
TheCache.eraseBlock(BB);
}
+ /// Disables use of the DominatorTree within LVI.
+ void disableDT() {
+ if (DT) {
+ assert(!DisabledDT && "Both DT and DisabledDT are not nullptr!");
+ std::swap(DT, DisabledDT);
+ }
+ }
+
+ /// Enables use of the DominatorTree within LVI. Does nothing if the class
+ /// instance was initialized without a DT pointer.
+ void enableDT() {
+ if (DisabledDT) {
+ assert(!DT && "Both DT and DisabledDT are not nullptr!");
+ std::swap(DT, DisabledDT);
+ }
+ }
+
/// This is the update interface to inform the cache that an edge from
/// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL,
DominatorTree *DT = nullptr)
- : AC(AC), DL(DL), DT(DT) {}
+ : AC(AC), DL(DL), DT(DT), DisabledDT(nullptr) {}
};
} // end anonymous namespace
@@ -490,7 +508,8 @@ void LazyValueInfoImpl::solve() {
// PredicateInfo is used in LVI or CVP, we should be able to make the
// overdefined cache global, and remove this throttle.
if (processedCount > MaxProcessedPerValue) {
- DEBUG(dbgs() << "Giving up on stack because we are getting too deep\n");
+ LLVM_DEBUG(
+ dbgs() << "Giving up on stack because we are getting too deep\n");
// Fill in the original values
while (!StartingStack.empty()) {
std::pair<BasicBlock *, Value *> &e = StartingStack.back();
@@ -511,8 +530,9 @@ void LazyValueInfoImpl::solve() {
assert(TheCache.hasCachedValueInfo(e.second, e.first) &&
"Result should be in cache!");
- DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName()
- << " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n");
+ LLVM_DEBUG(
+ dbgs() << "POP " << *e.second << " in " << e.first->getName() << " = "
+ << TheCache.getCachedValueInfo(e.second, e.first) << "\n");
BlockValueStack.pop_back();
BlockValueSet.erase(e);
@@ -563,8 +583,8 @@ bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) {
if (TheCache.hasCachedValueInfo(Val, BB)) {
// If we have a cached value, use that.
- DEBUG(dbgs() << " reuse BB '" << BB->getName()
- << "' val=" << TheCache.getCachedValueInfo(Val, BB) << '\n');
+ LLVM_DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val="
+ << TheCache.getCachedValueInfo(Val, BB) << '\n');
// Since we're reusing a cached value, we don't need to update the
// OverDefinedCache. The cache will have been properly updated whenever the
@@ -619,8 +639,8 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
return solveBlockValueBinaryOp(Res, BO, BB);
}
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - unknown inst def found.\n");
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - unknown inst def found.\n");
Res = getFromRangeMetadata(BBI);
return true;
}
@@ -684,9 +704,11 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV,
assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
// Before giving up, see if we can prove the pointer non-null local to
// this particular block.
- if (Val->getType()->isPointerTy() &&
- (isKnownNonZero(Val, DL) || isObjectDereferencedInBlock(Val, BB))) {
- PointerType *PTy = cast<PointerType>(Val->getType());
+ PointerType *PTy = dyn_cast<PointerType>(Val->getType());
+ if (PTy &&
+ (isKnownNonZero(Val, DL) ||
+ (isObjectDereferencedInBlock(Val, BB) &&
+ !NullPointerIsDefined(BB->getParent(), PTy->getAddressSpace())))) {
Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy));
} else {
Result = ValueLatticeElement::getOverdefined();
@@ -715,13 +737,13 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV,
// If we hit overdefined, exit early. The BlockVals entry is already set
// to overdefined.
if (Result.isOverdefined()) {
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined because of pred (non local).\n");
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred (non local).\n");
// Before giving up, see if we can prove the pointer non-null local to
// this particular block.
- if (Val->getType()->isPointerTy() &&
- isObjectDereferencedInBlock(Val, BB)) {
- PointerType *PTy = cast<PointerType>(Val->getType());
+ PointerType *PTy = dyn_cast<PointerType>(Val->getType());
+ if (PTy && isObjectDereferencedInBlock(Val, BB) &&
+ !NullPointerIsDefined(BB->getParent(), PTy->getAddressSpace())) {
Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy));
}
@@ -759,8 +781,8 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(ValueLatticeElement &BBLV,
// If we hit overdefined, exit early. The BlockVals entry is already set
// to overdefined.
if (Result.isOverdefined()) {
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined because of pred (local).\n");
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred (local).\n");
BBLV = Result;
return true;
@@ -950,8 +972,8 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
break;
default:
// Unhandled instructions are overdefined.
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined (unknown cast).\n");
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined (unknown cast).\n");
BBLV = ValueLatticeElement::getOverdefined();
return true;
}
@@ -1009,8 +1031,8 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
break;
default:
// Unhandled instructions are overdefined.
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined (unknown binary operator).\n");
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined (unknown binary operator).\n");
BBLV = ValueLatticeElement::getOverdefined();
return true;
};
@@ -1127,9 +1149,17 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
(!isTrueDest && BO->getOpcode() != BinaryOperator::Or))
return ValueLatticeElement::getOverdefined();
- auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited);
- auto LHS = getValueFromCondition(Val, BO->getOperand(1), isTrueDest, Visited);
- return intersect(RHS, LHS);
+ // Prevent infinite recursion if Cond references itself as in this example:
+ // Cond: "%tmp4 = and i1 %tmp4, undef"
+ // BL: "%tmp4 = and i1 %tmp4, undef"
+ // BR: "i1 undef"
+ Value *BL = BO->getOperand(0);
+ Value *BR = BO->getOperand(1);
+ if (BL == Cond || BR == Cond)
+ return ValueLatticeElement::getOverdefined();
+
+ return intersect(getValueFromCondition(Val, BL, isTrueDest, Visited),
+ getValueFromCondition(Val, BR, isTrueDest, Visited));
}
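Editor's note: the guard above bails out to overdefined when the condition uses itself as an operand, as in "%tmp4 = and i1 %tmp4, undef". A toy illustration of that direct self-reference check (plain C++, not LVI itself):

#include <cassert>
#include <cstdio>

// A node whose operand list can contain the node itself.
struct Node {
  const Node *Ops[2];
};

// Returns false ("overdefined" / give up) instead of recursing forever.
static bool evaluate(const Node *N) {
  for (const Node *Op : N->Ops) {
    if (Op == N)      // operand refers back to the expression itself
      return false;   // bail out conservatively
    if (Op && !evaluate(Op))
      return false;
  }
  return true;
}

int main() {
  Node SelfRef{{nullptr, nullptr}};
  SelfRef.Ops[0] = &SelfRef;  // models "%tmp4 = and i1 %tmp4, undef"
  assert(!evaluate(&SelfRef));
  Node Leaf{{nullptr, nullptr}};
  Node Ok{{&Leaf, nullptr}};
  assert(evaluate(&Ok));
  std::printf("self-referential node handled without infinite recursion\n");
  return 0;
}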
static ValueLatticeElement
@@ -1196,7 +1226,7 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op,
return ValueLatticeElement::getOverdefined();
}
-/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
+/// Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
/// Val is not constrained on the edge. Result is unspecified if return value
/// is false.
static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
@@ -1321,7 +1351,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
return false;
}
-/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
+/// Compute the value of Val on the edge BBFrom -> BBTo or the value at
/// the basic block if the edge does not constrain Val.
bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
BasicBlock *BBTo,
@@ -1373,8 +1403,8 @@ bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
Instruction *CxtI) {
- DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
- << BB->getName() << "'\n");
+ LLVM_DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
+ << BB->getName() << "'\n");
assert(BlockValueStack.empty() && BlockValueSet.empty());
if (!hasBlockValue(V, BB)) {
@@ -1384,13 +1414,13 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
ValueLatticeElement Result = getBlockValue(V, BB);
intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
- DEBUG(dbgs() << " Result = " << Result << "\n");
+ LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
- DEBUG(dbgs() << "LVI Getting value " << *V << " at '"
- << CxtI->getName() << "'\n");
+ LLVM_DEBUG(dbgs() << "LVI Getting value " << *V << " at '" << CxtI->getName()
+ << "'\n");
if (auto *C = dyn_cast<Constant>(V))
return ValueLatticeElement::get(C);
@@ -1400,15 +1430,16 @@ ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
Result = getFromRangeMetadata(I);
intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
- DEBUG(dbgs() << " Result = " << Result << "\n");
+ LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
ValueLatticeElement LazyValueInfoImpl::
getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
- DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
- << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+ LLVM_DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
+ << FromBB->getName() << "' to '" << ToBB->getName()
+ << "'\n");
ValueLatticeElement Result;
if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) {
@@ -1418,7 +1449,7 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
assert(WasFastQuery && "More work to do after problem solved?");
}
- DEBUG(dbgs() << " Result = " << Result << "\n");
+ LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
@@ -1791,6 +1822,16 @@ void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS)
}
}
+void LazyValueInfo::disableDT() {
+ if (PImpl)
+ getImpl(PImpl, AC, DL, DT).disableDT();
+}
+
+void LazyValueInfo::enableDT() {
+ if (PImpl)
+ getImpl(PImpl, AC, DL, DT).enableDT();
+}
+
// Print the LVI for the function arguments at the start of each basic block.
void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot(
const BasicBlock *BB, formatted_raw_ostream &OS) {
@@ -1807,7 +1848,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot(
// This function prints the LVI analysis for the instruction I at the beginning
// of various basic blocks. It relies on calculated values that are stored in
-// the LazyValueInfoCache, and in the absence of cached values, recalculte the
+// the LazyValueInfoCache, and in the absence of cached values, recalculate the
// LazyValueInfo for `I`, and print that info.
void LazyValueInfoAnnotatedWriter::emitInstructionAnnot(
const Instruction *I, formatted_raw_ostream &OS) {
@@ -1830,7 +1871,7 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot(
};
printResult(ParentBB);
- // Print the LVI analysis results for the the immediate successor blocks, that
+ // Print the LVI analysis results for the immediate successor blocks that
// are dominated by `ParentBB`.
for (auto *BBSucc : successors(ParentBB))
if (DT.dominates(ParentBB, BBSucc))
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index 0e3f498cb14c..db919bd233bf 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -165,13 +165,13 @@ namespace {
}
}
- /// \brief A check failed, so printout out the condition and the message.
+ /// A check failed, so print out the condition and the message.
///
/// This provides a nice place to put a breakpoint if you want to see why
/// something is not correct.
void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
- /// \brief A check failed (with values to print).
+ /// A check failed (with values to print).
///
/// This calls the Message-only version so that the above is easier to set
/// a breakpoint on.
@@ -323,9 +323,9 @@ void Lint::visitCallSite(CallSite CS) {
MemCpyInst *MCI = cast<MemCpyInst>(&I);
// TODO: If the size is known, use it.
visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
- MCI->getAlignment(), nullptr, MemRef::Write);
+ MCI->getDestAlignment(), nullptr, MemRef::Write);
visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
- MCI->getAlignment(), nullptr, MemRef::Read);
+ MCI->getSourceAlignment(), nullptr, MemRef::Read);
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
@@ -345,16 +345,16 @@ void Lint::visitCallSite(CallSite CS) {
MemMoveInst *MMI = cast<MemMoveInst>(&I);
// TODO: If the size is known, use it.
visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
- MMI->getAlignment(), nullptr, MemRef::Write);
+ MMI->getDestAlignment(), nullptr, MemRef::Write);
visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
- MMI->getAlignment(), nullptr, MemRef::Read);
+ MMI->getSourceAlignment(), nullptr, MemRef::Read);
break;
}
case Intrinsic::memset: {
MemSetInst *MSI = cast<MemSetInst>(&I);
// TODO: If the size is known, use it.
visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
- MSI->getAlignment(), nullptr, MemRef::Write);
+ MSI->getDestAlignment(), nullptr, MemRef::Write);
break;
}
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index 834727c9224d..d319d4c249d3 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -80,7 +80,7 @@ static bool isDereferenceableAndAlignedPointer(
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
const Value *Base = GEP->getPointerOperand();
- APInt Offset(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
+ APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() ||
!Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue())
return false;
@@ -108,8 +108,8 @@ static bool isDereferenceableAndAlignedPointer(
DL, CtxI, DT, Visited);
if (auto CS = ImmutableCallSite(V))
- if (const Value *RV = CS.getReturnedArgOperand())
- return isDereferenceableAndAlignedPointer(RV, Align, Size, DL, CtxI, DT,
+ if (auto *RP = getArgumentAliasingToReturnedPointer(CS))
+ return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT,
Visited);
// If we don't know, assume the worst.
@@ -146,7 +146,7 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
SmallPtrSet<const Value *, 32> Visited;
return ::isDereferenceableAndAlignedPointer(
- V, Align, APInt(DL.getTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL,
+ V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL,
CtxI, DT, Visited);
}
@@ -156,7 +156,7 @@ bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT);
}
-/// \brief Test if A and B will obviously have the same value.
+/// Test if A and B will obviously have the same value.
///
/// This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
@@ -187,7 +187,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
-/// \brief Check if executing a load of this pointer value cannot trap.
+/// Check if executing a load of this pointer value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
/// analysis and returns true if it is safe to load immediately before ScanFrom.
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index e141d6c58b65..c6175bf9bee9 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -92,7 +92,7 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
-/// \brief The maximum iterations used to merge memory checks
+/// The maximum iterations used to merge memory checks
static cl::opt<unsigned> MemoryCheckMergeThreshold(
"memory-check-merge-threshold", cl::Hidden,
cl::desc("Maximum number of comparisons done when trying to merge "
@@ -102,7 +102,7 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(
/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
-/// \brief We collect dependences up to this threshold.
+/// We collect dependences up to this threshold.
static cl::opt<unsigned>
MaxDependences("max-dependences", cl::Hidden,
cl::desc("Maximum number of dependences collected by "
@@ -124,7 +124,7 @@ static cl::opt<bool> EnableMemAccessVersioning(
"enable-mem-access-versioning", cl::init(true), cl::Hidden,
cl::desc("Enable symbolic stride memory access versioning"));
-/// \brief Enable store-to-load forwarding conflict detection. This option can
+/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
"store-to-load-forwarding-conflict-detection", cl::Hidden,
@@ -165,8 +165,8 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
PSE.addPredicate(*SE->getEqualPredicate(U, CT));
auto *Expr = PSE.getSCEV(Ptr);
- DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
- << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
+ << " by: " << *Expr << "\n");
return Expr;
}
@@ -490,23 +490,23 @@ void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
namespace {
-/// \brief Analyses memory accesses in a loop.
+/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
- /// \brief Read or write access location.
+ /// Read or write access location.
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
- AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
- MemoryDepChecker::DepCandidates &DA,
+ AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
+ LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,
PredicatedScalarEvolution &PSE)
- : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
- PSE(PSE) {}
+ : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
+ IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
- /// \brief Register a load and whether it is only read from.
+ /// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
@@ -515,14 +515,14 @@ public:
ReadOnlyPtr.insert(Ptr);
}
- /// \brief Register a store.
+ /// Register a store.
void addStore(MemoryLocation &Loc) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, true));
}
- /// \brief Check if we can emit a run-time no-alias check for \p Access.
+ /// Check if we can emit a run-time no-alias check for \p Access.
///
/// Returns true if we can emit a run-time no alias check for \p Access.
/// If we can check this access, this also adds it to a dependence set and
@@ -537,7 +537,7 @@ public:
unsigned ASId, bool ShouldCheckStride,
bool Assume);
- /// \brief Check whether we can check the pointers at runtime for
+ /// Check whether we can check the pointers at runtime for
/// non-intersection.
///
/// Returns true if we need no check or if we do and we can generate them
@@ -546,13 +546,13 @@ public:
Loop *TheLoop, const ValueToValueMap &Strides,
bool ShouldCheckWrap = false);
- /// \brief Goes over all memory accesses, checks whether a RT check is needed
+ /// Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
void buildDependenceSets() {
processMemAccesses();
}
- /// \brief Initial processing of memory accesses determined that we need to
+ /// Initial processing of memory accesses determined that we need to
/// perform dependency checking.
///
/// Note that this can later be cleared if we retry memcheck analysis without
@@ -570,7 +570,7 @@ public:
private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
- /// \brief Go over all memory access and check whether runtime pointer checks
+ /// Go over all memory access and check whether runtime pointer checks
/// are needed and build sets of dependency check candidates.
void processMemAccesses();
@@ -579,6 +579,9 @@ private:
const DataLayout &DL;
+ /// The loop being checked.
+ const Loop *TheLoop;
+
/// List of accesses that need a further dependence check.
MemAccessInfoList CheckDeps;
@@ -596,7 +599,7 @@ private:
/// dependence check.
MemoryDepChecker::DepCandidates &DepCands;
- /// \brief Initial processing of memory accesses determined that we may need
+ /// Initial processing of memory accesses determined that we may need
/// to add memchecks. Perform the analysis to determine the necessary checks.
///
/// Note that, this is different from isDependencyCheckNeeded. When we retry
@@ -611,7 +614,7 @@ private:
} // end anonymous namespace
-/// \brief Check whether a pointer can participate in a runtime bounds check.
+/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
@@ -634,7 +637,7 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
return AR->isAffine();
}
-/// \brief Check whether a pointer address cannot wrap.
+/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
const SCEV *PtrScev = PSE.getSCEV(Ptr);
@@ -684,7 +687,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
bool IsWrite = Access.getInt();
RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
- DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
return true;
}
@@ -729,7 +732,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
RunningDepId, ASId, ShouldCheckWrap, false)) {
- DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
+ LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
Retries.push_back(Access);
CanDoAliasSetRT = false;
}
@@ -791,8 +794,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
if (ASi != ASj) {
- DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
- " different address spaces\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: Runtime check would require comparison between"
+ " different address spaces\n");
return false;
}
}
@@ -801,8 +805,8 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
if (NeedRTCheck && CanDoRT)
RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
- DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
- << " pointer comparisons.\n");
+ LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
+ << " pointer comparisons.\n");
RtCheck.Need = NeedRTCheck;
@@ -817,10 +821,10 @@ void AccessAnalysis::processMemAccesses() {
// process read-only pointers. This allows us to skip dependence tests for
// read-only pointers.
- DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
- DEBUG(dbgs() << " AST: "; AST.dump());
- DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
- DEBUG({
+ LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
+ LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
+ LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
+ LLVM_DEBUG({
for (auto A : Accesses)
dbgs() << "\t" << *A.getPointer() << " (" <<
(A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
@@ -904,11 +908,15 @@ void AccessAnalysis::processMemAccesses() {
ValueVector TempObjects;
GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
- DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
+ LLVM_DEBUG(dbgs()
+ << "Underlying objects for pointer " << *Ptr << "\n");
for (Value *UnderlyingObj : TempObjects) {
// nullptr never alias, don't join sets for pointer that have "null"
// in their UnderlyingObjects list.
- if (isa<ConstantPointerNull>(UnderlyingObj))
+ if (isa<ConstantPointerNull>(UnderlyingObj) &&
+ !NullPointerIsDefined(
+ TheLoop->getHeader()->getParent(),
+ UnderlyingObj->getType()->getPointerAddressSpace()))
continue;
UnderlyingObjToAccessMap::iterator Prev =
@@ -917,7 +925,7 @@ void AccessAnalysis::processMemAccesses() {
DepCands.unionSets(Access, Prev->second);
ObjToLastAccess[UnderlyingObj] = Access;
- DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
+ LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
}
}
}
@@ -931,7 +939,7 @@ static bool isInBoundsGep(Value *Ptr) {
return false;
}
-/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
+/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
PredicatedScalarEvolution &PSE, const Loop *L) {
@@ -979,7 +987,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
return false;
}
-/// \brief Check whether the access through \p Ptr has a constant stride.
+/// Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
const Loop *Lp, const ValueToValueMap &StridesMap,
bool Assume, bool ShouldCheckWrap) {
@@ -989,8 +997,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// Make sure that the pointer does not point to aggregate types.
auto *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
- DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr
- << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
+ << *Ptr << "\n");
return 0;
}
@@ -1001,15 +1009,15 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
AR = PSE.getAsAddRec(Ptr);
if (!AR) {
- DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
- << " SCEV: " << *PtrScev << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
+ << " SCEV: " << *PtrScev << "\n");
return 0;
}
// The access function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
- DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
- *Ptr << " SCEV: " << *AR << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
+ << *Ptr << " SCEV: " << *AR << "\n");
return 0;
}
@@ -1024,18 +1032,20 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
bool IsNoWrapAddRec = !ShouldCheckWrap ||
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
isNoWrapAddRec(Ptr, AR, PSE, Lp);
- bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
- if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
+ if (!IsNoWrapAddRec && !IsInBoundsGEP &&
+ NullPointerIsDefined(Lp->getHeader()->getParent(),
+ PtrTy->getAddressSpace())) {
if (Assume) {
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
IsNoWrapAddRec = true;
- DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
- << "LAA: Pointer: " << *Ptr << "\n"
- << "LAA: SCEV: " << *AR << "\n"
- << "LAA: Added an overflow assumption\n");
+ LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
+ << "LAA: Pointer: " << *Ptr << "\n"
+ << "LAA: SCEV: " << *AR << "\n"
+ << "LAA: Added an overflow assumption\n");
} else {
- DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
- << *Ptr << " SCEV: " << *AR << "\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+ << *Ptr << " SCEV: " << *AR << "\n");
return 0;
}
}
@@ -1046,8 +1056,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// Calculate the pointer stride and check if it is constant.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
- DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
- " SCEV: " << *AR << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
+ << " SCEV: " << *AR << "\n");
return 0;
}
@@ -1070,15 +1080,16 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// If the SCEV could wrap but we have an inbounds gep with a unit stride we
// know we can't "wrap around the address space". In case of address space
// zero we know that this won't happen without triggering undefined behavior.
- if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
- Stride != 1 && Stride != -1) {
+ if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
+ (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
+ PtrTy->getAddressSpace()))) {
if (Assume) {
// We can avoid this case by adding a run-time check.
- DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
- << "inbouds or in address space 0 may wrap:\n"
- << "LAA: Pointer: " << *Ptr << "\n"
- << "LAA: SCEV: " << *AR << "\n"
- << "LAA: Added an overflow assumption\n");
+ LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
+ << "inbouds or in address space 0 may wrap:\n"
+ << "LAA: Pointer: " << *Ptr << "\n"
+ << "LAA: SCEV: " << *AR << "\n"
+ << "LAA: Added an overflow assumption\n");
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
} else
return 0;
@@ -1087,14 +1098,65 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
return Stride;
}
-/// Take the pointer operand from the Load/Store instruction.
-/// Returns NULL if this is not a valid Load/Store instruction.
-static Value *getPointerOperand(Value *I) {
- if (auto *LI = dyn_cast<LoadInst>(I))
- return LI->getPointerOperand();
- if (auto *SI = dyn_cast<StoreInst>(I))
- return SI->getPointerOperand();
- return nullptr;
+bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
+ ScalarEvolution &SE,
+ SmallVectorImpl<unsigned> &SortedIndices) {
+ assert(llvm::all_of(
+ VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
+ "Expected list of pointer operands.");
+ SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
+ OffValPairs.reserve(VL.size());
+
+ // Walk over the pointers, and map each of them to an offset relative to
+ // first pointer in the array.
+ Value *Ptr0 = VL[0];
+ const SCEV *Scev0 = SE.getSCEV(Ptr0);
+ Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
+
+ llvm::SmallSet<int64_t, 4> Offsets;
+ for (auto *Ptr : VL) {
+ // TODO: Outline this code as a special, more time consuming, version of
+ // computeConstantDifference() function.
+ if (Ptr->getType()->getPointerAddressSpace() !=
+ Ptr0->getType()->getPointerAddressSpace())
+ return false;
+ // If a pointer refers to a different underlying object, bail - the
+ // pointers are by definition incomparable.
+ Value *CurrObj = GetUnderlyingObject(Ptr, DL);
+ if (CurrObj != Obj0)
+ return false;
+
+ const SCEV *Scev = SE.getSCEV(Ptr);
+ const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
+ // The pointers may not have a constant offset from each other, or SCEV
+ // may just not be smart enough to figure out they do. Regardless,
+ // there's nothing we can do.
+ if (!Diff)
+ return false;
+
+ // Check if the pointer with the same offset is found.
+ int64_t Offset = Diff->getAPInt().getSExtValue();
+ if (!Offsets.insert(Offset).second)
+ return false;
+ OffValPairs.emplace_back(Offset, Ptr);
+ }
+ SortedIndices.clear();
+ SortedIndices.resize(VL.size());
+ std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
+
+ // Sort the memory accesses by offset, recording the permutation in SortedIndices.
+ std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
+ [&OffValPairs](unsigned Left, unsigned Right) {
+ return OffValPairs[Left].first < OffValPairs[Right].first;
+ });
+
+ // Check if the order is consecutive already.
+ if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
+ return I == SortedIndices[I];
+ }))
+ SortedIndices.clear();
+
+ return true;
}
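Editor's note: sortPtrAccesses maps each pointer to a constant SCEV offset from the first one, sorts the indices by that offset, and reports an already-consecutive list by clearing SortedIndices. The index-permutation part is plain C++ and can be sketched without SCEV (the helper name and the byte offsets below are made up for illustration):

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// Given byte offsets of each pointer from the first one, produce the
// permutation that sorts them; an identity permutation is reported as an
// empty index list, mirroring sortPtrAccesses.
static void sortByOffset(const std::vector<long> &Offsets,
                         std::vector<unsigned> &SortedIndices) {
  SortedIndices.resize(Offsets.size());
  std::iota(SortedIndices.begin(), SortedIndices.end(), 0u);
  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
                   [&](unsigned L, unsigned R) {
                     return Offsets[L] < Offsets[R];
                   });
  bool AlreadySorted = true;
  for (unsigned I = 0; I < SortedIndices.size(); ++I)
    AlreadySorted &= (SortedIndices[I] == I);
  if (AlreadySorted)
    SortedIndices.clear();
}

int main() {
  std::vector<unsigned> Idx;
  sortByOffset({0, 8, 4, 12}, Idx);  // needs reordering
  for (unsigned I : Idx)
    std::printf("%u ", I);           // prints: 0 2 1 3
  std::printf("\n");
  sortByOffset({0, 4, 8}, Idx);      // already consecutive
  std::printf("already sorted -> %zu indices\n", Idx.size());  // prints 0
  return 0;
}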
/// Take the address space operand from the Load/Store instruction.
@@ -1110,8 +1172,8 @@ static unsigned getAddressSpaceOperand(Value *I) {
/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType) {
- Value *PtrA = getPointerOperand(A);
- Value *PtrB = getPointerOperand(B);
+ Value *PtrA = getLoadStorePointerOperand(A);
+ Value *PtrB = getLoadStorePointerOperand(B);
unsigned ASA = getAddressSpaceOperand(A);
unsigned ASB = getAddressSpaceOperand(B);
@@ -1127,11 +1189,11 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
if (CheckType && PtrA->getType() != PtrB->getType())
return false;
- unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
+ unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
+ APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
- APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+ APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
@@ -1242,8 +1304,9 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
}
if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
- DEBUG(dbgs() << "LAA: Distance " << Distance
- << " that could cause a store-load forwarding conflict\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: Distance " << Distance
+ << " that could cause a store-load forwarding conflict\n");
return true;
}
@@ -1321,7 +1384,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
return false;
}
-/// \brief Check the dependence for two accesses with the same stride \p Stride.
+/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
@@ -1395,16 +1458,16 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
- DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
- << "(Induction step: " << StrideAPtr << ")\n");
- DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
- << *InstMap[BIdx] << ": " << *Dist << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
+ << "(Induction step: " << StrideAPtr << ")\n");
+ LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
// Need accesses with constant stride. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
// the address space.
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
- DEBUG(dbgs() << "Pointer access with non-constant stride\n");
+ LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return Dependence::Unknown;
}
@@ -1421,7 +1484,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
TypeByteSize))
return Dependence::NoDep;
- DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
+ LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
ShouldRetryWithRuntimeCheck = true;
return Dependence::Unknown;
}
@@ -1432,7 +1495,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Attempt to prove strided accesses independent.
if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
- DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
+ LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
return Dependence::NoDep;
}
@@ -1442,11 +1505,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
(couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
ATy != BTy)) {
- DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
+ LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
return Dependence::ForwardButPreventsForwarding;
}
- DEBUG(dbgs() << "LAA: Dependence is negative\n");
+ LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
return Dependence::Forward;
}
@@ -1455,15 +1518,17 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (Val == 0) {
if (ATy == BTy)
return Dependence::Forward;
- DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: Zero dependence difference but different types\n");
return Dependence::Unknown;
}
assert(Val.isStrictlyPositive() && "Expect a positive value");
if (ATy != BTy) {
- DEBUG(dbgs() <<
- "LAA: ReadWrite-Write positive dependency with different types\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "LAA: ReadWrite-Write positive dependency with different types\n");
return Dependence::Unknown;
}
@@ -1504,15 +1569,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
uint64_t MinDistanceNeeded =
TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
- DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
- << '\n');
+ LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
+ << Distance << '\n');
return Dependence::Backward;
}
// Unsafe if the minimum distance needed is greater than max safe distance.
if (MinDistanceNeeded > MaxSafeDepDistBytes) {
- DEBUG(dbgs() << "LAA: Failure because it needs at least "
- << MinDistanceNeeded << " size in bytes");
+ LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
+ << MinDistanceNeeded << " size in bytes");
return Dependence::Backward;
}
@@ -1541,8 +1606,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::BackwardVectorizableButPreventsForwarding;
uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
- DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
- << " with max VF = " << MaxVF << '\n');
+ LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
+ << " with max VF = " << MaxVF << '\n');
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
return Dependence::BackwardVectorizable;
@@ -1600,7 +1665,8 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
if (Dependences.size() >= MaxDependences) {
RecordDependences = false;
Dependences.clear();
- DEBUG(dbgs() << "Too many dependences, stopped recording\n");
+ LLVM_DEBUG(dbgs()
+ << "Too many dependences, stopped recording\n");
}
}
if (!RecordDependences && !SafeForVectorization)
@@ -1612,7 +1678,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
}
}
- DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
+ LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
return SafeForVectorization;
}
@@ -1642,20 +1708,21 @@ void MemoryDepChecker::Dependence::print(
bool LoopAccessInfo::canAnalyzeLoop() {
// We need to have a loop header.
- DEBUG(dbgs() << "LAA: Found a loop in "
- << TheLoop->getHeader()->getParent()->getName() << ": "
- << TheLoop->getHeader()->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
+ << TheLoop->getHeader()->getParent()->getName() << ": "
+ << TheLoop->getHeader()->getName() << '\n');
// We can only analyze innermost loops.
if (!TheLoop->empty()) {
- DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
+ LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
- DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: loop control flow is not understood by analyzer\n");
recordAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by analyzer";
return false;
@@ -1663,7 +1730,8 @@ bool LoopAccessInfo::canAnalyzeLoop() {
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
- DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: loop control flow is not understood by analyzer\n");
recordAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by analyzer";
return false;
@@ -1673,7 +1741,8 @@ bool LoopAccessInfo::canAnalyzeLoop() {
// checked at the end of each iteration. With that we can assume that all
// instructions in the loop are executed the same number of times.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
- DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: loop control flow is not understood by analyzer\n");
recordAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by analyzer";
return false;
@@ -1684,7 +1753,7 @@ bool LoopAccessInfo::canAnalyzeLoop() {
if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
recordAnalysis("CantComputeNumberOfIterations")
<< "could not determine number of loop iterations";
- DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
+ LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -1734,7 +1803,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
recordAnalysis("NonSimpleLoad", Ld)
<< "read with atomic ordering or volatile read";
- DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
+ LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
CanVecMem = false;
return;
}
@@ -1758,7 +1827,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
if (!St->isSimple() && !IsAnnotatedParallel) {
recordAnalysis("NonSimpleStore", St)
<< "write with atomic ordering or volatile write";
- DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
+ LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
CanVecMem = false;
return;
}
@@ -1777,14 +1846,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// Check if we see any stores. If there are no stores, then we don't
// care if the pointers are *restrict*.
if (!Stores.size()) {
- DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
+ LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
CanVecMem = true;
return;
}
MemoryDepChecker::DepCandidates DependentAccesses;
AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
- AA, LI, DependentAccesses, *PSE);
+ TheLoop, AA, LI, DependentAccesses, *PSE);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -1814,9 +1883,9 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
}
if (IsAnnotatedParallel) {
- DEBUG(dbgs()
- << "LAA: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
CanVecMem = true;
return;
}
@@ -1851,7 +1920,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// If we write (or read-write) to a single destination and there are no
// other reads in this loop then it is safe to vectorize.
if (NumReadWrites == 1 && NumReads == 0) {
- DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
+ LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
CanVecMem = true;
return;
}
@@ -1866,23 +1935,24 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
TheLoop, SymbolicStrides);
if (!CanDoRTIfNeeded) {
recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
- DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
- << "the array bounds.\n");
+ LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
+ << "the array bounds.\n");
CanVecMem = false;
return;
}
- DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
- DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
+ LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
CanVecMem = DepChecker->areDepsSafe(
DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
- DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
+ LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
// Clear the dependency checks. We assume they are not needed.
Accesses.resetDepChecks(*DepChecker);
@@ -1898,7 +1968,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
if (!CanDoRTIfNeeded) {
recordAnalysis("CantCheckMemDepsAtRunTime")
<< "cannot check memory dependencies at runtime";
- DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
+ LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
CanVecMem = false;
return;
}
@@ -1908,16 +1978,17 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
}
if (CanVecMem)
- DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
- << (PtrRtChecking->Need ? "" : " don't")
- << " need runtime memory checks.\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
+ << (PtrRtChecking->Need ? "" : " don't")
+ << " need runtime memory checks.\n");
else {
recordAnalysis("UnsafeMemDep")
<< "unsafe dependent memory operations in loop. Use "
"#pragma loop distribute(enable) to allow loop distribution "
"to attempt to isolate the offending operations into a separate "
"loop";
- DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+ LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
}
}
@@ -1974,7 +2045,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
namespace {
-/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
+/// IR Values for the lower and upper bounds of a pointer evolution. We
/// need to use value-handles because SCEV expansion can invalidate previously
/// expanded values. Thus expansion of a pointer can invalidate the bounds for
/// a previous one.
@@ -1985,7 +2056,7 @@ struct PointerBounds {
} // end anonymous namespace
-/// \brief Expand code for the lower and upper bound of the pointer group \p CG
+/// Expand code for the lower and upper bound of the pointer group \p CG
/// in \p TheLoop. \return the values for the bounds.
static PointerBounds
expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
@@ -2001,8 +2072,8 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
- << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
+ << *Ptr << "\n");
// Ptr could be in the loop body. If so, expand a new one at the correct
// location.
Instruction *Inst = dyn_cast<Instruction>(Ptr);
@@ -2015,15 +2086,16 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
return {NewPtr, NewPtrPlusOne};
} else {
Value *Start = nullptr, *End = nullptr;
- DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
- DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
+ << "\n");
return {Start, End};
}
}
-/// \brief Turns a collection of checks into a collection of expanded upper and
+/// Turns a collection of checks into a collection of expanded upper and
/// lower bounds for both pointers in the check.
static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
@@ -2136,9 +2208,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
if (!Stride)
return;
- DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
- "versioning:");
- DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
+ "versioning:");
+ LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
// Avoid adding the "Stride == 1" predicate when we know that
// Stride >= Trip-Count. Such a predicate will effectively optimize a single
@@ -2174,12 +2246,13 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
// "Stride >= TripCount" is equivalent to checking:
// Stride - BETakenCount > 0
if (SE->isKnownPositive(StrideMinusBETaken)) {
- DEBUG(dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
- "Stride==1 predicate will imply that the loop executes "
- "at most once.\n");
+ LLVM_DEBUG(
+ dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
+ "Stride==1 predicate will imply that the loop executes "
+ "at most once.\n");
return;
- }
- DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
+ }
+ LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
SymbolicStrides[Ptr] = Stride;
StrideSet.insert(Stride);
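
The hunks in this file mechanically migrate the old DEBUG macro to LLVM_DEBUG. As an editorial aid (not part of the patch), here is a minimal sketch of the idiom being migrated to; it assumes only the standard llvm/Support/Debug.h macro and a file-local DEBUG_TYPE (the tag name below is hypothetical — the real file defines its own), with output appearing only in asserts-enabled builds under -debug or -debug-only=<tag>:

// Editorial sketch, not part of the patch: the LLVM_DEBUG idiom these hunks adopt.
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "laa-sketch" // hypothetical tag for illustration only

static void reportDistance(long Distance) {
  // Compiled away in NDEBUG builds; printed with -debug-only=laa-sketch.
  LLVM_DEBUG(llvm::dbgs() << "LAA: distance = " << Distance << '\n');
}
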
diff --git a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
index ea7a62d179c4..074023a7e1e2 100644
--- a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -24,7 +24,7 @@ cl::opt<bool> EnableMSSALoopDependency(
"enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
cl::desc("Enable MemorySSA dependency for loop pass manager"));
-// Explicit template instantiations and specialization defininitions for core
+// Explicit template instantiations and specialization definitions for core
// template typedefs.
template class AllAnalysesOn<Loop>;
template class AnalysisManager<Loop, LoopStandardAnalysisResults &>;
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 9e54d60779a0..3f78456b3586 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
@@ -377,69 +378,6 @@ Loop::LocRange Loop::getLocRange() const {
return LocRange();
}
-bool Loop::hasDedicatedExits() const {
- // Each predecessor of each exit block of a normal loop is contained
- // within the loop.
- SmallVector<BasicBlock *, 4> ExitBlocks;
- getExitBlocks(ExitBlocks);
- for (BasicBlock *BB : ExitBlocks)
- for (BasicBlock *Predecessor : predecessors(BB))
- if (!contains(Predecessor))
- return false;
- // All the requirements are met.
- return true;
-}
-
-void Loop::getUniqueExitBlocks(
- SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
- assert(hasDedicatedExits() &&
- "getUniqueExitBlocks assumes the loop has canonical form exits!");
-
- SmallVector<BasicBlock *, 32> SwitchExitBlocks;
- for (BasicBlock *BB : this->blocks()) {
- SwitchExitBlocks.clear();
- for (BasicBlock *Successor : successors(BB)) {
- // If block is inside the loop then it is not an exit block.
- if (contains(Successor))
- continue;
-
- pred_iterator PI = pred_begin(Successor);
- BasicBlock *FirstPred = *PI;
-
- // If current basic block is this exit block's first predecessor
- // then only insert exit block in to the output ExitBlocks vector.
- // This ensures that same exit block is not inserted twice into
- // ExitBlocks vector.
- if (BB != FirstPred)
- continue;
-
- // If a terminator has more then two successors, for example SwitchInst,
- // then it is possible that there are multiple edges from current block
- // to one exit block.
- if (std::distance(succ_begin(BB), succ_end(BB)) <= 2) {
- ExitBlocks.push_back(Successor);
- continue;
- }
-
- // In case of multiple edges from current block to exit block, collect
- // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
- // duplicate edges.
- if (!is_contained(SwitchExitBlocks, Successor)) {
- SwitchExitBlocks.push_back(Successor);
- ExitBlocks.push_back(Successor);
- }
- }
- }
-}
-
-BasicBlock *Loop::getUniqueExitBlock() const {
- SmallVector<BasicBlock *, 8> UniqueExitBlocks;
- getUniqueExitBlocks(UniqueExitBlocks);
- if (UniqueExitBlocks.size() == 1)
- return UniqueExitBlocks[0];
- return nullptr;
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); }
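
The three definitions removed above drop out of LoopInfo.cpp only; the public Loop interface is unchanged, so callers are unaffected (the implementation is presumably provided elsewhere, e.g. via the templated LoopInfoImpl.h this file already includes — the hunk alone does not show the new home). A hedged usage sketch, editorial and relying only on the public LoopInfo.h interface:

// Editorial sketch: querying a loop's unique exit block through the interface
// this hunk leaves intact. As the removed assert noted, the loop is assumed to
// be in canonical (dedicated-exits) form.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

static BasicBlock *singleExitOrNull(const Loop &L) {
  // Non-null only when all exiting edges leave to one and the same block.
  return L.getUniqueExitBlock();
}
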
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 9af717bafdca..07a151ce0fce 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -142,8 +142,17 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
void LPPassManager::markLoopAsDeleted(Loop &L) {
assert((&L == CurrentLoop || CurrentLoop->contains(&L)) &&
"Must not delete loop outside the current loop tree!");
- if (&L == CurrentLoop)
+ // If this loop appears elsewhere within the queue, we also need to remove it
+ // there. However, we have to be careful to not remove the back of the queue
+ // as that is assumed to match the current loop.
+ assert(LQ.back() == CurrentLoop && "Loop queue back isn't the current loop!");
+ LQ.erase(std::remove(LQ.begin(), LQ.end(), &L), LQ.end());
+
+ if (&L == CurrentLoop) {
CurrentLoopDeleted = true;
+ // Add this loop back onto the back of the queue to preserve our invariants.
+ LQ.push_back(&L);
+ }
}
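
The comment in the hunk above states the invariant being preserved: every stale occurrence of the deleted loop is purged from the queue, yet the back of the queue must still name the loop currently being processed. A simplified, self-contained editorial sketch of that erase/re-append dance, with a std::deque of ints standing in for the real loop queue:

// Editorial sketch of the queue invariant, with ints standing in for Loop*.
#include <algorithm>
#include <deque>

static void markDeleted(std::deque<int> &Queue, int Current, int Deleted) {
  // Drop every occurrence of the deleted loop from the queue...
  Queue.erase(std::remove(Queue.begin(), Queue.end(), Deleted), Queue.end());
  // ...but keep the "Queue.back() == Current" invariant if we just erased it.
  if (Deleted == Current)
    Queue.push_back(Deleted);
}
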
/// run - Execute all of the passes scheduled for execution. Keep track of
@@ -151,7 +160,10 @@ void LPPassManager::markLoopAsDeleted(Loop &L) {
bool LPPassManager::runOnFunction(Function &F) {
auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
LI = &LIWP.getLoopInfo();
+ Module &M = *F.getParent();
+#if 0
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+#endif
bool Changed = false;
// Collect inherited analysis from Module level pass manager.
@@ -181,6 +193,8 @@ bool LPPassManager::runOnFunction(Function &F) {
}
// Walk Loops
+ unsigned InstrCount = 0;
+ bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
while (!LQ.empty()) {
CurrentLoopDeleted = false;
CurrentLoop = LQ.back();
@@ -198,8 +212,11 @@ bool LPPassManager::runOnFunction(Function &F) {
{
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
TimeRegion PassTimer(getPassTimer(P));
-
+ if (EmitICRemark)
+ InstrCount = initSizeRemarkInfo(M);
Changed |= P->runOnLoop(CurrentLoop, *this);
+ if (EmitICRemark)
+ emitInstrCountChangedRemark(P, M, InstrCount);
}
if (Changed)
@@ -225,8 +242,12 @@ bool LPPassManager::runOnFunction(Function &F) {
// is that LPPassManager might run passes which do not require LCSSA
// form (LoopPassPrinter for example). We should skip verification for
// such passes.
+ // FIXME: Loop-sink currently breaks LCSSA. Fix it and re-enable the
+ // verification!
+#if 0
if (mustPreserveAnalysisID(LCSSAVerificationPass::ID))
- CurrentLoop->isRecursivelyLCSSAForm(*DT, *LI);
+ assert(CurrentLoop->isRecursivelyLCSSAForm(*DT, *LI));
+#endif
// Then call the regular verifyAnalysis functions.
verifyPreservedAnalysis(P);
@@ -351,13 +372,13 @@ bool LoopPass::skipLoop(const Loop *L) const {
return false;
// Check the opt bisect limit.
LLVMContext &Context = F->getContext();
- if (!Context.getOptBisect().shouldRunPass(this, *L))
+ if (!Context.getOptPassGate().shouldRunPass(this, *L))
return true;
// Check for the OptimizeNone attribute.
if (F->hasFnAttribute(Attribute::OptimizeNone)) {
// FIXME: Report this to dbgs() only once per function.
- DEBUG(dbgs() << "Skipping pass '" << getPassName()
- << "' in function " << F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() << "' in function "
+ << F->getName() << "\n");
// FIXME: Delete loop from pass manager's queue?
return true;
}
diff --git a/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
index 0da90dae3d9a..c8b91a7a1a51 100644
--- a/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -17,7 +17,7 @@
using namespace llvm;
-/// \brief Try to simplify instruction \param I using its SCEV expression.
+/// Try to simplify instruction \param I using its SCEV expression.
///
/// The idea is that some AddRec expressions become constants, which then
/// could trigger folding of other instructions. However, that only happens
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index 24fedfed772c..686ad294378c 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -40,7 +41,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -75,12 +75,24 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
{LibFunc_valloc, {MallocLike, 1, 0, -1}},
{LibFunc_Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
{LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
+ {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new(unsigned int, align_val_t)
+ {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, // new(unsigned int, align_val_t, nothrow)
+ {MallocLike, 3, 0, -1}},
{LibFunc_Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long)
{LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow)
+ {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new(unsigned long, align_val_t)
+ {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, // new(unsigned long, align_val_t, nothrow)
+ {MallocLike, 3, 0, -1}},
{LibFunc_Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
{LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
+ {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new[](unsigned int, align_val_t)
+ {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, // new[](unsigned int, align_val_t, nothrow)
+ {MallocLike, 3, 0, -1}},
{LibFunc_Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long)
{LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow)
+ {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new[](unsigned long, align_val_t)
+ {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, // new[](unsigned long, align_val_t, nothrow)
+ {MallocLike, 3, 0, -1}},
{LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
{LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
{LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long)
@@ -112,10 +124,9 @@ static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast
IsNoBuiltin = CS.isNoBuiltin();
- const Function *Callee = CS.getCalledFunction();
- if (!Callee || !Callee->isDeclaration())
- return nullptr;
- return Callee;
+ if (const Function *Callee = CS.getCalledFunction())
+ return Callee;
+ return nullptr;
}
/// Returns the allocation data for the given value if it's either a call to a
@@ -206,7 +217,7 @@ static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
return CS && CS.hasRetAttr(Attribute::NoAlias);
}
-/// \brief Tests if a value is a call or invoke to a library function that
+/// Tests if a value is a call or invoke to a library function that
/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
/// like).
bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
@@ -214,7 +225,7 @@ bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue();
}
-/// \brief Tests if a value is a call or invoke to a function that returns a
+/// Tests if a value is a call or invoke to a function that returns a
/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions).
bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
@@ -224,29 +235,29 @@ bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI,
hasNoAliasAttr(V, LookThroughBitCast);
}
-/// \brief Tests if a value is a call or invoke to a library function that
+/// Tests if a value is a call or invoke to a library function that
/// allocates uninitialized memory (such as malloc).
bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue();
}
-/// \brief Tests if a value is a call or invoke to a library function that
+/// Tests if a value is a call or invoke to a library function that
/// allocates zero-filled memory (such as calloc).
bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, CallocLike, TLI, LookThroughBitCast).hasValue();
}
-/// \brief Tests if a value is a call or invoke to a library function that
-/// allocates memory similiar to malloc or calloc.
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory similar to malloc or calloc.
bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, MallocOrCallocLike, TLI,
LookThroughBitCast).hasValue();
}
-/// \brief Tests if a value is a call or invoke to a library function that
+/// Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
@@ -350,11 +361,10 @@ const CallInst *llvm::extractCallocCall(const Value *I,
/// isFreeCall - Returns non-null if the value is a call to the builtin free()
const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
- const CallInst *CI = dyn_cast<CallInst>(I);
- if (!CI || isa<IntrinsicInst>(CI))
- return nullptr;
- Function *Callee = CI->getCalledFunction();
- if (Callee == nullptr)
+ bool IsNoBuiltinCall;
+ const Function *Callee =
+ getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
+ if (Callee == nullptr || IsNoBuiltinCall)
return nullptr;
StringRef FnName = Callee->getName();
@@ -374,9 +384,11 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
else if (TLIFn == LibFunc_ZdlPvj || // delete(void*, uint)
TLIFn == LibFunc_ZdlPvm || // delete(void*, ulong)
TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ TLIFn == LibFunc_ZdlPvSt11align_val_t || // delete(void*, align_val_t)
TLIFn == LibFunc_ZdaPvj || // delete[](void*, uint)
TLIFn == LibFunc_ZdaPvm || // delete[](void*, ulong)
TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
+ TLIFn == LibFunc_ZdaPvSt11align_val_t || // delete[](void*, align_val_t)
TLIFn == LibFunc_msvc_delete_ptr32_int || // delete(void*, uint)
TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
@@ -386,6 +398,9 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow)
ExpectedNumParams = 2;
+ else if (TLIFn == LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t || // delete[](void*, align_val_t, nothrow)
+ TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t) // delete(void*, align_val_t, nothrow)
+ ExpectedNumParams = 3;
else
return nullptr;
@@ -400,7 +415,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
return nullptr;
- return CI;
+ return dyn_cast<CallInst>(I);
}
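
The table entries and isFreeCall cases added in this file teach the analysis about C++17 aligned allocation and deallocation. For reference, a small self-contained editorial sketch of the source-level operators behind those Itanium-mangled LibFunc names (assumes a C++17 toolchain; not part of the patch):

// Editorial sketch: the C++17 aligned operators whose mangled names
// (e.g. _ZnwmSt11align_val_t, _ZdlPvSt11align_val_t) the patch now recognizes.
#include <cstdio>
#include <new>

int main() {
  // operator new(size_t, std::align_val_t)   -> _ZnwmSt11align_val_t on LP64
  void *P = ::operator new(64, std::align_val_t(32));
  std::printf("32-byte aligned allocation at %p\n", P);
  // operator delete(void*, std::align_val_t) -> _ZdlPvSt11align_val_t
  ::operator delete(P, std::align_val_t(32));
  return 0;
}
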
//===----------------------------------------------------------------------===//
@@ -412,7 +427,7 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) {
return Data.first - Data.second;
}
-/// \brief Compute the size of the object pointed by Ptr. Returns true and the
+/// Compute the size of the object pointed by Ptr. Returns true and the
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of
/// allocas, byval arguments, and global variables.
@@ -513,8 +528,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
return visitGEPOperator(cast<GEPOperator>(*CE));
}
- DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
- << '\n');
+ LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: "
+ << *V << '\n');
return unknown();
}
@@ -627,7 +642,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
SizeOffsetType
ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull& CPN) {
- if (Options.NullIsUnknownSize && CPN.getType()->getAddressSpace() == 0)
+ // If null is unknown, there's nothing we can do. Additionally, non-zero
+ // address spaces can make use of null, so we don't presume to know anything
+ // about that.
+ //
+ // TODO: How should this work with address space casts? We currently just drop
+ // them on the floor, but it's unclear what we should do when a NULL from
+ // addrspace(1) gets casted to addrspace(0) (or vice-versa).
+ if (Options.NullIsUnknownSize || CPN.getType()->getAddressSpace())
return unknown();
return std::make_pair(Zero, Zero);
}
@@ -714,7 +736,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) {
}
SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
- DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I << '\n');
+ LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I
+ << '\n');
return unknown();
}
@@ -793,8 +816,9 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
// Ignore values where we cannot do more than ObjectSizeVisitor.
Result = unknown();
} else {
- DEBUG(dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: "
- << *V << '\n');
+ LLVM_DEBUG(
+ dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: " << *V
+ << '\n');
Result = unknown();
}
@@ -931,6 +955,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) {
- DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I <<'\n');
+ LLVM_DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I
+ << '\n');
return unknown();
}
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index bf83f52ccf2e..7eeefd54f007 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -154,24 +154,16 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- AAMDNodes AAInfo;
-
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
- II->getAAMetadata(AAInfo);
- Loc = MemoryLocation(
- II->getArgOperand(1),
- cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
+ Loc = MemoryLocation::getForArgument(II, 1, TLI);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return ModRefInfo::Mod;
case Intrinsic::invariant_end:
- II->getAAMetadata(AAInfo);
- Loc = MemoryLocation(
- II->getArgOperand(2),
- cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
+ Loc = MemoryLocation::getForArgument(II, 2, TLI);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return ModRefInfo::Mod;
@@ -363,8 +355,8 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
BasicBlock *BB) {
- auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
- if (!InvariantGroupMD)
+
+ if (!LI->getMetadata(LLVMContext::MD_invariant_group))
return MemDepResult::getUnknown();
// Take the ptr operand after all casts and geps 0. This way we can search
@@ -425,7 +417,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// same pointer operand) we can assume that the value pointed to by the
// pointer operand didn't change.
if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
- U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
+ U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
ClosestDependency = GetClosestDependency(ClosestDependency, U);
}
}
@@ -441,6 +433,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
NonLocalDefsCache.try_emplace(
LI, NonLocalDepResult(ClosestDependency->getParent(),
MemDepResult::getDef(ClosestDependency), nullptr));
+ ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
return MemDepResult::getNonLocal();
}
@@ -813,7 +806,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.push_back(Entry.getBB());
// Sort the cache so that we can do fast binary search lookups below.
- std::sort(Cache.begin(), Cache.end());
+ llvm::sort(Cache.begin(), Cache.end());
++NumCacheDirtyNonLocal;
// cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
@@ -832,7 +825,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
SmallPtrSet<BasicBlock *, 32> Visited;
unsigned NumSortedEntries = Cache.size();
- DEBUG(AssertSorted(Cache));
+ LLVM_DEBUG(AssertSorted(Cache));
// Iterate while we still have blocks to update.
while (!DirtyBlocks.empty()) {
@@ -845,7 +838,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
- DEBUG(AssertSorted(Cache, NumSortedEntries));
+ LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
NonLocalDepEntry(DirtyBB));
@@ -927,12 +920,12 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
"Can't get pointer deps of a non-pointer!");
Result.clear();
{
- // Check if there is cached Def with invariant.group. FIXME: cache might be
- // invalid if cached instruction would be removed between call to
- // getPointerDependencyFrom and this function.
+ // Check if there is cached Def with invariant.group.
auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
if (NonLocalDefIt != NonLocalDefsCache.end()) {
- Result.push_back(std::move(NonLocalDefIt->second));
+ Result.push_back(NonLocalDefIt->second);
+ ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
+ .erase(QueryInst);
NonLocalDefsCache.erase(NonLocalDefIt);
return;
}
@@ -1076,7 +1069,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
break;
default:
// Added many values, do a full scale sort.
- std::sort(Cache.begin(), Cache.end());
+ llvm::sort(Cache.begin(), Cache.end());
break;
}
}
@@ -1218,7 +1211,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
unsigned NumSortedEntries = Cache->size();
unsigned WorklistEntries = BlockNumberLimit;
bool GotWorklistLimit = false;
- DEBUG(AssertSorted(*Cache));
+ LLVM_DEBUG(AssertSorted(*Cache));
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
@@ -1249,7 +1242,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// Get the dependency info for Pointer in BB. If we have cached
// information, we will use it, otherwise we compute it.
- DEBUG(AssertSorted(*Cache, NumSortedEntries));
+ LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
Cache, NumSortedEntries);
@@ -1463,13 +1456,33 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// Okay, we're done now. If we added new values to the cache, re-sort it.
SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
- DEBUG(AssertSorted(*Cache));
+ LLVM_DEBUG(AssertSorted(*Cache));
return true;
}
-/// If P exists in CachedNonLocalPointerInfo, remove it.
+/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
ValueIsLoadPair P) {
+
+ // Most of the time this cache is empty.
+ if (!NonLocalDefsCache.empty()) {
+ auto it = NonLocalDefsCache.find(P.getPointer());
+ if (it != NonLocalDefsCache.end()) {
+ RemoveFromReverseMap(ReverseNonLocalDefsCache,
+ it->second.getResult().getInst(), P.getPointer());
+ NonLocalDefsCache.erase(it);
+ }
+
+ if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
+ auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
+ if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
+ for (const auto &entry : toRemoveIt->second)
+ NonLocalDefsCache.erase(entry);
+ ReverseNonLocalDefsCache.erase(toRemoveIt);
+ }
+ }
+ }
+
CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
if (It == NonLocalPointerDeps.end())
return;
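
The ReverseNonLocalDefsCache introduced above exists so that, when an instruction other queries have cached against is removed, the stale forward entries can be found and dropped in one pass. A simplified, self-contained editorial sketch of that forward/reverse map pattern, with plain standard-library types standing in for the LLVM ones:

// Editorial sketch of the forward/reverse invalidation pattern, with strings
// standing in for the query LoadInst* and the dependee Instruction*.
#include <map>
#include <set>
#include <string>

using Query = std::string;
using Dependee = std::string;

static void invalidate(std::map<Query, Dependee> &Forward,
                       std::map<Dependee, std::set<Query>> &Reverse,
                       const Dependee &Removed) {
  auto It = Reverse.find(Removed);
  if (It == Reverse.end())
    return;
  for (const Query &Q : It->second) // every query that cached Removed is stale
    Forward.erase(Q);
  Reverse.erase(It);
}
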
@@ -1646,7 +1659,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
// Re-sort the NonLocalDepInfo. Changing the dirty entry to its
// subsequent value may invalidate the sortedness.
- std::sort(NLPDI.begin(), NLPDI.end());
+ llvm::sort(NLPDI.begin(), NLPDI.end());
}
ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
@@ -1659,7 +1672,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
}
assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
- DEBUG(verifyRemoved(RemInst));
+ LLVM_DEBUG(verifyRemoved(RemInst));
}
/// Verify that the specified instruction does not occur in our internal data
diff --git a/contrib/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
index 9db6c499129a..55924db284ec 100644
--- a/contrib/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
@@ -65,6 +65,14 @@ MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
}
MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
+ return getForSource(cast<AnyMemTransferInst>(MTI));
+}
+
+MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
+ return getForSource(cast<AnyMemTransferInst>(MTI));
+}
+
+MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
uint64_t Size = UnknownSize;
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
Size = C->getValue().getZExtValue();
@@ -77,17 +85,25 @@ MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
return MemoryLocation(MTI->getRawSource(), Size, AATags);
}
-MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) {
+MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
+ return getForDest(cast<AnyMemIntrinsic>(MI));
+}
+
+MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
+ return getForDest(cast<AnyMemIntrinsic>(MI));
+}
+
+MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
uint64_t Size = UnknownSize;
- if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
Size = C->getValue().getZExtValue();
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
AAMDNodes AATags;
- MTI->getAAMetadata(AATags);
+ MI->getAAMetadata(AATags);
- return MemoryLocation(MTI->getRawDest(), Size, AATags);
+ return MemoryLocation(MI->getRawDest(), Size, AATags);
}
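
With the overload set above, code that handles both the plain and the element-atomic memory transfer intrinsics can work through the common Any* base classes. A hedged usage sketch (editorial, not part of the patch; it assumes the AnyMemTransferInst/AnyMemIntrinsic hierarchy from llvm/IR/IntrinsicInst.h at this revision):

// Editorial sketch: forming source/destination locations for either a plain
// or an element-atomic memcpy/memmove through the AnyMemTransferInst base.
#include <utility>
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static std::pair<MemoryLocation, MemoryLocation>
copyLocations(const AnyMemTransferInst *MTI) {
  // getForDest takes the AnyMemIntrinsic base, to which MTI converts.
  return {MemoryLocation::getForSource(MTI), MemoryLocation::getForDest(MTI)};
}
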
MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp
index 09605f61fa93..f57d490ce96e 100644
--- a/contrib/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
@@ -82,7 +83,7 @@ static cl::opt<bool>
namespace llvm {
-/// \brief An assembly annotator class to print Memory SSA information in
+/// An assembly annotator class to print Memory SSA information in
/// comments.
class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
friend class MemorySSA;
@@ -235,13 +236,25 @@ static bool areLoadsReorderable(const LoadInst *Use,
return !(SeqCstUse || MayClobberIsAcquire);
}
-static bool instructionClobbersQuery(MemoryDef *MD,
- const MemoryLocation &UseLoc,
- const Instruction *UseInst,
- AliasAnalysis &AA) {
+namespace {
+
+struct ClobberAlias {
+ bool IsClobber;
+ Optional<AliasResult> AR;
+};
+
+} // end anonymous namespace
+
+// Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being
+// ignored if IsClobber = false.
+static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
+ const MemoryLocation &UseLoc,
+ const Instruction *UseInst,
+ AliasAnalysis &AA) {
Instruction *DefInst = MD->getMemoryInst();
assert(DefInst && "Defining instruction not actually an instruction");
ImmutableCallSite UseCS(UseInst);
+ Optional<AliasResult> AR;
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
// These intrinsics will show up as affecting memory, but they are just
@@ -249,13 +262,14 @@ static bool instructionClobbersQuery(MemoryDef *MD,
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
if (UseCS)
- return false;
- return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), UseLoc);
+ return {false, NoAlias};
+ AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc);
+ return {AR == MustAlias, AR};
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::assume:
- return false;
+ return {false, NoAlias};
default:
break;
}
@@ -263,19 +277,23 @@ static bool instructionClobbersQuery(MemoryDef *MD,
if (UseCS) {
ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
- return isModOrRefSet(I);
+ AR = isMustSet(I) ? MustAlias : MayAlias;
+ return {isModOrRefSet(I), AR};
}
if (auto *DefLoad = dyn_cast<LoadInst>(DefInst))
if (auto *UseLoad = dyn_cast<LoadInst>(UseInst))
- return !areLoadsReorderable(UseLoad, DefLoad);
+ return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias};
- return isModSet(AA.getModRefInfo(DefInst, UseLoc));
+ ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc);
+ AR = isMustSet(I) ? MustAlias : MayAlias;
+ return {isModSet(I), AR};
}
-static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
- const MemoryLocOrCall &UseMLOC,
- AliasAnalysis &AA) {
+static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
+ const MemoryUseOrDef *MU,
+ const MemoryLocOrCall &UseMLOC,
+ AliasAnalysis &AA) {
// FIXME: This is a temporary hack to allow a single instructionClobbersQuery
// to exist while MemoryLocOrCall is pushed through places.
if (UseMLOC.IsCall)
@@ -288,7 +306,7 @@ static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
// Return true when MD may alias MU, return false otherwise.
bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
AliasAnalysis &AA) {
- return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA);
+ return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA).IsClobber;
}
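
As the comment above notes, the AliasResult carried in ClobberAlias is only meaningful when IsClobber is true. A tiny editorial sketch of how a caller is expected to consume that contract (self-contained; the real struct lives in an anonymous namespace in this file, so a stand-in type is declared here):

// Editorial sketch mirroring the in-file ClobberAlias contract: AR is only
// consulted once IsClobber is known to be true.
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"

namespace {
struct ClobberAliasSketch {
  bool IsClobber;
  llvm::Optional<llvm::AliasResult> AR;
};
} // end anonymous namespace

static bool isMustAliasClobber(const ClobberAliasSketch &CA) {
  return CA.IsClobber && CA.AR && *CA.AR == llvm::MustAlias;
}
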
namespace {
@@ -303,6 +321,7 @@ struct UpwardsMemoryQuery {
const Instruction *Inst = nullptr;
// The MemoryAccess we actually got called with, used to test local domination
const MemoryAccess *OriginalAccess = nullptr;
+ Optional<AliasResult> AR = MayAlias;
UpwardsMemoryQuery() = default;
@@ -333,9 +352,6 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
const Instruction *I) {
// If the memory can't be changed, then loads of the memory can't be
// clobbered.
- //
- // FIXME: We should handle invariant groups, as well. It's a bit harder,
- // because we need to pay close attention to invariant group barriers.
return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
AA.pointsToConstantMemory(cast<LoadInst>(I)->
getPointerOperand()));
@@ -386,9 +402,15 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
//
// Also, note that this can't be hoisted out of the `Worklist` loop,
// since MD may only act as a clobber for 1 of N MemoryLocations.
- FoundClobber =
- FoundClobber || MSSA.isLiveOnEntryDef(MD) ||
- instructionClobbersQuery(MD, MAP.second, Query.Inst, AA);
+ FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD);
+ if (!FoundClobber) {
+ ClobberAlias CA =
+ instructionClobbersQuery(MD, MAP.second, Query.Inst, AA);
+ if (CA.IsClobber) {
+ FoundClobber = true;
+ // Not used: CA.AR;
+ }
+ }
}
break;
}
@@ -398,7 +420,8 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
if (auto *MD = dyn_cast<MemoryDef>(MA)) {
(void)MD;
- assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) &&
+ assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA)
+ .IsClobber &&
"Found clobber before reaching ClobberAt!");
continue;
}
@@ -468,9 +491,10 @@ class ClobberWalker {
/// Result of calling walkToPhiOrClobber.
struct UpwardsWalkResult {
/// The "Result" of the walk. Either a clobber, the last thing we walked, or
- /// both.
+ /// both. Include alias info when clobber found.
MemoryAccess *Result;
bool IsKnownClobber;
+ Optional<AliasResult> AR;
};
/// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
@@ -486,17 +510,21 @@ class ClobberWalker {
for (MemoryAccess *Current : def_chain(Desc.Last)) {
Desc.Last = Current;
if (Current == StopAt)
- return {Current, false};
-
- if (auto *MD = dyn_cast<MemoryDef>(Current))
- if (MSSA.isLiveOnEntryDef(MD) ||
- instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA))
- return {MD, true};
+ return {Current, false, MayAlias};
+
+ if (auto *MD = dyn_cast<MemoryDef>(Current)) {
+ if (MSSA.isLiveOnEntryDef(MD))
+ return {MD, true, MustAlias};
+ ClobberAlias CA =
+ instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA);
+ if (CA.IsClobber)
+ return {MD, true, CA.AR};
+ }
}
assert(isa<MemoryPhi>(Desc.Last) &&
"Ended at a non-clobber that's not a phi?");
- return {Desc.Last, false};
+ return {Desc.Last, false, MayAlias};
}
void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
@@ -819,8 +847,6 @@ public:
ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT)
: MSSA(MSSA), AA(AA), DT(DT) {}
- void reset() {}
-
/// Finds the nearest clobber for the given query, optimizing phis if
/// possible.
MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) {
@@ -839,6 +865,7 @@ public:
MemoryAccess *Result;
if (WalkResult.IsKnownClobber) {
Result = WalkResult.Result;
+ Q.AR = WalkResult.AR;
} else {
OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last),
Current, Q.StartingLoc);
@@ -876,12 +903,11 @@ struct RenamePassData {
namespace llvm {
-/// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no
-/// longer does caching on its own,
-/// but the name has been retained for the moment.
+/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
+/// longer does caching on its own, but the name has been retained for the
+/// moment.
class MemorySSA::CachingWalker final : public MemorySSAWalker {
ClobberWalker Walker;
- bool AutoResetWalker = true;
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
@@ -896,13 +922,6 @@ public:
const MemoryLocation &) override;
void invalidateInfo(MemoryAccess *) override;
- /// Whether we call resetClobberWalker() after each time we *actually* walk to
- /// answer a clobber query.
- void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; }
-
- /// Drop the walker's persistent data structures.
- void resetClobberWalker() { Walker.reset(); }
-
void verify(const MemorySSA *MSSA) override {
MemorySSAWalker::verify(MSSA);
Walker.verify(MSSA);
@@ -930,7 +949,7 @@ void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
}
}
-/// \brief Rename a single basic block into MemorySSA form.
+/// Rename a single basic block into MemorySSA form.
/// Uses the standard SSA renaming algorithm.
/// \returns The new incoming value.
MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
@@ -953,7 +972,7 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
return IncomingVal;
}
-/// \brief This is the standard SSA renaming algorithm.
+/// This is the standard SSA renaming algorithm.
///
/// We walk the dominator tree in preorder, renaming accesses, and then filling
/// in phi nodes in our successors.
@@ -1002,7 +1021,7 @@ void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
}
}
-/// \brief This handles unreachable block accesses by deleting phi nodes in
+/// This handles unreachable block accesses by deleting phi nodes in
/// unreachable blocks, and marking all other unreachable MemoryAccess's as
/// being uses of the live on entry definition.
void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
@@ -1044,7 +1063,7 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
: AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
- NextID(INVALID_MEMORYACCESS_ID) {
+ NextID(0) {
buildMemorySSA();
}
@@ -1106,6 +1125,7 @@ private:
// This is where the last walk for this memory location ended.
unsigned long LastKill;
bool LastKillValid;
+ Optional<AliasResult> AR;
};
void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
@@ -1165,7 +1185,7 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
}
if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
- MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
+ MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None);
continue;
}
@@ -1207,6 +1227,7 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
if (!LocInfo.LastKillValid) {
LocInfo.LastKill = VersionStack.size() - 1;
LocInfo.LastKillValid = true;
+ LocInfo.AR = MayAlias;
}
// At this point, we should have corrected last kill and LowerBound to be
@@ -1219,10 +1240,11 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
unsigned long UpperBound = VersionStack.size() - 1;
if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
- DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
- << *(MU->getMemoryInst()) << ")"
- << " because there are " << UpperBound - LocInfo.LowerBound
- << " stores to disambiguate\n");
+ LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
+ << *(MU->getMemoryInst()) << ")"
+ << " because there are "
+ << UpperBound - LocInfo.LowerBound
+ << " stores to disambiguate\n");
// Because we did not walk, LastKill is no longer valid, as this may
// have been a kill.
LocInfo.LastKillValid = false;
@@ -1250,24 +1272,32 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
// Reset UpperBound to liveOnEntryDef's place in the stack
UpperBound = 0;
FoundClobberResult = true;
+ LocInfo.AR = MustAlias;
break;
}
- if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) {
+ ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA);
+ if (CA.IsClobber) {
FoundClobberResult = true;
+ LocInfo.AR = CA.AR;
break;
}
--UpperBound;
}
+
+ // Note: Phis always have AliasResult AR set to MayAlias ATM.
+
// At the end of this loop, UpperBound is either a clobber, or lower bound
// PHI walking may cause it to be < LowerBound, and in fact, < LastKill.
if (FoundClobberResult || UpperBound < LocInfo.LastKill) {
- MU->setDefiningAccess(VersionStack[UpperBound], true);
// We were last killed now by where we got to
+ if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound]))
+ LocInfo.AR = None;
+ MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR);
LocInfo.LastKill = UpperBound;
} else {
// Otherwise, we checked all the new ones, and now we know we can get to
// LastKill.
- MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true);
+ MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR);
}
LocInfo.LowerBound = VersionStack.size() - 1;
LocInfo.LowerBoundBlock = BB;
@@ -1289,19 +1319,13 @@ void MemorySSA::OptimizeUses::optimizeUses() {
}
void MemorySSA::placePHINodes(
- const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks,
- const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) {
+ const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) {
// Determine where our MemoryPhi's should go
ForwardIDFCalculator IDFs(*DT);
IDFs.setDefiningBlocks(DefiningBlocks);
SmallVector<BasicBlock *, 32> IDFBlocks;
IDFs.calculate(IDFBlocks);
- std::sort(IDFBlocks.begin(), IDFBlocks.end(),
- [&BBNumbers](const BasicBlock *A, const BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
- });
-
// Now place MemoryPhi nodes.
for (auto &BB : IDFBlocks)
createMemoryPhi(BB);
@@ -1315,11 +1339,8 @@ void MemorySSA::buildMemorySSA() {
// semantics do *not* imply that something with no immediate uses can simply
// be removed.
BasicBlock &StartingPoint = F.getEntryBlock();
- LiveOnEntryDef =
- llvm::make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
- &StartingPoint, NextID++);
- DenseMap<const BasicBlock *, unsigned int> BBNumbers;
- unsigned NextBBNum = 0;
+ LiveOnEntryDef.reset(new MemoryDef(F.getContext(), nullptr, nullptr,
+ &StartingPoint, NextID++));
// We maintain lists of memory accesses per-block, trading memory for time. We
// could just look up the memory access for every possible instruction in the
@@ -1328,7 +1349,6 @@ void MemorySSA::buildMemorySSA() {
// Go through each block, figure out where defs occur, and chain together all
// the accesses.
for (BasicBlock &B : F) {
- BBNumbers[&B] = NextBBNum++;
bool InsertIntoDef = false;
AccessList *Accesses = nullptr;
DefsList *Defs = nullptr;
@@ -1350,7 +1370,7 @@ void MemorySSA::buildMemorySSA() {
if (InsertIntoDef)
DefiningBlocks.insert(&B);
}
- placePHINodes(DefiningBlocks, BBNumbers);
+ placePHINodes(DefiningBlocks);
// Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
// filled in with all blocks.
@@ -1359,11 +1379,7 @@ void MemorySSA::buildMemorySSA() {
CachingWalker *Walker = getWalkerImpl();
- // We're doing a batch of updates; don't drop useful caches between them.
- Walker->setAutoResetWalker(false);
OptimizeUses(this, Walker, AA, DT).optimizeUses();
- Walker->setAutoResetWalker(true);
- Walker->resetClobberWalker();
// Mark the uses in unreachable blocks as live on entry, so that they go
// somewhere.
@@ -1426,7 +1442,7 @@ void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
auto *Defs = getOrCreateDefsList(BB);
// If we got asked to insert at the end, we have an easy job, just shove it
// at the end. If we got asked to insert before an existing def, we also get
- // an terator. If we got asked to insert before a use, we have to hunt for
+ // an iterator. If we got asked to insert before a use, we have to hunt for
// the next def.
if (WasEnd) {
Defs->push_back(*What);
@@ -1445,7 +1461,7 @@ void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
BlockNumberingValid.erase(BB);
}
-// Move What before Where in the IR. The end result is taht What will belong to
+// Move What before Where in the IR. The end result is that What will belong to
// the right lists and have the right Block set, but will not otherwise be
// correct. It will not have the right defining access, and if it is a def,
// things below it will not properly be updated.
@@ -1457,8 +1473,18 @@ void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
insertIntoListsBefore(What, BB, Where);
}
-void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
+void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB,
InsertionPlace Point) {
+ if (isa<MemoryPhi>(What)) {
+ assert(Point == Beginning &&
+ "Can only move a Phi at the beginning of the block");
+ // Update lookup table entry
+ ValueToMemoryAccess.erase(What->getBlock());
+ bool Inserted = ValueToMemoryAccess.insert({BB, What}).second;
+ (void)Inserted;
+ assert(Inserted && "Cannot move a Phi to a block that already has one");
+ }
+
removeFromLists(What, false);
What->setBlock(BB);
insertIntoListsForBlock(What, BB, Point);
@@ -1498,7 +1524,7 @@ static inline bool isOrdered(const Instruction *I) {
return false;
}
-/// \brief Helper function to create new memory accesses
+/// Helper function to create new memory accesses
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
// The assume intrinsic has a control dependency which we model by claiming
// that it writes arbitrarily. Ignore that fake memory dependency here.
@@ -1526,9 +1552,6 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
if (!Def && !Use)
return nullptr;
- assert((Def || Use) &&
- "Trying to create a memory access with a non-memory instruction");
-
MemoryUseOrDef *MUD;
if (Def)
MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
@@ -1538,7 +1561,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
return MUD;
}
-/// \brief Returns true if \p Replacer dominates \p Replacee .
+/// Returns true if \p Replacer dominates \p Replacee .
bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
const MemoryAccess *Replacee) const {
if (isa<MemoryUseOrDef>(Replacee))
@@ -1555,40 +1578,40 @@ bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
return true;
}
-/// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
+/// Properly remove \p MA from all of MemorySSA's lookup tables.
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
assert(MA->use_empty() &&
"Trying to remove memory access that still has uses");
BlockNumbering.erase(MA);
- if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
MUD->setDefiningAccess(nullptr);
// Invalidate our walker's cache if necessary
if (!isa<MemoryUse>(MA))
Walker->invalidateInfo(MA);
- // The call below to erase will destroy MA, so we can't change the order we
- // are doing things here
+
Value *MemoryInst;
- if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
+ if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
MemoryInst = MUD->getMemoryInst();
- } else {
+ else
MemoryInst = MA->getBlock();
- }
+
auto VMA = ValueToMemoryAccess.find(MemoryInst);
if (VMA->second == MA)
ValueToMemoryAccess.erase(VMA);
}
-/// \brief Properly remove \p MA from all of MemorySSA's lists.
+/// Properly remove \p MA from all of MemorySSA's lists.
///
/// Because of the way the intrusive list and use lists work, it is important to
/// do removal in the right order.
/// ShouldDelete defaults to true, and will cause the memory access to also be
/// deleted, not just removed.
void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
+ BasicBlock *BB = MA->getBlock();
// The access list owns the reference, so we erase it from the non-owning list
// first.
if (!isa<MemoryUse>(MA)) {
- auto DefsIt = PerBlockDefs.find(MA->getBlock());
+ auto DefsIt = PerBlockDefs.find(BB);
std::unique_ptr<DefsList> &Defs = DefsIt->second;
Defs->remove(*MA);
if (Defs->empty())
@@ -1597,15 +1620,17 @@ void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
// The erase call here will delete it. If we don't want it deleted, we call
// remove instead.
- auto AccessIt = PerBlockAccesses.find(MA->getBlock());
+ auto AccessIt = PerBlockAccesses.find(BB);
std::unique_ptr<AccessList> &Accesses = AccessIt->second;
if (ShouldDelete)
Accesses->erase(MA);
else
Accesses->remove(MA);
- if (Accesses->empty())
+ if (Accesses->empty()) {
PerBlockAccesses.erase(AccessIt);
+ BlockNumberingValid.erase(BB);
+ }
}
void MemorySSA::print(raw_ostream &OS) const {
@@ -1621,10 +1646,49 @@ void MemorySSA::verifyMemorySSA() const {
verifyDefUses(F);
verifyDomination(F);
verifyOrdering(F);
+ verifyDominationNumbers(F);
Walker->verify(this);
}
-/// \brief Verify that the order and existence of MemoryAccesses matches the
+/// Verify that all of the blocks we believe to have valid domination numbers
+/// actually have valid domination numbers.
+void MemorySSA::verifyDominationNumbers(const Function &F) const {
+#ifndef NDEBUG
+ if (BlockNumberingValid.empty())
+ return;
+
+ SmallPtrSet<const BasicBlock *, 16> ValidBlocks = BlockNumberingValid;
+ for (const BasicBlock &BB : F) {
+ if (!ValidBlocks.count(&BB))
+ continue;
+
+ ValidBlocks.erase(&BB);
+
+ const AccessList *Accesses = getBlockAccesses(&BB);
+ // It's correct to say an empty block has valid numbering.
+ if (!Accesses)
+ continue;
+
+ // Block numbering starts at 1.
+ unsigned long LastNumber = 0;
+ for (const MemoryAccess &MA : *Accesses) {
+ auto ThisNumberIter = BlockNumbering.find(&MA);
+ assert(ThisNumberIter != BlockNumbering.end() &&
+ "MemoryAccess has no domination number in a valid block!");
+
+ unsigned long ThisNumber = ThisNumberIter->second;
+ assert(ThisNumber > LastNumber &&
+ "Domination numbers should be strictly increasing!");
+ LastNumber = ThisNumber;
+ }
+ }
+
+ assert(ValidBlocks.empty() &&
+ "All valid BasicBlocks should exist in F -- dangling pointers?");
+#endif
+}
+
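For illustration only, a tiny standalone C++ sketch of the strictly-increasing numbering check that the verifier above performs per block; the containers and names below are invented for the example and are not MemorySSA data structures:

#include <map>
#include <vector>
#include <iostream>

// Check that every listed element has a number and that the numbers strictly
// increase along the list, mirroring the per-block walk in the verifier above.
bool numbersStrictlyIncrease(const std::vector<int> &List,
                             const std::map<int, unsigned long> &Numbering) {
  unsigned long Last = 0; // numbering starts at 1
  for (int Elem : List) {
    auto It = Numbering.find(Elem);
    if (It == Numbering.end() || It->second <= Last)
      return false;
    Last = It->second;
  }
  return true;
}

int main() {
  std::map<int, unsigned long> Numbering = {{10, 1}, {20, 2}, {30, 3}};
  std::cout << numbersStrictlyIncrease({10, 20, 30}, Numbering) << "\n"; // 1
  std::cout << numbersStrictlyIncrease({20, 10}, Numbering) << "\n";     // 0
}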
+/// Verify that the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
void MemorySSA::verifyOrdering(Function &F) const {
// Walk all the blocks, comparing what the lookups think and what the access
@@ -1687,7 +1751,7 @@ void MemorySSA::verifyOrdering(Function &F) const {
}
}
-/// \brief Verify the domination properties of MemorySSA by checking that each
+/// Verify the domination properties of MemorySSA by checking that each
/// definition dominates all of its uses.
void MemorySSA::verifyDomination(Function &F) const {
#ifndef NDEBUG
@@ -1709,7 +1773,7 @@ void MemorySSA::verifyDomination(Function &F) const {
#endif
}
-/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
+/// Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
#ifndef NDEBUG
@@ -1723,7 +1787,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
#endif
}
-/// \brief Verify the immediate use information, by walking all the memory
+/// Verify the immediate use information, by walking all the memory
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list
void MemorySSA::verifyDefUses(Function &F) const {
@@ -1733,8 +1797,12 @@ void MemorySSA::verifyDefUses(Function &F) const {
assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
pred_begin(&B), pred_end(&B))) &&
"Incomplete MemoryPhi Node");
- for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
verifyUseInDefs(Phi->getIncomingValue(I), Phi);
+ assert(find(predecessors(&B), Phi->getIncomingBlock(I)) !=
+ pred_end(&B) &&
+ "Incoming phi block not a block predecessor");
+ }
}
for (Instruction &I : B) {
@@ -1769,7 +1837,7 @@ void MemorySSA::renumberBlock(const BasicBlock *B) const {
BlockNumberingValid.insert(B);
}
-/// \brief Determine, for two memory accesses in the same block,
+/// Determine, for two memory accesses in the same block,
/// whether \p Dominator dominates \p Dominatee.
/// \returns True if \p Dominator dominates \p Dominatee.
bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
@@ -1844,12 +1912,24 @@ void MemoryAccess::print(raw_ostream &OS) const {
void MemoryDef::print(raw_ostream &OS) const {
MemoryAccess *UO = getDefiningAccess();
+ auto printID = [&OS](MemoryAccess *A) {
+ if (A && A->getID())
+ OS << A->getID();
+ else
+ OS << LiveOnEntryStr;
+ };
+
OS << getID() << " = MemoryDef(";
- if (UO && UO->getID())
- OS << UO->getID();
- else
- OS << LiveOnEntryStr;
- OS << ')';
+ printID(UO);
+ OS << ")";
+
+ if (isOptimized()) {
+ OS << "->";
+ printID(getOptimized());
+
+ if (Optional<AliasResult> AR = getOptimizedAccessType())
+ OS << " " << *AR;
+ }
}
void MemoryPhi::print(raw_ostream &OS) const {
@@ -1886,6 +1966,9 @@ void MemoryUse::print(raw_ostream &OS) const {
else
OS << LiveOnEntryStr;
OS << ')';
+
+ if (Optional<AliasResult> AR = getOptimizedAccessType())
+ OS << " " << *AR;
}
void MemoryAccess::dump() const {
@@ -1977,21 +2060,13 @@ void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
MUD->resetOptimized();
}
-/// \brief Walk the use-def chains starting at \p MA and find
+/// Walk the use-def chains starting at \p MA and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
- MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
-#ifdef EXPENSIVE_CHECKS
- MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q);
- assert(NewNoCache == New && "Cache made us hand back a different result?");
- (void)NewNoCache;
-#endif
- if (AutoResetWalker)
- resetClobberWalker();
- return New;
+ return Walker.findClobber(StartingAccess, Q);
}
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
@@ -2023,10 +2098,10 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
: StartingUseOrDef;
MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
- DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
- DEBUG(dbgs() << *StartingUseOrDef << "\n");
- DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
- DEBUG(dbgs() << *Clobber << "\n");
+ LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
+ LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
+ LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
+ LLVM_DEBUG(dbgs() << *Clobber << "\n");
return Clobber;
}
@@ -2038,24 +2113,23 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
return MA;
// If this is an already optimized use or def, return the optimized result.
- // Note: Currently, we do not store the optimized def result because we'd need
- // a separate field, since we can't use it as the defining access.
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
- if (MUD->isOptimized())
- return MUD->getOptimized();
+ // Note: Currently, we store the optimized def result in a separate field,
+ // since we can't use the defining access.
+ if (StartingAccess->isOptimized())
+ return StartingAccess->getOptimized();
const Instruction *I = StartingAccess->getMemoryInst();
UpwardsMemoryQuery Q(I, StartingAccess);
- // We can't sanely do anything with a fences, they conservatively
- // clobber all memory, and have no locations to get pointers from to
- // try to disambiguate.
+ // We can't sanely do anything with a fence, since they conservatively clobber
+ // all memory, and have no locations to get pointers from to try to
+ // disambiguate.
if (!Q.IsCall && I->isFenceLike())
return StartingAccess;
if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
- MUD->setOptimized(LiveOnEntry);
+ StartingAccess->setOptimized(LiveOnEntry);
+ StartingAccess->setOptimizedAccessType(None);
return LiveOnEntry;
}
@@ -2064,16 +2138,23 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
// At this point, DefiningAccess may be the live on entry def.
// If it is, we will not get a better result.
- if (MSSA->isLiveOnEntryDef(DefiningAccess))
+ if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
+ StartingAccess->setOptimized(DefiningAccess);
+ StartingAccess->setOptimizedAccessType(None);
return DefiningAccess;
+ }
MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
- DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
- DEBUG(dbgs() << *DefiningAccess << "\n");
- DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
- DEBUG(dbgs() << *Result << "\n");
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
- MUD->setOptimized(Result);
+ LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
+ LLVM_DEBUG(dbgs() << *DefiningAccess << "\n");
+ LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
+ LLVM_DEBUG(dbgs() << *Result << "\n");
+
+ StartingAccess->setOptimized(Result);
+ if (MSSA->isLiveOnEntryDef(Result))
+ StartingAccess->setOptimizedAccessType(None);
+ else if (Q.AR == MustAlias)
+ StartingAccess->setOptimizedAccessType(MustAlias);
return Result;
}
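The hunks above cache the computed clobber on the access itself, together with an optional alias kind, instead of recomputing it on every query. A minimal standalone sketch of that caching pattern, using invented names rather than the real MemorySSA API:

#include <optional>
#include <iostream>

enum class AliasKind { May, Must };

// Hypothetical stand-in for a memory access: it can remember the clobber that
// was computed for it, plus an optional alias kind for that clobber.
struct Access {
  int ID;
  Access *Optimized = nullptr;            // cached clobber, if any
  std::optional<AliasKind> OptimizedKind; // alias kind of the cached clobber
  bool isOptimized() const { return Optimized != nullptr; }
};

// Pretend walk: normally expensive, here it just records and returns Clobber
// so the caching behaviour is observable.
Access *findClobber(Access &Start, Access &Clobber, AliasKind Kind) {
  if (Start.isOptimized())    // cache hit: no walk at all
    return Start.Optimized;
  Start.Optimized = &Clobber; // cache the result on the access itself
  Start.OptimizedKind = Kind;
  return &Clobber;
}

int main() {
  Access Def{1}, Use{2};
  Access *First = findClobber(Use, Def, AliasKind::Must);
  Access *Second = findClobber(Use, Def, AliasKind::Must); // served from cache
  std::cout << (First == Second) << "\n";                  // prints 1
}

The trade-off is the usual one for memoization: queries after the first are cheap, but every transformation that changes the answer has to reset the cached state, which is what the resetOptimized call above is for.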
diff --git a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
index f5d89f699a5a..abe2b3c25a58 100644
--- a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -37,36 +37,45 @@ using namespace llvm;
// that there are two or more definitions needing to be merged.
// This will still leave a non-minimal form in the case of irreducible control
// flow, where phi nodes may end up in cycles with themselves even though they
// are unnecessary.
-MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
- // Single predecessor case, just recurse, we can only have one definition.
+MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
+ BasicBlock *BB,
+ DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
+ // First, do a cache lookup. Without this cache, certain CFG structures
+ // (like a series of if statements) take exponential time to visit.
+ auto Cached = CachedPreviousDef.find(BB);
+ if (Cached != CachedPreviousDef.end()) {
+ return Cached->second;
+ }
+
if (BasicBlock *Pred = BB->getSinglePredecessor()) {
- return getPreviousDefFromEnd(Pred);
- } else if (VisitedBlocks.count(BB)) {
+ // Single predecessor case, just recurse, we can only have one definition.
+ MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef);
+ CachedPreviousDef.insert({BB, Result});
+ return Result;
+ }
+
+ if (VisitedBlocks.count(BB)) {
// We hit our node again, meaning we had a cycle, we must insert a phi
// node to break it so we have an operand. The only case this will
// insert useless phis is if we have irreducible control flow.
- return MSSA->createMemoryPhi(BB);
- } else if (VisitedBlocks.insert(BB).second) {
+ MemoryAccess *Result = MSSA->createMemoryPhi(BB);
+ CachedPreviousDef.insert({BB, Result});
+ return Result;
+ }
+
+ if (VisitedBlocks.insert(BB).second) {
// Mark us visited so we can detect a cycle
- SmallVector<MemoryAccess *, 8> PhiOps;
+ SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps;
// Recurse to get the values in our predecessors for placement of a
// potential phi node. This will insert phi nodes if we cycle in order to
// break the cycle and have an operand.
for (auto *Pred : predecessors(BB))
- PhiOps.push_back(getPreviousDefFromEnd(Pred));
+ PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
// Now try to simplify the ops to avoid placing a phi.
// This may return null if we have not created a phi yet; that's okay.
MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB));
- bool PHIExistsButNeedsUpdate = false;
- // See if the existing phi operands match what we need.
- // Unlike normal SSA, we only allow one phi node per block, so we can't just
- // create a new one.
- if (Phi && Phi->getNumOperands() != 0)
- if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
- PHIExistsButNeedsUpdate = true;
- }
// See if we can avoid the phi by simplifying it.
auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
@@ -75,14 +84,20 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
if (!Phi)
Phi = MSSA->createMemoryPhi(BB);
- // These will have been filled in by the recursive read we did above.
- if (PHIExistsButNeedsUpdate) {
- std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
- std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
+ // See if the existing phi operands match what we need.
+ // Unlike normal SSA, we only allow one phi node per block, so we can't just
+ // create a new one.
+ if (Phi->getNumOperands() != 0) {
+ // FIXME: Figure out whether this is dead code and if so remove it.
+ if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
+ // These will have been filled in by the recursive read we did above.
+ std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
+ std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
+ }
} else {
unsigned i = 0;
for (auto *Pred : predecessors(BB))
- Phi->addIncoming(PhiOps[i++], Pred);
+ Phi->addIncoming(&*PhiOps[i++], Pred);
InsertedPHIs.push_back(Phi);
}
Result = Phi;
@@ -90,6 +105,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
// Set ourselves up for the next variable by resetting visited state.
VisitedBlocks.erase(BB);
+ CachedPreviousDef.insert({BB, Result});
return Result;
}
llvm_unreachable("Should have hit one of the three cases above");
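The comment at the top of this function explains why the CachedPreviousDef map exists: without it, CFG shapes such as a ladder of if/else diamonds make the recursion revisit the same blocks exponentially often. A self-contained sketch of the memoization idea on a toy CFG; the types and strings are invented for the example:

#include <map>
#include <vector>
#include <string>
#include <iostream>

// Toy CFG node; the "definition" is just a string for illustration.
struct Block {
  std::string Name;
  std::vector<Block *> Preds;
  bool HasLocalDef = false;
};

// Walk backwards to find the previous definition, memoizing one answer per
// block. Without the cache, a chain of N diamonds would be visited 2^N times.
std::string previousDef(Block *BB, std::map<Block *, std::string> &Cache) {
  auto It = Cache.find(BB);
  if (It != Cache.end())
    return It->second;

  std::string Result;
  if (BB->HasLocalDef)
    Result = "def in " + BB->Name;
  else if (BB->Preds.size() == 1)
    Result = previousDef(BB->Preds[0], Cache);
  else if (BB->Preds.empty())
    Result = "live on entry";
  else
    // Several predecessors: a real updater would place a phi here.
    Result = "phi in " + BB->Name;

  Cache[BB] = Result;
  return Result;
}

int main() {
  Block Entry{"entry"}, Left{"left"}, Right{"right"}, Join{"join"};
  Entry.HasLocalDef = true;
  Left.Preds = {&Entry};
  Right.Preds = {&Entry};
  Join.Preds = {&Left, &Right};
  std::map<Block *, std::string> Cache;
  std::cout << previousDef(&Join, Cache) << "\n"; // phi in join
}

With the cache, each block is resolved at most once, so the walk stays linear in the number of blocks and edges.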
@@ -100,9 +116,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
// it continues globally, creating phi nodes to ensure we have a single
// definition.
MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) {
- auto *LocalResult = getPreviousDefInBlock(MA);
-
- return LocalResult ? LocalResult : getPreviousDefRecursive(MA->getBlock());
+ if (auto *LocalResult = getPreviousDefInBlock(MA))
+ return LocalResult;
+ DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
+ return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef);
}
// This starts at the memory access, and goes backwards in the block to find
@@ -133,13 +150,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) {
}
// This starts at the end of block
-MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(BasicBlock *BB) {
+MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(
+ BasicBlock *BB,
+ DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
auto *Defs = MSSA->getWritableBlockDefs(BB);
if (Defs)
return &*Defs->rbegin();
- return getPreviousDefRecursive(BB);
+ return getPreviousDefRecursive(BB, CachedPreviousDef);
}
// Recurse over a set of phi uses to eliminate the trivial ones
MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
@@ -165,6 +184,10 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
template <class RangeType>
MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
RangeType &Operands) {
+ // Bail out on non-opt Phis.
+ if (NonOptPhis.count(Phi))
+ return Phi;
+
// Detect equal or self arguments
MemoryAccess *Same = nullptr;
for (auto &Op : Operands) {
@@ -174,7 +197,7 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
// not the same, return the phi since it's not eliminatable by us
if (Same)
return Phi;
- Same = cast<MemoryAccess>(Op);
+ Same = cast<MemoryAccess>(&*Op);
}
// Never found a non-self reference, the phi is undef
if (Same == nullptr)
@@ -230,10 +253,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
InsertedPHIs.clear();
// See if we had a local def, and if not, go hunting.
- MemoryAccess *DefBefore = getPreviousDefInBlock(MD);
- bool DefBeforeSameBlock = DefBefore != nullptr;
- if (!DefBefore)
- DefBefore = getPreviousDefRecursive(MD->getBlock());
+ MemoryAccess *DefBefore = getPreviousDef(MD);
+ bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock();
// There is a def before us, which means we can replace any store/phi uses
// of that thing with us, since we are in the way of whatever was there
@@ -255,8 +276,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// above and reset ourselves.
MD->setDefiningAccess(DefBefore);
- SmallVector<MemoryAccess *, 8> FixupList(InsertedPHIs.begin(),
- InsertedPHIs.end());
+ SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
if (!DefBeforeSameBlock) {
// If there was a local def before us, we must have the same effect it
// did. Because every may-def is the same, any phis/etc we would create, it
@@ -277,7 +297,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
fixupDefs(FixupList);
FixupList.clear();
// Put any new phis on the fixup list, and process them
- FixupList.append(InsertedPHIs.end() - StartingPHISize, InsertedPHIs.end());
+ FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end());
}
// Now that all fixups are done, rename all uses if we are asked.
if (RenameUses) {
@@ -294,19 +314,29 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
MSSA->renamePass(MD->getBlock(), FirstDef, Visited);
// We just inserted a phi into this block, so the incoming value will become
// the phi anyway, so it does not matter what we pass.
- for (auto *MP : InsertedPHIs)
- MSSA->renamePass(MP->getBlock(), nullptr, Visited);
+ for (auto &MP : InsertedPHIs) {
+ MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP);
+ if (Phi)
+ MSSA->renamePass(Phi->getBlock(), nullptr, Visited);
+ }
}
}
-void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<MemoryAccess *> &Vars) {
+void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
SmallPtrSet<const BasicBlock *, 8> Seen;
SmallVector<const BasicBlock *, 16> Worklist;
- for (auto *NewDef : Vars) {
+ for (auto &Var : Vars) {
+ MemoryAccess *NewDef = dyn_cast_or_null<MemoryAccess>(Var);
+ if (!NewDef)
+ continue;
// First, see if there is a local def after the operand.
auto *Defs = MSSA->getWritableBlockDefs(NewDef->getBlock());
auto DefIter = NewDef->getDefsIterator();
+  // The temporary Phi is being fixed, so unmark it to allow optimization again.
+ if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(NewDef))
+ NonOptPhis.erase(Phi);
+
// If there is a local def after us, we only have to rename that.
if (++DefIter != Defs->end()) {
cast<MemoryDef>(DefIter)->setDefiningAccess(NewDef);
@@ -366,6 +396,11 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<MemoryAccess *> &Vars) {
template <class WhereType>
void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
WhereType Where) {
+ // Mark MemoryPhi users of What not to be optimized.
+ for (auto *U : What->users())
+ if (MemoryPhi *PhiUser = dyn_cast<MemoryPhi>(U))
+ NonOptPhis.insert(PhiUser);
+
// Replace all our users with our defining access.
What->replaceAllUsesWith(What->getDefiningAccess());
@@ -377,6 +412,10 @@ void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
insertDef(MD);
else
insertUse(cast<MemoryUse>(What));
+
+ // Clear dangling pointers. We added all MemoryPhi users, but not all
+ // of them are removed by fixupDefs().
+ NonOptPhis.clear();
}
// Move What before Where in the MemorySSA IR.
@@ -394,7 +433,57 @@ void MemorySSAUpdater::moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
return moveTo(What, BB, Where);
}
-/// \brief If all arguments of a MemoryPHI are defined by the same incoming
+// All accesses in To used to be in From. Move to end and update access lists.
+void MemorySSAUpdater::moveAllAccesses(BasicBlock *From, BasicBlock *To,
+ Instruction *Start) {
+
+ MemorySSA::AccessList *Accs = MSSA->getWritableBlockAccesses(From);
+ if (!Accs)
+ return;
+
+ MemoryAccess *FirstInNew = nullptr;
+ for (Instruction &I : make_range(Start->getIterator(), To->end()))
+ if ((FirstInNew = MSSA->getMemoryAccess(&I)))
+ break;
+ if (!FirstInNew)
+ return;
+
+ auto *MUD = cast<MemoryUseOrDef>(FirstInNew);
+ do {
+ auto NextIt = ++MUD->getIterator();
+ MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end())
+ ? nullptr
+ : cast<MemoryUseOrDef>(&*NextIt);
+ MSSA->moveTo(MUD, To, MemorySSA::End);
+  // Moving MUD from Accs in the moveTo above may delete Accs, so we need to
+ // retrieve it again.
+ Accs = MSSA->getWritableBlockAccesses(From);
+ MUD = NextMUD;
+ } while (MUD);
+}
+
+void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From,
+ BasicBlock *To,
+ Instruction *Start) {
+ assert(MSSA->getBlockAccesses(To) == nullptr &&
+ "To block is expected to be free of MemoryAccesses.");
+ moveAllAccesses(From, To, Start);
+ for (BasicBlock *Succ : successors(To))
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(Succ))
+ MPhi->setIncomingBlock(MPhi->getBasicBlockIndex(From), To);
+}
+
+void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To,
+ Instruction *Start) {
+ assert(From->getSinglePredecessor() == To &&
+ "From block is expected to have a single predecessor (To).");
+ moveAllAccesses(From, To, Start);
+ for (BasicBlock *Succ : successors(From))
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(Succ))
+ MPhi->setIncomingBlock(MPhi->getBasicBlockIndex(From), To);
+}
+
+/// If all arguments of a MemoryPHI are defined by the same incoming
/// argument, return that argument.
static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
MemoryAccess *MA = nullptr;
@@ -408,6 +497,35 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
return MA;
}
+void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
+ BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds) {
+ assert(!MSSA->getWritableBlockAccesses(New) &&
+ "Access list should be null for a new block.");
+ MemoryPhi *Phi = MSSA->getMemoryAccess(Old);
+ if (!Phi)
+ return;
+ if (pred_size(Old) == 1) {
+ assert(pred_size(New) == Preds.size() &&
+ "Should have moved all predecessors.");
+ MSSA->moveTo(Phi, New, MemorySSA::Beginning);
+ } else {
+ assert(!Preds.empty() && "Must be moving at least one predecessor to the "
+ "new immediate predecessor.");
+ MemoryPhi *NewPhi = MSSA->createMemoryPhi(New);
+ SmallPtrSet<BasicBlock *, 16> PredsSet(Preds.begin(), Preds.end());
+ Phi->unorderedDeleteIncomingIf([&](MemoryAccess *MA, BasicBlock *B) {
+ if (PredsSet.count(B)) {
+ NewPhi->addIncoming(MA, B);
+ return true;
+ }
+ return false;
+ });
+ Phi->addIncoming(NewPhi, New);
+ if (onlySingleValue(NewPhi))
+ removeMemoryAccess(NewPhi);
+ }
+}
+
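wireOldPredecessorsToNewImmediatePredecessor above redistributes a MemoryPhi's incoming entries when some predecessors of Old are rerouted through a freshly created block New: the moved entries become a new phi in New, and Old keeps a single entry coming from New. A small standalone sketch of that bookkeeping, with plain strings standing in for blocks and values (illustrative only):

#include <set>
#include <string>
#include <utility>
#include <vector>
#include <iostream>

// Toy phi: a list of (incoming block, incoming value) pairs.
using Phi = std::vector<std::pair<std::string, std::string>>;

// Move the entries whose incoming block is in Moved from OldPhi into a new
// phi that lives in block NewBlock, then make NewBlock the single incoming
// edge that replaces them in OldPhi.
Phi splitIncoming(Phi &OldPhi, const std::set<std::string> &Moved,
                  const std::string &NewBlock, const std::string &NewPhiName) {
  Phi NewPhi;
  Phi Kept;
  for (auto &Entry : OldPhi) {
    if (Moved.count(Entry.first))
      NewPhi.push_back(Entry); // this edge now reaches us through NewBlock
    else
      Kept.push_back(Entry);
  }
  Kept.push_back({NewBlock, NewPhiName});
  OldPhi = std::move(Kept);
  return NewPhi;
}

int main() {
  Phi OldPhi = {{"A", "v1"}, {"B", "v2"}, {"C", "v3"}};
  Phi NewPhi = splitIncoming(OldPhi, {"A", "B"}, "New", "newphi");
  std::cout << "old now has " << OldPhi.size() << " entries, "
            << "new has " << NewPhi.size() << "\n"; // old 2, new 2
}

If all of the moved entries carry the same value, the new phi is redundant, which is why the code above removes it when onlySingleValue(NewPhi) holds.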
void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
assert(!MSSA->isLiveOnEntryDef(MA) &&
"Trying to remove the live on entry def");
@@ -456,6 +574,39 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
MSSA->removeFromLists(MA);
}
+void MemorySSAUpdater::removeBlocks(
+ const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
+ // First delete all uses of BB in MemoryPhis.
+ for (BasicBlock *BB : DeadBlocks) {
+ TerminatorInst *TI = BB->getTerminator();
+ assert(TI && "Basic block expected to have a terminator instruction");
+ for (BasicBlock *Succ : TI->successors())
+ if (!DeadBlocks.count(Succ))
+ if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) {
+ MP->unorderedDeleteIncomingBlock(BB);
+ if (MP->getNumIncomingValues() == 1)
+ removeMemoryAccess(MP);
+ }
+ // Drop all references of all accesses in BB
+ if (MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB))
+ for (MemoryAccess &MA : *Acc)
+ MA.dropAllReferences();
+ }
+
+ // Next, delete all memory accesses in each block
+ for (BasicBlock *BB : DeadBlocks) {
+ MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB);
+ if (!Acc)
+ continue;
+ for (auto AB = Acc->begin(), AE = Acc->end(); AB != AE;) {
+ MemoryAccess *MA = &*AB;
+ ++AB;
+ MSSA->removeFromLookups(MA);
+ MSSA->removeFromLists(MA);
+ }
+ }
+}
+
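removeBlocks deliberately works in two passes: first every cross-reference out of the dead region is dropped (phi entries and dropAllReferences), and only then are the accesses themselves deleted. A minimal sketch of why the two-pass shape matters when dead nodes reference each other; the toy types here are not MemorySSA classes:

#include <memory>
#include <vector>
#include <iostream>

// Toy "access" that may point at accesses in other dead blocks.
struct Node {
  std::vector<Node *> Operands;
  void dropAllReferences() { Operands.clear(); }
};

// Delete a set of mutually referencing nodes safely: first break every
// cross-reference, then free the storage. Interleaving the two steps could
// leave another dead node holding a dangling pointer while it is processed.
void removeAll(std::vector<std::unique_ptr<Node>> &Dead) {
  for (auto &N : Dead)  // phase 1: drop references
    N->dropAllReferences();
  Dead.clear();         // phase 2: actually delete
}

int main() {
  std::vector<std::unique_ptr<Node>> Dead;
  Dead.push_back(std::make_unique<Node>());
  Dead.push_back(std::make_unique<Node>());
  Dead[0]->Operands.push_back(Dead[1].get());
  Dead[1]->Operands.push_back(Dead[0].get()); // cycle between dead nodes
  removeAll(Dead);
  std::cout << Dead.size() << "\n"; // 0
}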
MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
MemorySSA::InsertionPlace Point) {
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index efa5bd564ad0..17dae20ce3a1 100644
--- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -49,6 +49,7 @@
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -58,6 +59,18 @@ using namespace llvm;
#define DEBUG_TYPE "module-summary-analysis"
+// Option to force edges cold which will block importing when the
+// -import-cold-multiplier is set to 0. Useful for debugging.
+FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold =
+ FunctionSummary::FSHT_None;
+cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
+ "force-summary-edges-cold", cl::Hidden, cl::location(ForceSummaryEdgesCold),
+ cl::desc("Force all edges in the function summary to cold"),
+ cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."),
+ clEnumValN(FunctionSummary::FSHT_AllNonCritical,
+ "all-non-critical", "All non-critical edges."),
+ clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));
+
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
@@ -268,14 +281,23 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
auto ScaledCount = PSI->getProfileCount(&I, BFI);
auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
: CalleeInfo::HotnessType::Unknown;
+ if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None)
+ Hotness = CalleeInfo::HotnessType::Cold;
// Use the original CalledValue, in case it was an alias. We want
// to record the call edge to the alias in that case. Eventually
// an alias summary will be created to associate the alias and
// aliasee.
- CallGraphEdges[Index.getOrInsertValueInfo(
- cast<GlobalValue>(CalledValue))]
- .updateHotness(Hotness);
+ auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
+ cast<GlobalValue>(CalledValue))];
+ ValueInfo.updateHotness(Hotness);
+ // Add the relative block frequency to CalleeInfo if there is no profile
+ // information.
+ if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
+ uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency();
+ uint64_t EntryFreq = BFI->getEntryFreq();
+ ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
+ }
} else {
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
@@ -284,6 +306,18 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (!CalledValue || isa<Constant>(CalledValue))
continue;
+ // Check if the instruction has a callees metadata. If so, add callees
+ // to CallGraphEdges to reflect the references from the metadata, and
+ // to enable importing for subsequent indirect call promotion and
+ // inlining.
+ if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
+ for (auto &Op : MD->operands()) {
+ Function *Callee = mdconst::extract_or_null<Function>(Op);
+ if (Callee)
+ CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
+ }
+ }
+
uint32_t NumVals, NumCandidates;
uint64_t TotalCount;
auto CandidateProfileData =
@@ -299,7 +333,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// sample PGO, to enable the same inlines as the profiled optimized binary.
for (auto &I : F.getImportGUIDs())
CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
- CalleeInfo::HotnessType::Critical);
+ ForceSummaryEdgesCold == FunctionSummary::FSHT_All
+ ? CalleeInfo::HotnessType::Cold
+ : CalleeInfo::HotnessType::Critical);
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
@@ -325,7 +361,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
TypeCheckedLoadConstVCalls.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(F.getGUID());
- Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
+ Index.addGlobalValueSummary(F, std::move(FuncSummary));
}
static void
@@ -341,7 +377,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
- Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary));
+ Index.addGlobalValueSummary(V, std::move(GVarSummary));
}
static void
@@ -357,7 +393,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
AS->setAliasee(AliaseeSummary);
if (NonRenamableLocal)
CantBePromoted.insert(A.getGUID());
- Index.addGlobalValueSummary(A.getName(), std::move(AS));
+ Index.addGlobalValueSummary(A, std::move(AS));
}
// Set LiveRoot flag on entries matching the given value name.
@@ -372,7 +408,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
ProfileSummaryInfo *PSI) {
assert(PSI);
- ModuleSummaryIndex Index;
+ ModuleSummaryIndex Index(/*HaveGVs=*/true);
// Identify the local values in the llvm.used and llvm.compiler.used sets,
// which should not be exported as they would then require renaming and
@@ -419,7 +455,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
/* NotEligibleToImport = */ true,
/* Live = */ true,
/* Local */ GV->isDSOLocal());
- CantBePromoted.insert(GlobalValue::getGUID(Name));
+ CantBePromoted.insert(GV->getGUID());
// Create the appropriate summary type.
if (Function *F = dyn_cast<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
@@ -436,12 +472,12 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
ArrayRef<FunctionSummary::VFuncId>{},
ArrayRef<FunctionSummary::ConstVCall>{},
ArrayRef<FunctionSummary::ConstVCall>{});
- Index.addGlobalValueSummary(Name, std::move(Summary));
+ Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
llvm::make_unique<GlobalVarSummary>(GVFlags,
ArrayRef<ValueInfo>{});
- Index.addGlobalValueSummary(Name, std::move(Summary));
+ Index.addGlobalValueSummary(*GV, std::move(Summary));
}
});
}
@@ -571,14 +607,14 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- Index = buildModuleSummaryIndex(
+ Index.emplace(buildModuleSummaryIndex(
M,
[this](const Function &F) {
return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
*const_cast<Function *>(&F))
.getBFI());
},
- &PSI);
+ &PSI));
return false;
}
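When there is no profile count for a call, the summary code above records the callee edge with the block's frequency expressed relative to the entry frequency. A small sketch of that scaling; the fixed-point scale constant here is a placeholder for the example, not the value used by the summary index:

#include <cstdint>
#include <iostream>

// Express a block's frequency relative to the function entry, scaled so it
// can be stored as an integer.
uint64_t relativeBlockFreq(uint64_t BBFreq, uint64_t EntryFreq) {
  const uint64_t Scale = 256; // hypothetical fixed-point scale
  if (EntryFreq == 0)
    return 0;
  return (BBFreq * Scale) / EntryFreq;
}

int main() {
  // A block executed roughly 3 times per function entry.
  std::cout << relativeBlockFreq(3000, 1000) << "\n"; // 768, i.e. 3.0 * 256
}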
diff --git a/contrib/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm/lib/Analysis/MustExecute.cpp
new file mode 100644
index 000000000000..fc4049874622
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MustExecute.cpp
@@ -0,0 +1,269 @@
+//===- MustExecute.cpp - Printer for isGuaranteedToExecute ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// Computes loop safety information: checks the loop body and header for
+/// instructions that may throw an exception.
+///
+void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
+ assert(CurLoop != nullptr && "CurLoop can't be null");
+ BasicBlock *Header = CurLoop->getHeader();
+ // Setting default safety values.
+ SafetyInfo->MayThrow = false;
+ SafetyInfo->HeaderMayThrow = false;
+ // Iterate over header and compute safety info.
+ SafetyInfo->HeaderMayThrow =
+ !isGuaranteedToTransferExecutionToSuccessor(Header);
+
+ SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
+ // Iterate over loop instructions and compute safety info.
+ // Skip header as it has been computed and stored in HeaderMayThrow.
+ // The first block in loopinfo.Blocks is guaranteed to be the header.
+ assert(Header == *CurLoop->getBlocks().begin() &&
+ "First block must be header");
+ for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
+ BBE = CurLoop->block_end();
+ (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
+ SafetyInfo->MayThrow |=
+ !isGuaranteedToTransferExecutionToSuccessor(*BB);
+
+ // Compute funclet colors if we might sink/hoist in a function with a funclet
+ // personality routine.
+ Function *Fn = CurLoop->getHeader()->getParent();
+ if (Fn->hasPersonalityFn())
+ if (Constant *PersonalityFn = Fn->getPersonalityFn())
+ if (isScopedEHPersonality(classifyEHPersonality(PersonalityFn)))
+ SafetyInfo->BlockColors = colorEHFunclets(*Fn);
+}
+
+/// Return true if we can prove that the given ExitBlock is not reached on the
+/// first iteration of the given loop. That is, the backedge of the loop must
+/// be executed before the ExitBlock is executed in any dynamic execution trace.
+static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
+ const DominatorTree *DT,
+ const Loop *CurLoop) {
+ auto *CondExitBlock = ExitBlock->getSinglePredecessor();
+ if (!CondExitBlock)
+ // expect unique exits
+ return false;
+ assert(CurLoop->contains(CondExitBlock) && "meaning of exit block");
+ auto *BI = dyn_cast<BranchInst>(CondExitBlock->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+ // If condition is constant and false leads to ExitBlock then we always
+ // execute the true branch.
+ if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition()))
+ return BI->getSuccessor(Cond->getZExtValue() ? 1 : 0) == ExitBlock;
+ auto *Cond = dyn_cast<CmpInst>(BI->getCondition());
+ if (!Cond)
+ return false;
+  // TODO: this would be a lot more powerful if we used SCEV, but all the
+  // plumbing is currently missing to pass a pointer in from the pass.
+  // Check for cmp (phi [x, preheader] ...), y where (pred x, y) is known
+ auto *LHS = dyn_cast<PHINode>(Cond->getOperand(0));
+ auto *RHS = Cond->getOperand(1);
+ if (!LHS || LHS->getParent() != CurLoop->getHeader())
+ return false;
+ auto DL = ExitBlock->getModule()->getDataLayout();
+ auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+ auto *SimpleValOrNull = SimplifyCmpInst(Cond->getPredicate(),
+ IVStart, RHS,
+ {DL, /*TLI*/ nullptr,
+ DT, /*AC*/ nullptr, BI});
+ auto *SimpleCst = dyn_cast_or_null<Constant>(SimpleValOrNull);
+ if (!SimpleCst)
+ return false;
+ if (ExitBlock == BI->getSuccessor(0))
+ return SimpleCst->isZeroValue();
+ assert(ExitBlock == BI->getSuccessor(1) && "implied by above");
+ return SimpleCst->isAllOnesValue();
+}
+
+/// Returns true if the instruction in a loop is guaranteed to execute at least
+/// once.
+bool llvm::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo) {
+ // We have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ // If there's a throw in the header block, we can't guarantee we'll reach
+ // Inst unless we can prove that Inst comes before the potential implicit
+ // exit. At the moment, we use a (cheap) hack for the common case where
+ // the instruction of interest is the first one in the block.
+ return !SafetyInfo->HeaderMayThrow ||
+ Inst.getParent()->getFirstNonPHIOrDbg() == &Inst;
+
+ // Somewhere in this loop there is an instruction which may throw and make us
+ // exit the loop.
+ if (SafetyInfo->MayThrow)
+ return false;
+
+ // Note: There are two styles of reasoning intermixed below for
+ // implementation efficiency reasons. They are:
+ // 1) If we can prove that the instruction dominates all exit blocks, then we
+ // know the instruction must have executed on *some* iteration before we
+ // exit. We do not prove *which* iteration the instruction must execute on.
+ // 2) If we can prove that the instruction dominates the latch and all exits
+ // which might be taken on the first iteration, we know the instruction must
+ // execute on the first iteration. This second style allows a conditional
+ // exit before the instruction of interest which is provably not taken on the
+ // first iteration. This is a quite common case for range check like
+ // patterns. TODO: support loops with multiple latches.
+
+ const bool InstDominatesLatch =
+ CurLoop->getLoopLatch() != nullptr &&
+ DT->dominates(Inst.getParent(), CurLoop->getLoopLatch());
+
+ // Get the exit blocks for the current loop.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // Verify that the block dominates each of the exit blocks of the loop.
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(Inst.getParent(), ExitBlock))
+ if (!InstDominatesLatch ||
+ !CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop))
+ return false;
+
+ // As a degenerate case, if the loop is statically infinite then we haven't
+ // proven anything since there are no exit blocks.
+ if (ExitBlocks.empty())
+ return false;
+
+ // FIXME: In general, we have to prove that the loop isn't an infinite loop.
+  // See http://llvm.org/PR24078. (The "ExitBlocks.empty()" check above is
+ // just a special case of this.)
+ return true;
+}
+
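The function above accepts an instruction as guaranteed to execute if, for every exit, one of the two lines of reasoning from the comment applies: the instruction's block dominates that exit, or it dominates the latch and that exit is provably not taken on the first iteration. A compact standalone sketch of that decision, with the dominance and first-iteration facts supplied as callbacks; all names are invented for the example:

#include <functional>
#include <string>
#include <vector>
#include <iostream>

// Decide "guaranteed to execute" from three oracles, mirroring the two rules
// in the function above.
bool guaranteedToExecute(
    const std::string &InstBlock, const std::vector<std::string> &ExitBlocks,
    bool DominatesLatch,
    const std::function<bool(const std::string &, const std::string &)> &Dominates,
    const std::function<bool(const std::string &)> &NotTakenFirstIteration) {
  if (ExitBlocks.empty())
    return false; // possibly infinite loop: nothing proven
  for (const std::string &Exit : ExitBlocks) {
    if (Dominates(InstBlock, Exit))
      continue; // rule 1: on the path to this exit
    if (DominatesLatch && NotTakenFirstIteration(Exit))
      continue; // rule 2: exit skipped on iteration one, and we reach the latch
    return false;
  }
  return true;
}

int main() {
  auto Dominates = [](const std::string &A, const std::string &) {
    return A == "header"; // toy oracle: only the header dominates exits
  };
  auto NotTakenFirst = [](const std::string &) { return true; };
  std::cout << guaranteedToExecute("body", {"exit"}, /*DominatesLatch=*/true,
                                   Dominates, NotTakenFirst)
            << "\n"; // 1: rule 2 applies
}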
+
+namespace {
+ struct MustExecutePrinter : public FunctionPass {
+
+ static char ID; // Pass identification, replacement for typeid
+ MustExecutePrinter() : FunctionPass(ID) {
+ initializeMustExecutePrinterPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+ bool runOnFunction(Function &F) override;
+ };
+}
+
+char MustExecutePrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MustExecutePrinter, "print-mustexecute",
+ "Instructions which execute on loop entry", false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(MustExecutePrinter, "print-mustexecute",
+ "Instructions which execute on loop entry", false, true)
+
+FunctionPass *llvm::createMustExecutePrinter() {
+ return new MustExecutePrinter();
+}
+
+static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) {
+ // TODO: merge these two routines. For the moment, we display the best
+ // result obtained by *either* implementation. This is a bit unfair since no
+ // caller actually gets the full power at the moment.
+ LoopSafetyInfo LSI;
+ computeLoopSafetyInfo(&LSI, L);
+ return isGuaranteedToExecute(I, DT, L, &LSI) ||
+ isGuaranteedToExecuteForEveryIteration(&I, L);
+}
+
+namespace {
+/// An assembly annotator class to print must execute information in
+/// comments.
+class MustExecuteAnnotatedWriter : public AssemblyAnnotationWriter {
+ DenseMap<const Value*, SmallVector<Loop*, 4> > MustExec;
+
+public:
+ MustExecuteAnnotatedWriter(const Function &F,
+ DominatorTree &DT, LoopInfo &LI) {
+ for (auto &I: instructions(F)) {
+ Loop *L = LI.getLoopFor(I.getParent());
+ while (L) {
+ if (isMustExecuteIn(I, L, &DT)) {
+ MustExec[&I].push_back(L);
+ }
+ L = L->getParentLoop();
+      }
+ }
+ }
+ MustExecuteAnnotatedWriter(const Module &M,
+ DominatorTree &DT, LoopInfo &LI) {
+ for (auto &F : M)
+ for (auto &I: instructions(F)) {
+ Loop *L = LI.getLoopFor(I.getParent());
+ while (L) {
+ if (isMustExecuteIn(I, L, &DT)) {
+ MustExec[&I].push_back(L);
+ }
+ L = L->getParentLoop();
+        }
+ }
+ }
+
+
+ void printInfoComment(const Value &V, formatted_raw_ostream &OS) override {
+ if (!MustExec.count(&V))
+ return;
+
+ const auto &Loops = MustExec.lookup(&V);
+ const auto NumLoops = Loops.size();
+ if (NumLoops > 1)
+ OS << " ; (mustexec in " << NumLoops << " loops: ";
+ else
+ OS << " ; (mustexec in: ";
+
+ bool first = true;
+ for (const Loop *L : Loops) {
+ if (!first)
+ OS << ", ";
+ first = false;
+ OS << L->getHeader()->getName();
+ }
+ OS << ")";
+ }
+};
+} // namespace
+
+bool MustExecutePrinter::runOnFunction(Function &F) {
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ MustExecuteAnnotatedWriter Writer(F, DT, LI);
+ F.print(dbgs(), &Writer);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
index 55335f3a7cb0..d6db6386c38b 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -19,7 +19,7 @@
using namespace llvm;
using namespace llvm::objcarc;
-/// \brief A handy option to enable/disable all ARC Optimizations.
+/// A handy option to enable/disable all ARC Optimizations.
bool llvm::objcarc::EnableARCOpts;
static cl::opt<bool, true> EnableARCOptimizations(
"enable-objc-arc-opts", cl::desc("enable/disable all ARC Optimizations"),
diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
index f374dd33f86f..f268e2a9abdd 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -209,6 +209,7 @@ static bool isInertIntrinsic(unsigned ID) {
// Don't let dbg info affect our results.
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
// Short cut: Some intrinsics obviously don't use ObjC pointers.
return true;
default:
@@ -233,7 +234,7 @@ static bool isUseOnlyIntrinsic(unsigned ID) {
}
}
-/// \brief Determine what kind of construct V is.
+/// Determine what kind of construct V is.
ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
if (const Instruction *I = dyn_cast<Instruction>(V)) {
// Any instruction other than bitcast and gep with a pointer operand have a
@@ -331,7 +332,7 @@ ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
return ARCInstKind::None;
}
-/// \brief Test if the given class is a kind of user.
+/// Test if the given class is a kind of user.
bool llvm::objcarc::IsUser(ARCInstKind Class) {
switch (Class) {
case ARCInstKind::User:
@@ -365,7 +366,7 @@ bool llvm::objcarc::IsUser(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
-/// \brief Test if the given class is objc_retain or equivalent.
+/// Test if the given class is objc_retain or equivalent.
bool llvm::objcarc::IsRetain(ARCInstKind Class) {
switch (Class) {
case ARCInstKind::Retain:
@@ -401,7 +402,7 @@ bool llvm::objcarc::IsRetain(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
-/// \brief Test if the given class is objc_autorelease or equivalent.
+/// Test if the given class is objc_autorelease or equivalent.
bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
switch (Class) {
case ARCInstKind::Autorelease:
@@ -435,7 +436,7 @@ bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
-/// \brief Test if the given class represents instructions which return their
+/// Test if the given class represents instructions which return their
/// argument verbatim.
bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
switch (Class) {
@@ -470,7 +471,7 @@ bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
-/// \brief Test if the given class represents instructions which do nothing if
+/// Test if the given class represents instructions which do nothing if
/// passed a null pointer.
bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
switch (Class) {
@@ -505,7 +506,7 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
-/// \brief Test if the given class represents instructions which are always safe
+/// Test if the given class represents instructions which are always safe
/// to mark with the "tail" keyword.
bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
// ARCInstKind::RetainBlock may be given a stack argument.
@@ -541,7 +542,7 @@ bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
-/// \brief Test if the given class represents instructions which are never safe
+/// Test if the given class represents instructions which are never safe
/// to mark with the "tail" keyword.
bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
/// It is never safe to tail call objc_autorelease since by tail calling
@@ -580,7 +581,7 @@ bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
-/// \brief Test if the given class represents instructions which are always safe
+/// Test if the given class represents instructions which are always safe
/// to mark with the nounwind attribute.
bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
// objc_retainBlock is not nounwind because it calls user copy constructors
diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
index a04c0aef04be..6c47651eae9e 100644
--- a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
+++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
@@ -30,7 +30,7 @@ OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB)
LastInstFound = BB->end();
}
-/// \brief Given no cached results, find if \p A comes before \p B in \p BB.
+/// Given no cached results, find if \p A comes before \p B in \p BB.
/// Cache and number out instruction while walking \p BB.
bool OrderedBasicBlock::comesBefore(const Instruction *A,
const Instruction *B) {
@@ -58,7 +58,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
return Inst != B;
}
-/// \brief Find out whether \p A dominates \p B, meaning whether \p A
+/// Find out whether \p A dominates \p B, meaning whether \p A
/// comes before \p B in \p BB. This is a simplification that considers
/// cached instruction positions and ignores other basic blocks, being
/// only relevant to compare relative instructions positions inside \p BB.
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
index 682af4dc708e..858f08f6537a 100644
--- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
diff --git a/contrib/llvm/lib/Analysis/PhiValues.cpp b/contrib/llvm/lib/Analysis/PhiValues.cpp
new file mode 100644
index 000000000000..ef121815d2cf
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PhiValues.cpp
@@ -0,0 +1,196 @@
+//===- PhiValues.cpp - Phi Value Analysis ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PhiValues.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+bool PhiValues::invalidate(Function &, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // PhiValues is invalidated if it isn't preserved.
+ auto PAC = PA.getChecker<PhiValuesAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>());
+}
+
+// The goal here is to find all of the non-phi values reachable from this phi,
+// and to do the same for all of the phis reachable from this phi, as doing so
+// is necessary anyway in order to get the values for this phi. We do this using
+// Tarjan's algorithm with Nuutila's improvements to find the strongly connected
+// components of the phi graph rooted in this phi:
+// * All phis in a strongly connected component will have the same reachable
+// non-phi values. The SCC may not be the maximal subgraph for that set of
+// reachable values, but finding out that isn't really necessary (it would
+// only reduce the amount of memory needed to store the values).
+// * Tarjan's algorithm completes components in a bottom-up manner, i.e. it
+// never completes a component before the components reachable from it have
+// been completed. This means that when we complete a component we have
+// everything we need to collect the values reachable from that component.
+// * We collect both the non-phi values reachable from each SCC, as that's what
+// we're ultimately interested in, and all of the reachable values, i.e.
+// including phis, as that makes invalidateValue easier.
+void PhiValues::processPhi(const PHINode *Phi,
+ SmallVector<const PHINode *, 8> &Stack) {
+ // Initialize the phi with the next depth number.
+ assert(DepthMap.lookup(Phi) == 0);
+ assert(NextDepthNumber != UINT_MAX);
+ unsigned int DepthNumber = ++NextDepthNumber;
+ DepthMap[Phi] = DepthNumber;
+
+ // Recursively process the incoming phis of this phi.
+ for (Value *PhiOp : Phi->incoming_values()) {
+ if (PHINode *PhiPhiOp = dyn_cast<PHINode>(PhiOp)) {
+ // Recurse if the phi has not yet been visited.
+ if (DepthMap.lookup(PhiPhiOp) == 0)
+ processPhi(PhiPhiOp, Stack);
+ assert(DepthMap.lookup(PhiPhiOp) != 0);
+ // If the phi did not become part of a component then this phi and that
+ // phi are part of the same component, so adjust the depth number.
+ if (!ReachableMap.count(DepthMap[PhiPhiOp]))
+ DepthMap[Phi] = std::min(DepthMap[Phi], DepthMap[PhiPhiOp]);
+ }
+ }
+
+ // Now that incoming phis have been handled, push this phi to the stack.
+ Stack.push_back(Phi);
+
+ // If the depth number has not changed then we've finished collecting the phis
+ // of a strongly connected component.
+ if (DepthMap[Phi] == DepthNumber) {
+ // Collect the reachable values for this component. The phis of this
+// component will be those on top of the depth stack with the same or
+ // greater depth number.
+ ConstValueSet Reachable;
+ while (!Stack.empty() && DepthMap[Stack.back()] >= DepthNumber) {
+ const PHINode *ComponentPhi = Stack.pop_back_val();
+ Reachable.insert(ComponentPhi);
+ DepthMap[ComponentPhi] = DepthNumber;
+ for (Value *Op : ComponentPhi->incoming_values()) {
+ if (PHINode *PhiOp = dyn_cast<PHINode>(Op)) {
+ // If this phi is not part of the same component then that component
+ // is guaranteed to have been completed before this one. Therefore we
+ // can just add its reachable values to the reachable values of this
+ // component.
+ auto It = ReachableMap.find(DepthMap[PhiOp]);
+ if (It != ReachableMap.end())
+ Reachable.insert(It->second.begin(), It->second.end());
+ } else {
+ Reachable.insert(Op);
+ }
+ }
+ }
+ ReachableMap.insert({DepthNumber,Reachable});
+
+ // Filter out phis to get the non-phi reachable values.
+ ValueSet NonPhi;
+ for (const Value *V : Reachable)
+ if (!isa<PHINode>(V))
+ NonPhi.insert(const_cast<Value*>(V));
+ NonPhiReachableMap.insert({DepthNumber,NonPhi});
+ }
+}
+
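The comment before processPhi describes the algorithm: Tarjan's SCC computation (with Nuutila's refinement) over the phi graph, where every phi in a strongly connected component ends up sharing one set of reachable non-phi values. A self-contained sketch of the same bookkeeping on a toy graph of integer nodes and string leaves; the structure mirrors processPhi, but none of the names are LLVM's:

#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <iostream>

// Toy "phi graph": each node has operands that are either other nodes (by
// index) or leaf values (strings). Nodes in the same strongly connected
// component reach exactly the same leaves, so they share one result set.
struct Node {
  std::vector<int> NodeOps;         // edges to other nodes
  std::vector<std::string> LeafOps; // non-node values
};

struct PhiLikeSolver {
  const std::vector<Node> &G;
  std::map<int, unsigned> Depth;                    // 0 = unvisited
  std::map<unsigned, std::set<std::string>> Result; // per finished component
  std::vector<int> Stack;
  unsigned NextDepth = 0;

  explicit PhiLikeSolver(const std::vector<Node> &Graph) : G(Graph) {}

  void process(int N) {
    unsigned MyDepth = ++NextDepth;
    Depth[N] = MyDepth;
    for (int Op : G[N].NodeOps) {
      if (Depth[Op] == 0)
        process(Op);
      // If Op's component is not finished yet, we are in a cycle with it.
      if (!Result.count(Depth[Op]))
        Depth[N] = std::min(Depth[N], Depth[Op]);
    }
    Stack.push_back(N);
    if (Depth[N] != MyDepth)
      return; // not the root of a component yet
    std::set<std::string> Reachable;
    while (!Stack.empty() && Depth[Stack.back()] >= MyDepth) {
      int M = Stack.back();
      Stack.pop_back();
      Depth[M] = MyDepth;
      Reachable.insert(G[M].LeafOps.begin(), G[M].LeafOps.end());
      for (int Op : G[M].NodeOps) {
        auto It = Result.find(Depth[Op]);
        if (It != Result.end()) // already-finished component: reuse its set
          Reachable.insert(It->second.begin(), It->second.end());
      }
    }
    Result[MyDepth] = Reachable;
  }
};

int main() {
  // Node 0 and 1 form a cycle; node 0 also sees leaf "a", node 1 sees node 2,
  // and node 2 sees leaf "b".
  std::vector<Node> G(3);
  G[0] = {{1}, {"a"}};
  G[1] = {{0, 2}, {}};
  G[2] = {{}, {"b"}};
  PhiLikeSolver S(G);
  S.process(0);
  for (const std::string &V : S.Result[S.Depth[0]])
    std::cout << V << " "; // a b
  std::cout << "\n";
}

On this example, nodes 0 and 1 form one component and collect both their own leaf "a" and leaf "b" from the already-finished component of node 2.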
+const PhiValues::ValueSet &PhiValues::getValuesForPhi(const PHINode *PN) {
+ if (DepthMap.count(PN) == 0) {
+ SmallVector<const PHINode *, 8> Stack;
+ processPhi(PN, Stack);
+ assert(Stack.empty());
+ }
+ assert(DepthMap.lookup(PN) != 0);
+ return NonPhiReachableMap[DepthMap[PN]];
+}
+
+void PhiValues::invalidateValue(const Value *V) {
+ // Components that can reach V are invalid.
+ SmallVector<unsigned int, 8> InvalidComponents;
+ for (auto &Pair : ReachableMap)
+ if (Pair.second.count(V))
+ InvalidComponents.push_back(Pair.first);
+
+ for (unsigned int N : InvalidComponents) {
+ for (const Value *V : ReachableMap[N])
+ if (const PHINode *PN = dyn_cast<PHINode>(V))
+ DepthMap.erase(PN);
+ NonPhiReachableMap.erase(N);
+ ReachableMap.erase(N);
+ }
+}
+
+void PhiValues::releaseMemory() {
+ DepthMap.clear();
+ NonPhiReachableMap.clear();
+ ReachableMap.clear();
+}
+
+void PhiValues::print(raw_ostream &OS) const {
+ // Iterate through the phi nodes of the function rather than iterating through
+ // DepthMap in order to get predictable ordering.
+ for (const BasicBlock &BB : F) {
+ for (const PHINode &PN : BB.phis()) {
+ OS << "PHI ";
+ PN.printAsOperand(OS, false);
+ OS << " has values:\n";
+ unsigned int N = DepthMap.lookup(&PN);
+ auto It = NonPhiReachableMap.find(N);
+ if (It == NonPhiReachableMap.end())
+ OS << " UNKNOWN\n";
+ else if (It->second.empty())
+ OS << " NONE\n";
+ else
+ for (Value *V : It->second)
+ // Printing of an instruction prints two spaces at the start, so
+ // handle instructions and everything else slightly differently in
+ // order to get consistent indenting.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ OS << *I << "\n";
+ else
+ OS << " " << *V << "\n";
+ }
+ }
+}
+
+AnalysisKey PhiValuesAnalysis::Key;
+PhiValues PhiValuesAnalysis::run(Function &F, FunctionAnalysisManager &) {
+ return PhiValues(F);
+}
+
+PreservedAnalyses PhiValuesPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "PHI Values for function: " << F.getName() << "\n";
+ PhiValues &PI = AM.getResult<PhiValuesAnalysis>(F);
+ for (const BasicBlock &BB : F)
+ for (const PHINode &PN : BB.phis())
+ PI.getValuesForPhi(&PN);
+ PI.print(OS);
+ return PreservedAnalyses::all();
+}
+
+PhiValuesWrapperPass::PhiValuesWrapperPass() : FunctionPass(ID) {
+ initializePhiValuesWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool PhiValuesWrapperPass::runOnFunction(Function &F) {
+ Result.reset(new PhiValues(F));
+ return false;
+}
+
+void PhiValuesWrapperPass::releaseMemory() {
+ Result->releaseMemory();
+}
+
+void PhiValuesWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+char PhiValuesWrapperPass::ID = 0;
+
+INITIALIZE_PASS(PhiValuesWrapperPass, "phi-values", "Phi Values Analysis", false,
+ true)
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
index 2282401085d4..e6b660fe26d7 100644
--- a/contrib/llvm/lib/Analysis/PostDominators.cpp
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -21,6 +21,12 @@ using namespace llvm;
#define DEBUG_TYPE "postdomtree"
+#ifdef EXPENSIVE_CHECKS
+static constexpr bool ExpensiveChecksEnabled = true;
+#else
+static constexpr bool ExpensiveChecksEnabled = false;
+#endif
+
//===----------------------------------------------------------------------===//
// PostDominatorTree Implementation
//===----------------------------------------------------------------------===//
@@ -44,6 +50,13 @@ bool PostDominatorTreeWrapperPass::runOnFunction(Function &F) {
return false;
}
+void PostDominatorTreeWrapperPass::verifyAnalysis() const {
+ if (VerifyDomInfo)
+ assert(DT.verify(PostDominatorTree::VerificationLevel::Full));
+ else if (ExpensiveChecksEnabled)
+ assert(DT.verify(PostDominatorTree::VerificationLevel::Basic));
+}
+
void PostDominatorTreeWrapperPass::print(raw_ostream &OS, const Module *) const {
DT.print(OS);
}
@@ -56,8 +69,7 @@ AnalysisKey PostDominatorTreeAnalysis::Key;
PostDominatorTree PostDominatorTreeAnalysis::run(Function &F,
FunctionAnalysisManager &) {
- PostDominatorTree PDT;
- PDT.recalculate(F);
+ PostDominatorTree PDT(F);
return PDT;
}
diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 347d093b0f61..fb591f5d6a69 100644
--- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -112,7 +112,7 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
// FIXME: The heuristic used below for determining hotness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return FunctionCount && isHotCount(FunctionCount.getValue());
+ return FunctionCount && isHotCount(FunctionCount.getCount());
}
/// Returns true if the function contains hot code. This can include a hot
@@ -125,7 +125,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F,
if (!F || !computeSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
- if (isHotCount(FunctionCount.getValue()))
+ if (isHotCount(FunctionCount.getCount()))
return true;
if (hasSampleProfile()) {
@@ -154,7 +154,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
if (!F || !computeSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
- if (!isColdCount(FunctionCount.getValue()))
+ if (!isColdCount(FunctionCount.getCount()))
return false;
if (hasSampleProfile()) {
@@ -187,7 +187,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) {
// FIXME: The heuristic used below for determining coldness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
 // convenience method that calls isColdCount.
- return FunctionCount && isColdCount(FunctionCount.getValue());
+ return FunctionCount && isColdCount(FunctionCount.getCount());
}
/// Compute the hot and cold thresholds.
@@ -223,6 +223,18 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
return ColdCountThreshold && C <= ColdCountThreshold.getValue();
}
+uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() {
+ if (!HotCountThreshold)
+ computeThresholds();
+ // Return the threshold value itself (not a bool); if it is still unknown,
+ // fall back to a threshold that classifies nothing as hot.
+ return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX;
+}
+
+uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() {
+ if (!ColdCountThreshold)
+ computeThresholds();
+ // Return the threshold value itself (not a bool); if it is still unknown,
+ // fall back to a threshold that classifies nothing as cold.
+ return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
+}
+
bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
auto Count = BFI->getBlockProfileCount(B);
return Count && isHotCount(*Count);
@@ -247,7 +259,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
return isColdCount(*C);
// In SamplePGO, if the caller has been sampled, and there is no profile
- // annotatedon the callsite, we consider the callsite as cold.
+ // annotated on the callsite, we consider the callsite as cold.
// If there is no profile for the caller, and we know the profile is
// accurate, we consider the callsite as cold.
return (hasSampleProfile() &&
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
index 900487323005..2bd611350f46 100644
--- a/contrib/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/RegionPrinter.h"
#endif
#include "llvm/Analysis/RegionInfoImpl.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
@@ -80,7 +81,7 @@ RegionInfo::~RegionInfo() = default;
bool RegionInfo::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
// Check whether the analysis, all analyses on functions, or the function's
- // CFG have been preserved.
+ // CFG has been preserved.
auto PAC = PA.getChecker<RegionInfoAnalysis>();
return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
PAC.preservedSet<CFGAnalyses>());
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
index c5d71b25e022..ed17df2e7e93 100644
--- a/contrib/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -158,12 +158,9 @@ bool RGPassManager::runOnFunction(Function &F) {
}
// Print the region tree after all pass.
- DEBUG(
- dbgs() << "\nRegion tree of function " << F.getName()
- << " after all region Pass:\n";
- RI->dump();
- dbgs() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "\nRegion tree of function " << F.getName()
+ << " after all region Pass:\n";
+ RI->dump(); dbgs() << "\n";);
return Changed;
}
@@ -283,14 +280,14 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O,
bool RegionPass::skipRegion(Region &R) const {
Function &F = *R.getEntry()->getParent();
- if (!F.getContext().getOptBisect().shouldRunPass(this, R))
+ if (!F.getContext().getOptPassGate().shouldRunPass(this, R))
return true;
if (F.hasFnAttribute(Attribute::OptimizeNone)) {
// Report this only once per function.
if (R.getEntry() == &F.getEntryBlock())
- DEBUG(dbgs() << "Skipping pass '" << getPassName()
- << "' on function " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName()
+ << "' on function " << F.getName() << "\n");
return true;
}
return false;
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index bfff7afb5b4e..aa95ace93014 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -83,6 +83,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -205,11 +206,6 @@ static cl::opt<unsigned>
cl::desc("Max coefficients in AddRec during evolving"),
cl::init(16));
-static cl::opt<bool> VersionUnknown(
- "scev-version-unknown", cl::Hidden,
- cl::desc("Use predicated scalar evolution to version SCEVUnknowns"),
- cl::init(false));
-
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -425,24 +421,21 @@ SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scTruncate, op, ty) {
- assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate non-integer value!");
}
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scZeroExtend, op, ty) {
- assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot zero extend non-integer value!");
}
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scSignExtend, op, ty) {
- assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot sign extend non-integer value!");
}
@@ -1260,42 +1253,32 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
- // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
- // eliminate all the truncates, or we replace other casts with truncates.
- if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
+ // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
+ // if after transforming we have at most one truncate, not counting truncates
+ // that replace other casts.
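+ // For example, trunc(zext(a) + x), where a already has the narrow type,
+ // folds to a + trunc(x): the truncate of zext(a) merely replaces a cast and
+ // is not counted. trunc(x + y), in contrast, would introduce two new
+ // truncates and is left alone.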
+ if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)) {
+ auto *CommOp = cast<SCEVCommutativeExpr>(Op);
SmallVector<const SCEV *, 4> Operands;
- bool hasTrunc = false;
- for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
- const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
- if (!isa<SCEVCastExpr>(SA->getOperand(i)))
- hasTrunc = isa<SCEVTruncateExpr>(S);
+ unsigned numTruncs = 0;
+ for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
+ ++i) {
+ const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty);
+ if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S))
+ numTruncs++;
Operands.push_back(S);
}
- if (!hasTrunc)
- return getAddExpr(Operands);
- // In spite we checked in the beginning that ID is not in the cache,
- // it is possible that during recursion and different modification
- // ID came to cache, so if we found it, just return it.
- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
- return S;
- }
-
- // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
- // eliminate all the truncates, or we replace other casts with truncates.
- if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
- SmallVector<const SCEV *, 4> Operands;
- bool hasTrunc = false;
- for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
- const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
- if (!isa<SCEVCastExpr>(SM->getOperand(i)))
- hasTrunc = isa<SCEVTruncateExpr>(S);
- Operands.push_back(S);
+ if (numTruncs < 2) {
+ if (isa<SCEVAddExpr>(Op))
+ return getAddExpr(Operands);
+ else if (isa<SCEVMulExpr>(Op))
+ return getMulExpr(Operands);
+ else
+ llvm_unreachable("Unexpected SCEV type for Op.");
}
- if (!hasTrunc)
- return getMulExpr(Operands);
- // In spite we checked in the beginning that ID is not in the cache,
- // it is possible that during recursion and different modification
- // ID came to cache, so if we found it, just return it.
+ // Although we checked at the beginning that ID is not in the cache, it is
+ // possible that it was inserted into the cache during recursion through a
+ // different modification. So if we find it now, just return it.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
}
@@ -1576,6 +1559,43 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
return false;
}
+// Finds an integer D for an expression (C + x + y + ...) such that the top
+// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or
+// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is
+// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and
+// the (C + x + y + ...) expression is \p WholeAddExpr.
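+// For example, if C = 5 (0b101) and every other operand has at least two
+// trailing zero bits, then D = 1 (the low two bits of C): 5 + x + y becomes
+// 1 + (4 + x + y), and adding 1 to the 4-aligned remainder cannot wrap.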
+static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
+ const SCEVConstant *ConstantTerm,
+ const SCEVAddExpr *WholeAddExpr) {
+ const APInt C = ConstantTerm->getAPInt();
+ const unsigned BitWidth = C.getBitWidth();
+ // Find number of trailing zeros of (x + y + ...) w/o the C first:
+ uint32_t TZ = BitWidth;
+ for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I)
+ TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I)));
+ if (TZ) {
+ // Set D to be as many least significant bits of C as possible while still
+ // guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap:
+ return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C;
+ }
+ return APInt(BitWidth, 0);
+}
+
+// Finds an integer D for an affine AddRec expression {C,+,x} such that the top
+// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the
+// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p
+// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count.
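+// For example, for {5,+,4} the step contributes two trailing zero bits, so
+// D = 1 and the AddRec can be split as 1 + {4,+,4} without wrapping.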
+static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
+ const APInt &ConstantStart,
+ const SCEV *Step) {
+ const unsigned BitWidth = ConstantStart.getBitWidth();
+ const uint32_t TZ = SE.GetMinTrailingZeros(Step);
+ if (TZ)
+ return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth)
+ : ConstantStart;
+ return APInt(BitWidth, 0);
+}
+
const SCEV *
ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1732,9 +1752,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
getUnsignedRangeMax(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
- (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
- isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
- AR->getPostIncExpr(*this), N))) {
+ isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
// Cache knowledge of AR NUW, which is propagated to this
// AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
@@ -1749,9 +1767,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRangeMin(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
- (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
- isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
- AR->getPostIncExpr(*this), N))) {
+ isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) {
// Cache knowledge of AR NW, which is propagated to this
// AddRec. Negative step causes unsigned wrap, but it
// still can't self-wrap.
@@ -1766,6 +1782,23 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
}
}
+ // zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw>
+ // if D + (C - D + Step * n) could be proven to not unsigned wrap
+ // where D maximizes the number of trailing zeros of (C - D + Step * n)
+ if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
+ const APInt &C = SC->getAPInt();
+ const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
+ if (D != 0) {
+ const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
+ const SCEV *SResidual =
+ getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
+ const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
+ return getAddExpr(SZExtD, SZExtR,
+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
+ Depth + 1);
+ }
+ }
+
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
return getAddRecExpr(
@@ -1774,6 +1807,20 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
}
}
+ // zext(A % B) --> zext(A) % zext(B)
+ {
+ const SCEV *LHS;
+ const SCEV *RHS;
+ if (matchURem(Op, LHS, RHS))
+ return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1),
+ getZeroExtendExpr(RHS, Ty, Depth + 1));
+ }
+
+ // zext(A / B) --> zext(A) / zext(B).
+ if (auto *Div = dyn_cast<SCEVUDivExpr>(Op))
+ return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1),
+ getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1));
+
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
// zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
if (SA->hasNoUnsignedWrap()) {
@@ -1784,6 +1831,65 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
+
+ // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...))
+ // if D + (C - D + x + y + ...) could be proven to not unsigned wrap
+ // where D maximizes the number of trailing zeros of (C - D + x + y + ...)
+ //
+ // Address arithmetic often contains expressions like
+ // (zext (add (shl X, C1), C2)), for instance (zext (5 + (4 * X))).
+ // This transformation is useful when proving that such expressions are
+ // equal or differ by a small constant amount; see the LoadStoreVectorizer
+ // pass.
+ if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
+ const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
+ if (D != 0) {
+ const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
+ const SCEV *SResidual =
+ getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
+ const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
+ return getAddExpr(SZExtD, SZExtR,
+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
+ Depth + 1);
+ }
+ }
+ }
+
+ if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) {
+ // zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw>
+ if (SM->hasNoUnsignedWrap()) {
+ // If the multiply does not unsign overflow then we can, by definition,
+ // commute the zero extension with the multiply operation.
+ SmallVector<const SCEV *, 4> Ops;
+ for (const auto *Op : SM->operands())
+ Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
+ return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1);
+ }
+
+ // zext(2^K * (trunc X to iN)) to iM ->
+ // 2^K * (zext(trunc X to i{N-K}) to iM)<nuw>
+ //
+ // Proof:
+ //
+ // zext(2^K * (trunc X to iN)) to iM
+ // = zext((trunc X to iN) << K) to iM
+ // = zext((trunc X to i{N-K}) << K)<nuw> to iM
+ // (because shl removes the top K bits)
+ // = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM
+ // = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>.
+ //
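+ // For a concrete instance with K = 2, N = 8, M = 32:
+ // zext(4 * (trunc X to i8)) to i32
+ // = (4 * (zext (trunc X to i6) to i32))<nuw>.
+ //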
+ if (SM->getNumOperands() == 2)
+ if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0)))
+ if (MulLHS->getAPInt().isPowerOf2())
+ if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) {
+ int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) -
+ MulLHS->getAPInt().logBase2();
+ Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
+ return getMulExpr(
+ getZeroExtendExpr(MulLHS, Ty),
+ getZeroExtendExpr(
+ getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty),
+ SCEV::FlagNUW, Depth + 1);
+ }
}
// The cast wasn't folded; create an explicit cast node.
@@ -1847,24 +1953,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
return getTruncateOrSignExtend(X, Ty);
}
- // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
- if (SA->getNumOperands() == 2) {
- auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
- auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
- if (SMul && SC1) {
- if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
- const APInt &C1 = SC1->getAPInt();
- const APInt &C2 = SC2->getAPInt();
- if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
- C2.ugt(C1) && C2.isPowerOf2())
- return getAddExpr(getSignExtendExpr(SC1, Ty, Depth + 1),
- getSignExtendExpr(SMul, Ty, Depth + 1),
- SCEV::FlagAnyWrap, Depth + 1);
- }
- }
- }
-
// sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
if (SA->hasNoSignedWrap()) {
// If the addition does not sign overflow then we can, by definition,
@@ -1874,6 +1963,28 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1));
return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
}
+
+ // sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...))
+ // if D + (C - D + x + y + ...) could be proven to not signed wrap
+ // where D maximizes the number of trailing zeros of (C - D + x + y + ...)
+ //
+ // For instance, this will bring two seemingly different expressions:
+ // 1 + sext(5 + 20 * %x + 24 * %y) and
+ // sext(6 + 20 * %x + 24 * %y)
+ // to the same form:
+ // 2 + sext(4 + 20 * %x + 24 * %y)
+ if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
+ const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
+ if (D != 0) {
+ const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
+ const SCEV *SResidual =
+ getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
+ const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
+ return getAddExpr(SSExtD, SSExtR,
+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
+ Depth + 1);
+ }
+ }
}
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
@@ -1994,9 +2105,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
getSignedOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
- (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
- isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
- OverflowLimit)))) {
+ isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
@@ -2005,21 +2114,20 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
}
}
- // If Start and Step are constants, check if we can apply this
- // transformation:
- // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
- auto *SC1 = dyn_cast<SCEVConstant>(Start);
- auto *SC2 = dyn_cast<SCEVConstant>(Step);
- if (SC1 && SC2) {
- const APInt &C1 = SC1->getAPInt();
- const APInt &C2 = SC2->getAPInt();
- if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
- C2.isPowerOf2()) {
- Start = getSignExtendExpr(Start, Ty, Depth + 1);
- const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
- AR->getNoWrapFlags());
- return getAddExpr(Start, getSignExtendExpr(NewAR, Ty, Depth + 1),
- SCEV::FlagAnyWrap, Depth + 1);
+ // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
+ // if D + (C - D + Step * n) could be proven to not signed wrap
+ // where D maximizes the number of trailing zeros of (C - D + Step * n)
+ if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
+ const APInt &C = SC->getAPInt();
+ const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
+ if (D != 0) {
+ const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
+ const SCEV *SResidual =
+ getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
+ const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
+ return getAddExpr(SSExtD, SSExtR,
+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
+ Depth + 1);
}
}
@@ -2215,22 +2323,35 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
- if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr &&
- Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) {
+ if (SignOrUnsignWrap != SignOrUnsignMask &&
+ (Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 &&
+ isa<SCEVConstant>(Ops[0])) {
- // (A + C) --> (A + C)<nsw> if the addition does not sign overflow
- // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow
+ auto Opcode = [&] {
+ switch (Type) {
+ case scAddExpr:
+ return Instruction::Add;
+ case scMulExpr:
+ return Instruction::Mul;
+ default:
+ llvm_unreachable("Unexpected SCEV op.");
+ }
+ }();
const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
+
+ // (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow.
if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
- Instruction::Add, C, OBO::NoSignedWrap);
+ Opcode, C, OBO::NoSignedWrap);
if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
}
+
+ // (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow.
if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
- Instruction::Add, C, OBO::NoUnsignedWrap);
+ Opcode, C, OBO::NoUnsignedWrap);
if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
}
@@ -2240,59 +2361,7 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
}
bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) {
- if (!isLoopInvariant(S, L))
- return false;
- // If a value depends on a SCEVUnknown which is defined after the loop, we
- // conservatively assume that we cannot calculate it at the loop's entry.
- struct FindDominatedSCEVUnknown {
- bool Found = false;
- const Loop *L;
- DominatorTree &DT;
- LoopInfo &LI;
-
- FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI)
- : L(L), DT(DT), LI(LI) {}
-
- bool checkSCEVUnknown(const SCEVUnknown *SU) {
- if (auto *I = dyn_cast<Instruction>(SU->getValue())) {
- if (DT.dominates(L->getHeader(), I->getParent()))
- Found = true;
- else
- assert(DT.dominates(I->getParent(), L->getHeader()) &&
- "No dominance relationship between SCEV and loop?");
- }
- return false;
- }
-
- bool follow(const SCEV *S) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
- case scConstant:
- return false;
- case scAddRecExpr:
- case scTruncate:
- case scZeroExtend:
- case scSignExtend:
- case scAddExpr:
- case scMulExpr:
- case scUMaxExpr:
- case scSMaxExpr:
- case scUDivExpr:
- return true;
- case scUnknown:
- return checkSCEVUnknown(cast<SCEVUnknown>(S));
- case scCouldNotCompute:
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- }
- return false;
- }
-
- bool isDone() { return Found; }
- };
-
- FindDominatedSCEVUnknown FSU(L, DT, LI);
- SCEVTraversal<FindDominatedSCEVUnknown> ST(FSU);
- ST.visitAll(S);
- return !FSU.Found;
+ return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader());
}
/// Get a canonical add expression, or something simpler if possible.
@@ -2423,7 +2492,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
if (Ok) {
// Evaluate the expression in the larger type.
- const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1);
+ const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
return getTruncateExpr(Fold, Ty);
@@ -2801,22 +2870,21 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
- // C1*(C2+V) -> C1*C2 + C1*V
if (Ops.size() == 2)
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
- // If any of Add's ops are Adds or Muls with a constant,
- // apply this transformation as well.
- if (Add->getNumOperands() == 2)
- // TODO: There are some cases where this transformation is not
- // profitable, for example:
- // Add = (C0 + X) * Y + Z.
- // Maybe the scope of this transformation should be narrowed down.
- if (containsConstantInAddMulChain(Add))
- return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
- SCEV::FlagAnyWrap, Depth + 1),
- getMulExpr(LHSC, Add->getOperand(1),
- SCEV::FlagAnyWrap, Depth + 1),
- SCEV::FlagAnyWrap, Depth + 1);
+ // C1*(C2+V) -> C1*C2 + C1*V
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ // If any of Add's ops are Adds or Muls with a constant, apply this
+ // transformation as well.
+ //
+ // TODO: There are some cases where this transformation is not
+ // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
+ // this transformation should be narrowed down.
+ if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
+ SCEV::FlagAnyWrap, Depth + 1),
+ getMulExpr(LHSC, Add->getOperand(1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
@@ -3128,6 +3196,21 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
}
}
}
+
+ // (A/B)/C --> A/(B*C) if safe and B*C can be folded.
+ if (const SCEVUDivExpr *OtherDiv = dyn_cast<SCEVUDivExpr>(LHS)) {
+ if (auto *DivisorConstant =
+ dyn_cast<SCEVConstant>(OtherDiv->getRHS())) {
+ bool Overflow = false;
+ APInt NewRHS =
+ DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow);
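+ // If B*C overflows the bit width, the combined divisor exceeds any
+ // possible value of A, so the overall quotient is zero.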
+ if (Overflow) {
+ return getConstant(RHSC->getType(), 0, false);
+ }
+ return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS));
+ }
+ }
+
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
@@ -3579,12 +3662,13 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
// X umax Y umax Y --> X umax Y
// X umax Y --> X, if X is always greater than Y
- if (Ops[i] == Ops[i+1] ||
- isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
- Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning(
+ ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) {
+ Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
--i; --e;
- } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
- Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i],
+ Ops[i + 1])) {
+ Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
--i; --e;
}
@@ -3611,14 +3695,35 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
const SCEV *RHS) {
- // ~smax(~x, ~y) == smin(x, y).
- return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+ SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
+ return getSMinExpr(Ops);
+}
+
+const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ // ~smax(~x, ~y, ~z) == smin(x, y, z).
+ SmallVector<const SCEV *, 2> NotOps;
+ for (auto *S : Ops)
+ NotOps.push_back(getNotSCEV(S));
+ return getNotSCEV(getSMaxExpr(NotOps));
}
const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
const SCEV *RHS) {
- // ~umax(~x, ~y) == umin(x, y)
- return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+ SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
+ return getUMinExpr(Ops);
+}
+
+const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "At least one operand must be!");
+ // Trivial case.
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ // ~umax(~x, ~y, ~z) == umin(x, y, z).
+ SmallVector<const SCEV *, 2> NotOps;
+ for (auto *S : Ops)
+ NotOps.push_back(getNotSCEV(S));
+ return getNotSCEV(getUMaxExpr(NotOps));
}
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
@@ -3670,13 +3775,15 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
/// target-specific information.
bool ScalarEvolution::isSCEVable(Type *Ty) const {
// Integers and pointers are always SCEVable.
- return Ty->isIntegerTy() || Ty->isPointerTy();
+ return Ty->isIntOrPtrTy();
}
/// Return the size in bits of the specified type, for which isSCEVable must
/// return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
+ if (Ty->isPointerTy())
+ return getDataLayout().getIndexTypeSizeInBits(Ty);
return getDataLayout().getTypeSizeInBits(Ty);
}
@@ -3779,6 +3886,24 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
}
}
+/// Check whether value has nuw/nsw/exact set but SCEV does not.
+/// TODO: Ideally the poison flags would be checked recursively, but this is
+/// better than nothing.
+static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (isa<OverflowingBinaryOperator>(I)) {
+ if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
+ if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
+ return true;
+ if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
+ return true;
+ }
+ } else if (isa<PossiblyExactOperator>(I) && I->isExact())
+ return true;
+ }
+ return false;
+}
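+// When this returns true, getSCEV below does not record V in ExprValueMap,
+// presumably so that later expansion of S cannot blindly reuse V, whose
+// nuw/nsw/exact guarantees the SCEV itself does not carry.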
+
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -3792,7 +3917,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
// ValueExprMap before insert S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second) {
+ if (Pair.second && !SCEVLostPoisonFlags(S, V)) {
ExprValueMap[S].insert({V, nullptr});
// If S == Stripped + Offset, add Stripped -> {V, Offset} into
@@ -3895,8 +4020,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
const SCEV *
ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -3909,8 +4033,7 @@ const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
Type *Ty) {
Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -3922,8 +4045,7 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or zero extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrZeroExtend cannot truncate!");
@@ -3935,8 +4057,7 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or sign extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrSignExtend cannot truncate!");
@@ -3948,8 +4069,7 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
@@ -3961,8 +4081,7 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
- (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or noop with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
"getTruncateOrNoop cannot extend!");
@@ -3986,15 +4105,32 @@ const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
- const SCEV *PromotedLHS = LHS;
- const SCEV *PromotedRHS = RHS;
+ SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
+ return getUMinFromMismatchedTypes(Ops);
+}
+
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(
+ SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "At least one operand must be!");
+ // Trivial case.
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ // Find the max type first.
+ Type *MaxType = nullptr;
+ for (auto *S : Ops)
+ if (MaxType)
+ MaxType = getWiderType(MaxType, S->getType());
+ else
+ MaxType = S->getType();
- if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
- PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
- else
- PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+ // Extend all ops to max type.
+ SmallVector<const SCEV *, 2> PromotedOps;
+ for (auto *S : Ops)
+ PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));
- return getUMinExpr(PromotedLHS, PromotedRHS);
+ // Generate umin.
+ return getUMinExpr(PromotedOps);
}
const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
@@ -4071,37 +4207,90 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
namespace {
+/// Takes SCEV S and Loop L. For each AddRec sub-expression whose loop is L,
+/// use its start expression. For an AddRec of any other loop, use the AddRec
+/// itself if IgnoreOtherLoops is true; otherwise the rewrite cannot be done.
+/// If the SCEV contains a loop-variant SCEVUnknown, the rewrite cannot be
+/// done either.
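+/// For example, rewriting {a,+,b}<L> + %c for loop L yields a + %c, while an
+/// AddRec of a different loop is either kept as-is or causes the rewrite to
+/// fail, depending on IgnoreOtherLoops.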
class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
- static const SCEV *rewrite(const SCEV *S, const Loop *L,
- ScalarEvolution &SE) {
+ static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool IgnoreOtherLoops = true) {
SCEVInitRewriter Rewriter(L, SE);
const SCEV *Result = Rewriter.visit(S);
- return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
+ if (Rewriter.hasSeenLoopVariantSCEVUnknown())
+ return SE.getCouldNotCompute();
+ return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops
+ ? SE.getCouldNotCompute()
+ : Result;
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (!SE.isLoopInvariant(Expr, L))
- Valid = false;
+ SeenLoopVariantSCEVUnknown = true;
return Expr;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
- // Only allow AddRecExprs for this loop.
+ // Only re-write AddRecExprs for this loop.
if (Expr->getLoop() == L)
return Expr->getStart();
- Valid = false;
+ SeenOtherLoops = true;
return Expr;
}
- bool isValid() { return Valid; }
+ bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }
+
+ bool hasSeenOtherLoops() { return SeenOtherLoops; }
private:
explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L) {}
const Loop *L;
- bool Valid = true;
+ bool SeenLoopVariantSCEVUnknown = false;
+ bool SeenOtherLoops = false;
+};
+
+/// Takes SCEV S and Loop L. For each AddRec sub-expression whose loop is L,
+/// use its post-increment expression; for an AddRec of any other loop, use
+/// the AddRec itself. If the SCEV contains a loop-variant SCEVUnknown, the
+/// rewrite cannot be done.
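+/// For example, rewriting {a,+,b}<L> for loop L yields its post-increment
+/// form {a+b,+,b}<L>.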
+class SCEVPostIncRewriter : public SCEVRewriteVisitor<SCEVPostIncRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
+ SCEVPostIncRewriter Rewriter(L, SE);
+ const SCEV *Result = Rewriter.visit(S);
+ return Rewriter.hasSeenLoopVariantSCEVUnknown()
+ ? SE.getCouldNotCompute()
+ : Result;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (!SE.isLoopInvariant(Expr, L))
+ SeenLoopVariantSCEVUnknown = true;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ // Only re-write AddRecExprs for this loop.
+ if (Expr->getLoop() == L)
+ return Expr->getPostIncExpr(SE);
+ SeenOtherLoops = true;
+ return Expr;
+ }
+
+ bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }
+
+ bool hasSeenOtherLoops() { return SeenOtherLoops; }
+
+private:
+ explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L) {}
+
+ const Loop *L;
+ bool SeenLoopVariantSCEVUnknown = false;
+ bool SeenOtherLoops = false;
};
/// This class evaluates the compare condition by matching it against the
@@ -4673,7 +4862,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
if (PredIsKnownFalse(StartVal, StartExtended)) {
- DEBUG(dbgs() << "P2 is compile-time false\n";);
+ LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";);
return None;
}
@@ -4681,7 +4870,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
// NSSW or NUSW)
const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
if (PredIsKnownFalse(Accum, AccumExtended)) {
- DEBUG(dbgs() << "P3 is compile-time false\n";);
+ LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";);
return None;
}
@@ -4690,7 +4879,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
if (Expr != ExtendedExpr &&
!isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
- DEBUG (dbgs() << "Added Predicate: " << *Pred);
+ LLVM_DEBUG(dbgs() << "Added Predicate: " << *Pred);
Predicates.push_back(Pred);
}
};
@@ -4953,7 +5142,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// by one iteration:
// PHI(f(0), f({1,+,1})) --> f({0,+,1})
const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
- const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
+ const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false);
if (Shifted != getCouldNotCompute() &&
Start != getCouldNotCompute()) {
const SCEV *StartVal = getSCEV(StartValueV);
@@ -5515,6 +5704,25 @@ ScalarEvolution::getRangeRef(const SCEV *S,
APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
}
+ // The range of a Phi is a subset of the union of the ranges of its inputs.
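+ // For example, a Phi merging values known to lie in [0, 10) and [16, 32)
+ // must lie in their union, which can only tighten the conservative result.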
+ if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) {
+ // Make sure that we do not run over cycled Phis.
+ if (PendingPhiRanges.insert(Phi).second) {
+ ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
+ for (auto &Op : Phi->operands()) {
+ auto OpRange = getRangeRef(getSCEV(Op), SignHint);
+ RangeFromOps = RangeFromOps.unionWith(OpRange);
+ // No point to continue if we already have a full set.
+ if (RangeFromOps.isFullSet())
+ break;
+ }
+ ConservativeResult = ConservativeResult.intersectWith(RangeFromOps);
+ bool Erased = PendingPhiRanges.erase(Phi);
+ assert(Erased && "Failed to erase Phi properly?");
+ (void) Erased;
+ }
+ }
+
return setRange(U, SignHint, std::move(ConservativeResult));
}
@@ -6134,33 +6342,33 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
}
break;
- case Instruction::Shl:
- // Turn shift left of a constant amount into a multiply.
- if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) {
- uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
-
- // If the shift count is not less than the bitwidth, the result of
- // the shift is undefined. Don't try to analyze it, because the
- // resolution chosen here may differ from the resolution chosen in
- // other parts of the compiler.
- if (SA->getValue().uge(BitWidth))
- break;
+ case Instruction::Shl:
+ // Turn shift left of a constant amount into a multiply.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) {
+ uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
- // It is currently not resolved how to interpret NSW for left
- // shift by BitWidth - 1, so we avoid applying flags in that
- // case. Remove this check (or this comment) once the situation
- // is resolved. See
- // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
- // and http://reviews.llvm.org/D8890 .
- auto Flags = SCEV::FlagAnyWrap;
- if (BO->Op && SA->getValue().ult(BitWidth - 1))
- Flags = getNoWrapFlagsFromUB(BO->Op);
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
- Constant *X = ConstantInt::get(getContext(),
- APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
- return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags);
- }
- break;
+ // It is currently not resolved how to interpret NSW for left
+ // shift by BitWidth - 1, so we avoid applying flags in that
+ // case. Remove this check (or this comment) once the situation
+ // is resolved. See
+ // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
+ // and http://reviews.llvm.org/D8890 .
+ auto Flags = SCEV::FlagAnyWrap;
+ if (BO->Op && SA->getValue().ult(BitWidth - 1))
+ Flags = getNoWrapFlagsFromUB(BO->Op);
+
+ Constant *X = ConstantInt::get(
+ getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
+ return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags);
+ }
+ break;
case Instruction::AShr: {
// AShr X, C, where C is a constant.
@@ -6384,11 +6592,11 @@ const SCEV *ScalarEvolution::getExitCount(const Loop *L,
const SCEV *
ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
SCEVUnionPredicate &Preds) {
- return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds);
+ return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds);
}
const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
- return getBackedgeTakenInfo(L).getExact(this);
+ return getBackedgeTakenInfo(L).getExact(L, this);
}
/// Similar to getBackedgeTakenCount, except return the least SCEV value that is
@@ -6445,8 +6653,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// must be cleared in this scope.
BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
- if (Result.getExact(this) != getCouldNotCompute()) {
- assert(isLoopInvariant(Result.getExact(this), L) &&
+ // In product builds the statistics below are not used; the casts avoid
+ // unused-variable warnings.
+ (void)NumTripCountsComputed;
+ (void)NumTripCountsNotComputed;
+#if LLVM_ENABLE_STATS || !defined(NDEBUG)
+ const SCEV *BEExact = Result.getExact(L, this);
+ if (BEExact != getCouldNotCompute()) {
+ assert(isLoopInvariant(BEExact, L) &&
isLoopInvariant(Result.getMax(this), L) &&
"Computed backedge-taken count isn't loop invariant for loop!");
++NumTripCountsComputed;
@@ -6456,6 +6669,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Only count loops that have phi nodes as not being computable.
++NumTripCountsNotComputed;
}
+#endif // LLVM_ENABLE_STATS || !defined(NDEBUG)
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
@@ -6591,6 +6805,12 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
}
}
+void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
+ while (Loop *Parent = L->getParentLoop())
+ L = Parent;
+ forgetLoop(L);
+}
+
void ScalarEvolution::forgetValue(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return;
@@ -6619,28 +6839,35 @@ void ScalarEvolution::forgetValue(Value *V) {
}
/// Get the exact loop backedge taken count considering all loop exits. A
-/// computable result can only be returned for loops with a single exit.
-/// Returning the minimum taken count among all exits is incorrect because one
-/// of the loop's exit limit's may have been skipped. howFarToZero assumes that
-/// the limit of each loop test is never skipped. This is a valid assumption as
-/// long as the loop exits via that test. For precise results, it is the
-/// caller's responsibility to specify the relevant loop exit using
-/// getExact(ExitingBlock, SE).
+/// computable result can only be returned for loops with all exiting blocks
+/// dominating the latch. howFarToZero assumes that the limit of each loop test
+/// is never skipped. This is a valid assumption as long as the loop exits via
+/// that test. For precise results, it is the caller's responsibility to specify
+/// the relevant loop exiting block using getExact(ExitingBlock, SE).
const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE,
+ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
SCEVUnionPredicate *Preds) const {
// If any exits were not computable, the loop is not computable.
if (!isComplete() || ExitNotTaken.empty())
return SE->getCouldNotCompute();
- const SCEV *BECount = nullptr;
+ const BasicBlock *Latch = L->getLoopLatch();
+ // All exiting blocks we have collected must dominate the only backedge.
+ if (!Latch)
+ return SE->getCouldNotCompute();
+
+ // All exiting blocks we have gathered dominate the loop's latch, so the
+ // exact trip count is simply the minimum of all the calculated exit counts.
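+ // For instance, if one exit is taken after at most %n backedges and another
+ // after at most %m, the backedge-taken count is umin(%n, %m).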
+ SmallVector<const SCEV *, 2> Ops;
for (auto &ENT : ExitNotTaken) {
- assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+ const SCEV *BECount = ENT.ExactNotTaken;
+ assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!");
+ assert(SE->DT.dominates(ENT.ExitingBlock, Latch) &&
+ "We should only have known counts for exiting blocks that dominate "
+ "latch!");
+
+ Ops.push_back(BECount);
- if (!BECount)
- BECount = ENT.ExactNotTaken;
- else if (BECount != ENT.ExactNotTaken)
- return SE->getCouldNotCompute();
if (Preds && !ENT.hasAlwaysTruePredicate())
Preds->add(ENT.Predicate.get());
@@ -6648,8 +6875,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE,
"Predicate should be always true!");
}
- assert(BECount && "Invalid not taken count for loop exit");
- return BECount;
+ return SE->getUMinFromMismatchedTypes(Ops);
}
/// Get the exact not taken count for this loop exit.
@@ -6846,99 +7072,60 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
bool AllowPredicates) {
- // Okay, we've chosen an exiting block. See what condition causes us to exit
- // at this block and remember the exit block and whether all other targets
- // lead to the loop header.
- bool MustExecuteLoopHeader = true;
- BasicBlock *Exit = nullptr;
- for (auto *SBB : successors(ExitingBlock))
- if (!L->contains(SBB)) {
- if (Exit) // Multiple exit successors.
- return getCouldNotCompute();
- Exit = SBB;
- } else if (SBB != L->getHeader()) {
- MustExecuteLoopHeader = false;
- }
-
- // At this point, we know we have a conditional branch that determines whether
- // the loop is exited. However, we don't know if the branch is executed each
- // time through the loop. If not, then the execution count of the branch will
- // not be equal to the trip count of the loop.
- //
- // Currently we check for this by checking to see if the Exit branch goes to
- // the loop header. If so, we know it will always execute the same number of
- // times as the loop. We also handle the case where the exit block *is* the
- // loop header. This is common for un-rotated loops.
- //
- // If both of those tests fail, walk up the unique predecessor chain to the
- // header, stopping if there is an edge that doesn't exit the loop. If the
- // header is reached, the execution count of the branch will be equal to the
- // trip count of the loop.
- //
- // More extensive analysis could be done to handle more cases here.
- //
- if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
- // The simple checks failed, try climbing the unique predecessor chain
- // up to the header.
- bool Ok = false;
- for (BasicBlock *BB = ExitingBlock; BB; ) {
- BasicBlock *Pred = BB->getUniquePredecessor();
- if (!Pred)
- return getCouldNotCompute();
- TerminatorInst *PredTerm = Pred->getTerminator();
- for (const BasicBlock *PredSucc : PredTerm->successors()) {
- if (PredSucc == BB)
- continue;
- // If the predecessor has a successor that isn't BB and isn't
- // outside the loop, assume the worst.
- if (L->contains(PredSucc))
- return getCouldNotCompute();
- }
- if (Pred == L->getHeader()) {
- Ok = true;
- break;
- }
- BB = Pred;
- }
- if (!Ok)
- return getCouldNotCompute();
- }
+ assert(L->contains(ExitingBlock) && "Exit count for non-loop block?");
+ // If our exiting block does not dominate the latch, then its connection with
+ // the loop's exit limit may be far from trivial.
+ const BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || !DT.dominates(ExitingBlock, Latch))
+ return getCouldNotCompute();
bool IsOnlyExit = (L->getExitingBlock() != nullptr);
TerminatorInst *Term = ExitingBlock->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
+ bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
+ assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) &&
+ "It should have one successor in loop and one exit block!");
// Proceed to the next level to examine the exit condition expression.
return computeExitLimitFromCond(
- L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1),
+ L, BI->getCondition(), ExitIfTrue,
/*ControlsExit=*/IsOnlyExit, AllowPredicates);
}
- if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
+ // For switch, make sure that there is a single exit from the loop.
+ BasicBlock *Exit = nullptr;
+ for (auto *SBB : successors(ExitingBlock))
+ if (!L->contains(SBB)) {
+ if (Exit) // Multiple exit successors.
+ return getCouldNotCompute();
+ Exit = SBB;
+ }
+ assert(Exit && "Exiting block must have at least one exit");
return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
/*ControlsExit=*/IsOnlyExit);
+ }
return getCouldNotCompute();
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
- const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB,
+ const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
- ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates);
- return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB,
+ ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates);
+ return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue,
ControlsExit, AllowPredicates);
}
Optional<ScalarEvolution::ExitLimit>
ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
- BasicBlock *TBB, BasicBlock *FBB,
- bool ControlsExit, bool AllowPredicates) {
+ bool ExitIfTrue, bool ControlsExit,
+ bool AllowPredicates) {
(void)this->L;
- (void)this->TBB;
- (void)this->FBB;
+ (void)this->ExitIfTrue;
(void)this->AllowPredicates;
- assert(this->L == L && this->TBB == TBB && this->FBB == FBB &&
+ assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
this->AllowPredicates == AllowPredicates &&
"Variance in assumed invariant key components!");
auto Itr = TripCountMap.find({ExitCond, ControlsExit});
@@ -6948,47 +7135,48 @@ ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
}
void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
- BasicBlock *TBB, BasicBlock *FBB,
+ bool ExitIfTrue,
bool ControlsExit,
bool AllowPredicates,
const ExitLimit &EL) {
- assert(this->L == L && this->TBB == TBB && this->FBB == FBB &&
+ assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
this->AllowPredicates == AllowPredicates &&
"Variance in assumed invariant key components!");
auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL});
assert(InsertResult.second && "Expected successful insertion!");
(void)InsertResult;
+ (void)ExitIfTrue;
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
- ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB,
- BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) {
+ ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
+ bool ControlsExit, bool AllowPredicates) {
if (auto MaybeEL =
- Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates))
+ Cache.find(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
return *MaybeEL;
- ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB,
+ ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, ExitIfTrue,
ControlsExit, AllowPredicates);
- Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL);
+ Cache.insert(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates, EL);
return EL;
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
- ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB,
- BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) {
+ ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
+ bool ControlsExit, bool AllowPredicates) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
- bool EitherMayExit = L->contains(TBB);
+ bool EitherMayExit = !ExitIfTrue;
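+ // Exiting on true requires both operands of the And to be true, so neither
+ // operand alone may cause an exit; exiting on false means either operand
+ // being false is enough.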
ExitLimit EL0 = computeExitLimitFromCondCached(
- Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit,
- AllowPredicates);
+ Cache, L, BO->getOperand(0), ExitIfTrue,
+ ControlsExit && !EitherMayExit, AllowPredicates);
ExitLimit EL1 = computeExitLimitFromCondCached(
- Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit,
- AllowPredicates);
+ Cache, L, BO->getOperand(1), ExitIfTrue,
+ ControlsExit && !EitherMayExit, AllowPredicates);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
@@ -7010,7 +7198,6 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
- assert(L->contains(FBB) && "Loop block has no successor in loop!");
if (EL0.MaxNotTaken == EL1.MaxNotTaken)
MaxBECount = EL0.MaxNotTaken;
if (EL0.ExactNotTaken == EL1.ExactNotTaken)
@@ -7031,13 +7218,13 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
- bool EitherMayExit = L->contains(FBB);
+ bool EitherMayExit = ExitIfTrue;
ExitLimit EL0 = computeExitLimitFromCondCached(
- Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit,
- AllowPredicates);
+ Cache, L, BO->getOperand(0), ExitIfTrue,
+ ControlsExit && !EitherMayExit, AllowPredicates);
ExitLimit EL1 = computeExitLimitFromCondCached(
- Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit,
- AllowPredicates);
+ Cache, L, BO->getOperand(1), ExitIfTrue,
+ ControlsExit && !EitherMayExit, AllowPredicates);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
@@ -7059,7 +7246,6 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
- assert(L->contains(TBB) && "Loop block has no successor in loop!");
if (EL0.MaxNotTaken == EL1.MaxNotTaken)
MaxBECount = EL0.MaxNotTaken;
if (EL0.ExactNotTaken == EL1.ExactNotTaken)
@@ -7075,12 +7261,12 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
ExitLimit EL =
- computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+ computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit);
if (EL.hasFullInfo() || !AllowPredicates)
return EL;
// Try again, but use SCEV predicates this time.
- return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit,
+ return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit,
/*AllowPredicates=*/true);
}
@@ -7089,7 +7275,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
// preserve the CFG and is temporarily leaving constant conditions
// in place.
if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
- if (L->contains(FBB) == !CI->getZExtValue())
+ if (ExitIfTrue == !CI->getZExtValue())
// The backedge is always taken.
return getCouldNotCompute();
else
@@ -7098,19 +7284,18 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
}
// If it's not an integer or pointer comparison then compute it the hard way.
- return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+ return computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
}
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB,
+ bool ExitIfTrue,
bool ControlsExit,
bool AllowPredicates) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Pred;
- if (!L->contains(FBB))
+ if (!ExitIfTrue)
Pred = ExitCond->getPredicate();
else
Pred = ExitCond->getInversePredicate();
@@ -7192,7 +7377,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
}
auto *ExhaustiveCount =
- computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+ computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
if (!isa<SCEVCouldNotCompute>(ExhaustiveCount))
return ExhaustiveCount;
@@ -8104,6 +8289,14 @@ const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
return getSCEVAtScope(getSCEV(V), L);
}
+const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const {
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S))
+ return stripInjectiveFunctions(ZExt->getOperand());
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S))
+ return stripInjectiveFunctions(SExt->getOperand());
+ return S;
+}
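Aside (illustration only, not part of the patch): stripping zero/sign extensions here is sound for the equation solved below because these casts are injective and map zero to zero, so e.g. (zext i32 {%n,+,-1}<L> to i64) == 0 holds exactly when {%n,+,-1}<L> == 0. This is what lets howFarToZero look through the cast and still find the underlying AddRec.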
+
/// Finds the minimum unsigned root of the following equation:
///
/// A * X = B (mod N)
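Aside, a standalone sketch of the arithmetic behind this solver (plain C++, no LLVM, illustrative values only; not part of the patch). It assumes N = 2^BW, A != 0 and B != 0: factor the common power of two out of A and B, then multiply by the modular inverse of the remaining odd part of A. __builtin_ctzll is the GCC/Clang count-trailing-zeros builtin.

#include <cstdint>
#include <cstdio>

// Modular inverse of an odd value 'a' modulo 2^Bits (Newton iteration;
// six steps suffice for 64-bit operands).
static uint64_t inverseOddMod2N(uint64_t a, unsigned Bits) {
  uint64_t x = 1;
  for (unsigned i = 0; i < 6; ++i)
    x *= 2 - a * x;                          // unsigned wrap-around is intended
  return Bits == 64 ? x : x & ((1ULL << Bits) - 1);
}

int main() {
  const unsigned BW = 8;                     // N = 2^8; A and B are made up.
  uint64_t A = 6, B = 4;
  unsigned K = __builtin_ctzll(A);           // A = 2^K * (odd part)
  if (__builtin_ctzll(B) < K) {              // B must be divisible by 2^K = gcd(A, N)
    puts("no solution");
    return 0;
  }
  unsigned Bits = BW - K;                    // solve (A >> K) * X = (B >> K) (mod 2^Bits)
  uint64_t X = ((B >> K) * inverseOddMod2N(A >> K, Bits)) &
               ((1ULL << Bits) - 1);         // Bits < 64 here
  printf("X = %llu\n", (unsigned long long)X); // prints 86; indeed 6 * 86 = 4 (mod 256)
  return 0;
}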
@@ -8233,7 +8426,9 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(stripInjectiveFunctions(V));
+
if (!AddRec && AllowPredicates)
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
@@ -8641,43 +8836,88 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
return isKnownNegative(S) || isKnownPositive(S);
}
+std::pair<const SCEV *, const SCEV *>
+ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) {
+ // Compute SCEV on entry of loop L.
+ const SCEV *Start = SCEVInitRewriter::rewrite(S, L, *this);
+ if (Start == getCouldNotCompute())
+ return { Start, Start };
+ // Compute post increment SCEV for loop L.
+ const SCEV *PostInc = SCEVPostIncRewriter::rewrite(S, L, *this);
+ assert(PostInc != getCouldNotCompute() && "Unexpected could not compute");
+ return { Start, PostInc };
+}
+
+bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // First collect all loops.
+ SmallPtrSet<const Loop *, 8> LoopsUsed;
+ getUsedLoops(LHS, LoopsUsed);
+ getUsedLoops(RHS, LoopsUsed);
+
+ if (LoopsUsed.empty())
+ return false;
+
+ // Domination relationship must be a linear order on collected loops.
+#ifndef NDEBUG
+ for (auto *L1 : LoopsUsed)
+ for (auto *L2 : LoopsUsed)
+ assert((DT.dominates(L1->getHeader(), L2->getHeader()) ||
+ DT.dominates(L2->getHeader(), L1->getHeader())) &&
+ "Domination relationship is not a linear order");
+#endif
+
+ const Loop *MDL =
+ *std::max_element(LoopsUsed.begin(), LoopsUsed.end(),
+ [&](const Loop *L1, const Loop *L2) {
+ return DT.properlyDominates(L1->getHeader(), L2->getHeader());
+ });
+
+ // Get init and post increment value for LHS.
+ auto SplitLHS = SplitIntoInitAndPostInc(MDL, LHS);
+ // If LHS contains an unknown non-invariant SCEV then bail out.
+ if (SplitLHS.first == getCouldNotCompute())
+ return false;
+ assert(SplitLHS.second != getCouldNotCompute() && "Unexpected CNC");
+ // Get init and post increment value for RHS.
+ auto SplitRHS = SplitIntoInitAndPostInc(MDL, RHS);
+ // If RHS contains an unknown non-invariant SCEV then bail out.
+ if (SplitRHS.first == getCouldNotCompute())
+ return false;
+ assert(SplitRHS.second != getCouldNotCompute() && "Unexpected CNC");
+ // It is possible that the init SCEV contains an invariant load but it does
+ // not dominate MDL and is not available at MDL loop entry, so we should
+ // check it here.
+ if (!isAvailableAtLoopEntry(SplitLHS.first, MDL) ||
+ !isAvailableAtLoopEntry(SplitRHS.first, MDL))
+ return false;
+
+ return isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first) &&
+ isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second,
+ SplitRHS.second);
+}
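A worked illustration of the split above (hypothetical SCEVs, not from the patch): to prove {0,+,1}<L> u< %n with %n invariant in L and Pred = ULT, SplitIntoInitAndPostInc yields (0, {1,+,1}<L>) for the LHS and (%n, %n) for the RHS. The query then becomes: 0 u< %n must be guarded at the entry of L, and {1,+,1}<L> u< %n must be guarded by the backedge condition; together these give the predicate on every iteration, which is exactly what isKnownPredicate relies on below.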
+
bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Canonicalize the inputs first.
(void)SimplifyICmpOperands(Pred, LHS, RHS);
- // If LHS or RHS is an addrec, check to see if the condition is true in
- // every iteration of the loop.
- // If LHS and RHS are both addrec, both conditions must be true in
- // every iteration of the loop.
- const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
- const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
- bool LeftGuarded = false;
- bool RightGuarded = false;
- if (LAR) {
- const Loop *L = LAR->getLoop();
- if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
- isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
- if (!RAR) return true;
- LeftGuarded = true;
- }
- }
- if (RAR) {
- const Loop *L = RAR->getLoop();
- if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
- isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
- if (!LAR) return true;
- RightGuarded = true;
- }
- }
- if (LeftGuarded && RightGuarded)
+ if (isKnownViaInduction(Pred, LHS, RHS))
return true;
if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
return true;
- // Otherwise see what can be done with known constant ranges.
- return isKnownPredicateViaConstantRanges(Pred, LHS, RHS);
+ // Otherwise see what can be done with some simple reasoning.
+ return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS);
+}
+
+bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred,
+ const SCEVAddRecExpr *LHS,
+ const SCEV *RHS) {
+ const Loop *L = LHS->getLoop();
+ return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS);
}
bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
@@ -8944,7 +9184,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return true;
- if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
+ if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
BasicBlock *Latch = L->getLoopLatch();
@@ -9049,9 +9289,68 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return false;
- if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS))
+ // Both LHS and RHS must be available at loop entry.
+ assert(isAvailableAtLoopEntry(LHS, L) &&
+ "LHS is not available at Loop Entry");
+ assert(isAvailableAtLoopEntry(RHS, L) &&
+ "RHS is not available at Loop Entry");
+
+ if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
+ // If we cannot prove strict comparison (e.g. a > b), maybe we can prove
+ // the facts (a >= b && a != b) separately. A typical situation is when the
+ // non-strict comparison is known from ranges and non-equality is known from
+ // dominating predicates. If we are proving strict comparison, we always try
+ // to prove non-equality and non-strict comparison separately.
+ auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred);
+ const bool ProvingStrictComparison = (Pred != NonStrictPredicate);
+ bool ProvedNonStrictComparison = false;
+ bool ProvedNonEquality = false;
+
+ if (ProvingStrictComparison) {
+ ProvedNonStrictComparison =
+ isKnownViaNonRecursiveReasoning(NonStrictPredicate, LHS, RHS);
+ ProvedNonEquality =
+ isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_NE, LHS, RHS);
+ if (ProvedNonStrictComparison && ProvedNonEquality)
+ return true;
+ }
+
+ // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard.
+ auto ProveViaGuard = [&](BasicBlock *Block) {
+ if (isImpliedViaGuard(Block, Pred, LHS, RHS))
+ return true;
+ if (ProvingStrictComparison) {
+ if (!ProvedNonStrictComparison)
+ ProvedNonStrictComparison =
+ isImpliedViaGuard(Block, NonStrictPredicate, LHS, RHS);
+ if (!ProvedNonEquality)
+ ProvedNonEquality =
+ isImpliedViaGuard(Block, ICmpInst::ICMP_NE, LHS, RHS);
+ if (ProvedNonStrictComparison && ProvedNonEquality)
+ return true;
+ }
+ return false;
+ };
+
+ // Try to prove (Pred, LHS, RHS) using isImpliedCond.
+ auto ProveViaCond = [&](Value *Condition, bool Inverse) {
+ if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse))
+ return true;
+ if (ProvingStrictComparison) {
+ if (!ProvedNonStrictComparison)
+ ProvedNonStrictComparison =
+ isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse);
+ if (!ProvedNonEquality)
+ ProvedNonEquality =
+ isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse);
+ if (ProvedNonStrictComparison && ProvedNonEquality)
+ return true;
+ }
+ return false;
+ };
+
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
@@ -9060,7 +9359,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
Pair.first;
Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
- if (isImpliedViaGuard(Pair.first, Pred, LHS, RHS))
+ if (ProveViaGuard(Pair.first))
return true;
BranchInst *LoopEntryPredicate =
@@ -9069,9 +9368,8 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
LoopEntryPredicate->isUnconditional())
continue;
- if (isImpliedCond(Pred, LHS, RHS,
- LoopEntryPredicate->getCondition(),
- LoopEntryPredicate->getSuccessor(0) != Pair.second))
+ if (ProveViaCond(LoopEntryPredicate->getCondition(),
+ LoopEntryPredicate->getSuccessor(0) != Pair.second))
return true;
}
@@ -9083,7 +9381,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
if (!DT.dominates(CI, L->getHeader()))
continue;
- if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ if (ProveViaCond(CI->getArgOperand(0), false))
return true;
}
@@ -9318,17 +9616,25 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
return M - L;
}
- const SCEV *L, *R;
SCEV::NoWrapFlags Flags;
- if (splitBinaryAdd(Less, L, R, Flags))
- if (const auto *LC = dyn_cast<SCEVConstant>(L))
- if (R == More)
- return -(LC->getAPInt());
-
- if (splitBinaryAdd(More, L, R, Flags))
- if (const auto *LC = dyn_cast<SCEVConstant>(L))
- if (R == Less)
- return LC->getAPInt();
+ const SCEV *LLess = nullptr, *RLess = nullptr;
+ const SCEV *LMore = nullptr, *RMore = nullptr;
+ const SCEVConstant *C1 = nullptr, *C2 = nullptr;
+ // Compare (X + C1) vs X.
+ if (splitBinaryAdd(Less, LLess, RLess, Flags))
+ if ((C1 = dyn_cast<SCEVConstant>(LLess)))
+ if (RLess == More)
+ return -(C1->getAPInt());
+
+ // Compare X vs (X + C2).
+ if (splitBinaryAdd(More, LMore, RMore, Flags))
+ if ((C2 = dyn_cast<SCEVConstant>(LMore)))
+ if (RMore == Less)
+ return C2->getAPInt();
+
+ // Compare (X + C1) vs (X + C2).
+ if (C1 && C2 && RLess == RMore)
+ return C2->getAPInt() - C1->getAPInt();
return None;
}
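For illustration (values invented): with Less = (2 + %x) and More = (7 + %x), neither of the first two checks fires, but the new third case sees C1 = 2, C2 = 7 and RLess == RMore == %x, and returns 7 - 2 = 5, i.e. More - Less, matching the other cases in this function.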
@@ -9405,10 +9711,121 @@ bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
}
// Try to prove (1) or (2), as needed.
- return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
+ return isAvailableAtLoopEntry(FoundRHS, L) &&
+ isLoopEntryGuardedByCond(L, Pred, FoundRHS,
getConstant(FoundRHSLimit));
}
+bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS, unsigned Depth) {
+ const PHINode *LPhi = nullptr, *RPhi = nullptr;
+
+ auto ClearOnExit = make_scope_exit([&]() {
+ if (LPhi) {
+ bool Erased = PendingMerges.erase(LPhi);
+ assert(Erased && "Failed to erase LPhi!");
+ (void)Erased;
+ }
+ if (RPhi) {
+ bool Erased = PendingMerges.erase(RPhi);
+ assert(Erased && "Failed to erase RPhi!");
+ (void)Erased;
+ }
+ });
+
+ // Find the respective Phis and check that they are not already pending.
+ if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS))
+ if (auto *Phi = dyn_cast<PHINode>(LU->getValue())) {
+ if (!PendingMerges.insert(Phi).second)
+ return false;
+ LPhi = Phi;
+ }
+ if (const SCEVUnknown *RU = dyn_cast<SCEVUnknown>(RHS))
+ if (auto *Phi = dyn_cast<PHINode>(RU->getValue())) {
+ // If we detect a loop of Phi nodes being processed by this method, for
+ // example:
+ //
+ // %a = phi i32 [ %some1, %preheader ], [ %b, %latch ]
+ // %b = phi i32 [ %some2, %preheader ], [ %a, %latch ]
+ //
+ // we don't want to deal with a case that complex, so return the conservative
+ // answer false.
+ if (!PendingMerges.insert(Phi).second)
+ return false;
+ RPhi = Phi;
+ }
+
+ // If none of LHS, RHS is a Phi, nothing to do here.
+ if (!LPhi && !RPhi)
+ return false;
+
+ // If there is a SCEVUnknown Phi we are interested in, make it left.
+ if (!LPhi) {
+ std::swap(LHS, RHS);
+ std::swap(FoundLHS, FoundRHS);
+ std::swap(LPhi, RPhi);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!");
+ const BasicBlock *LBB = LPhi->getParent();
+ const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+
+ auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) {
+ return isKnownViaNonRecursiveReasoning(Pred, S1, S2) ||
+ isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) ||
+ isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth);
+ };
+
+ if (RPhi && RPhi->getParent() == LBB) {
+ // Case one: RHS is also a SCEVUnknown Phi from the same basic block.
+ // If we compare two Phis from the same block, and for each entry block
+ // the predicate is true for incoming values from this block, then the
+ // predicate is also true for the Phis.
+ for (const BasicBlock *IncBB : predecessors(LBB)) {
+ const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
+ const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB));
+ if (!ProvedEasily(L, R))
+ return false;
+ }
+ } else if (RAR && RAR->getLoop()->getHeader() == LBB) {
+ // Case two: RHS is also a Phi from the same basic block, and it is an
+ // AddRec. It means that there is a loop which has both AddRec and Unknown
+ // PHIs, so we can compare the incoming values of the AddRec from above the
+ // loop and from the latch with the respective incoming values of LPhi.
+ // TODO: Generalize to handle loops with many inputs in a header.
+ if (LPhi->getNumIncomingValues() != 2) return false;
+
+ auto *RLoop = RAR->getLoop();
+ auto *Predecessor = RLoop->getLoopPredecessor();
+ assert(Predecessor && "Loop with AddRec with no predecessor?");
+ const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Predecessor));
+ if (!ProvedEasily(L1, RAR->getStart()))
+ return false;
+ auto *Latch = RLoop->getLoopLatch();
+ assert(Latch && "Loop with AddRec with no latch?");
+ const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch));
+ if (!ProvedEasily(L2, RAR->getPostIncExpr(*this)))
+ return false;
+ } else {
+ // In all other cases go over the inputs of LHS and compare each of them to
+ // RHS; the predicate is true for (LHS, RHS) if it is true for all such pairs.
+ // At this point RHS is either a non-Phi, or it is a Phi from some block
+ // different from LBB.
+ for (const BasicBlock *IncBB : predecessors(LBB)) {
+ // Check that RHS is available in this block.
+ if (!dominates(RHS, IncBB))
+ return false;
+ const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
+ if (!ProvedEasily(L, RHS))
+ return false;
+ }
+ }
+ return true;
+}
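A hypothetical example of case one above (names invented): given
  %a = phi i32 [ %x, %bb1 ], [ %y, %bb2 ]
  %b = phi i32 [ %u, %bb1 ], [ %v, %bb2 ]
in the same block, proving %a s< %b reduces to proving %x s< %u (the %bb1 inputs) and %y s< %v (the %bb2 inputs) through ProvedEasily; if either pair cannot be proved, the whole query conservatively fails.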
+
bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
@@ -9562,13 +9979,14 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
};
// Acquire values from extensions.
+ auto *OrigLHS = LHS;
auto *OrigFoundLHS = FoundLHS;
LHS = GetOpFromSExt(LHS);
FoundLHS = GetOpFromSExt(FoundLHS);
// Check if the SGT predicate can be proved trivially or using the found context.
auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) {
- return isKnownViaSimpleReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
+ return isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS,
FoundRHS, Depth + 1);
};
@@ -9669,11 +10087,17 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
}
}
+ // If our expression contained SCEVUnknown Phis, and we split it down and now
+ // need to prove something for them, try to prove the predicate for every
+ // possible incoming values of those Phis.
+ if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1))
+ return true;
+
return false;
}
bool
-ScalarEvolution::isKnownViaSimpleReasoning(ICmpInst::Predicate Pred,
+ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
@@ -9695,26 +10119,26 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
- isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
- isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- if (isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
- isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- if (isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
- isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
@@ -10192,6 +10616,31 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
return SE.getCouldNotCompute();
}
+const SCEVAddRecExpr *
+SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const {
+ assert(getNumOperands() > 1 && "AddRec with zero step?");
+ // There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)),
+ // but in this case we cannot guarantee that the value returned will be an
+ // AddRec because SCEV does not have a fixed point where it stops
+ // simplification: it is legal to return ({rec1} + {rec2}). For example, it
+ // may happen if we reach arithmetic depth limit while simplifying. So we
+ // construct the returned value explicitly.
+ SmallVector<const SCEV *, 3> Ops;
+ // If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and
+ // (this + Step) is {A+B,+,B+C,+,...,+,N}.
+ for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i)
+ Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1)));
+ // We know that the last operand is not a constant zero (otherwise it would
+ // have been popped out earlier). This guarantees us that if the result has
+ // the same last operand, then it will also not be popped out, meaning that
+ // the returned value will be an AddRec.
+ const SCEV *Last = getOperand(getNumOperands() - 1);
+ assert(!Last->isZero() && "Recurrency with zero step?");
+ Ops.push_back(Last);
+ return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, getLoop(),
+ SCEV::FlagAnyWrap));
+}
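Aside, a standalone check of the coefficient-wise construction above (plain C++, no LLVM, made-up coefficients; not part of the patch): an AddRec {Ops[0],+,Ops[1],+,...} evaluates at iteration k to the sum of Ops[j] * C(k, j), and the recurrence built above is its value at k + 1.

#include <cassert>

// Binomial coefficient C(k, j), computed exactly with integer arithmetic.
static long long binom(long long k, int j) {
  long long r = 1;
  for (int i = 0; i < j; ++i)
    r = r * (k - i) / (i + 1);
  return r;
}

// Value of the AddRec with coefficients Ops[0..n-1] at iteration k.
static long long evalAddRec(const long long *Ops, int n, long long k) {
  long long v = 0;
  for (int j = 0; j < n; ++j)
    v += Ops[j] * binom(k, j);
  return v;
}

int main() {
  const long long Rec[3]     = {7, 3, 2};         // {7,+,3,+,2}
  const long long PostInc[3] = {7 + 3, 3 + 2, 2}; // {10,+,5,+,2}, as built above
  for (long long k = 0; k < 20; ++k)
    assert(evalAddRec(PostInc, 3, k) == evalAddRec(Rec, 3, k + 1));
  return 0;
}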
+
// Return true when S contains at least an undef value.
static inline bool containsUndefs(const SCEV *S) {
return SCEVExprContains(S, [](const SCEV *S) {
@@ -10334,22 +10783,22 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
SCEVCollectStrides StrideCollector(*this, Strides);
visitAll(Expr, StrideCollector);
- DEBUG({
- dbgs() << "Strides:\n";
- for (const SCEV *S : Strides)
- dbgs() << *S << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "Strides:\n";
+ for (const SCEV *S : Strides)
+ dbgs() << *S << "\n";
+ });
for (const SCEV *S : Strides) {
SCEVCollectTerms TermCollector(Terms);
visitAll(S, TermCollector);
}
- DEBUG({
- dbgs() << "Terms:\n";
- for (const SCEV *T : Terms)
- dbgs() << *T << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
visitAll(Expr, MulCollector);
@@ -10460,18 +10909,18 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
if (!containsParameters(Terms))
return;
- DEBUG({
- dbgs() << "Terms:\n";
- for (const SCEV *T : Terms)
- dbgs() << *T << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
// Remove duplicates.
array_pod_sort(Terms.begin(), Terms.end());
Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
// Put larger terms first.
- std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+ llvm::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
return numberOfTerms(LHS) > numberOfTerms(RHS);
});
@@ -10491,11 +10940,11 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
if (const SCEV *NewT = removeConstantFactors(*this, T))
NewTerms.push_back(NewT);
- DEBUG({
- dbgs() << "Terms after sorting:\n";
- for (const SCEV *T : NewTerms)
- dbgs() << *T << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "Terms after sorting:\n";
+ for (const SCEV *T : NewTerms)
+ dbgs() << *T << "\n";
+ });
if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) {
Sizes.clear();
@@ -10505,11 +10954,11 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
// The last element to be pushed into Sizes is the size of an element.
Sizes.push_back(ElementSize);
- DEBUG({
- dbgs() << "Sizes:\n";
- for (const SCEV *S : Sizes)
- dbgs() << *S << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "Sizes:\n";
+ for (const SCEV *S : Sizes)
+ dbgs() << *S << "\n";
+ });
}
void ScalarEvolution::computeAccessFunctions(
@@ -10529,13 +10978,13 @@ void ScalarEvolution::computeAccessFunctions(
const SCEV *Q, *R;
SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
- DEBUG({
- dbgs() << "Res: " << *Res << "\n";
- dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
- dbgs() << "Res divided by Sizes[i]:\n";
- dbgs() << "Quotient: " << *Q << "\n";
- dbgs() << "Remainder: " << *R << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "Res: " << *Res << "\n";
+ dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
+ dbgs() << "Res divided by Sizes[i]:\n";
+ dbgs() << "Quotient: " << *Q << "\n";
+ dbgs() << "Remainder: " << *R << "\n";
+ });
Res = Q;
@@ -10563,11 +11012,11 @@ void ScalarEvolution::computeAccessFunctions(
std::reverse(Subscripts.begin(), Subscripts.end());
- DEBUG({
- dbgs() << "Subscripts:\n";
- for (const SCEV *S : Subscripts)
- dbgs() << *S << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "Subscripts:\n";
+ for (const SCEV *S : Subscripts)
+ dbgs() << *S << "\n";
+ });
}
/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
@@ -10641,17 +11090,17 @@ void ScalarEvolution::delinearize(const SCEV *Expr,
if (Subscripts.empty())
return;
- DEBUG({
- dbgs() << "succeeded to delinearize " << *Expr << "\n";
- dbgs() << "ArrayDecl[UnknownSize]";
- for (const SCEV *S : Sizes)
- dbgs() << "[" << *S << "]";
+ LLVM_DEBUG({
+ dbgs() << "succeeded to delinearize " << *Expr << "\n";
+ dbgs() << "ArrayDecl[UnknownSize]";
+ for (const SCEV *S : Sizes)
+ dbgs() << "[" << *S << "]";
- dbgs() << "\nArrayRef";
- for (const SCEV *S : Subscripts)
- dbgs() << "[" << *S << "]";
- dbgs() << "\n";
- });
+ dbgs() << "\nArrayRef";
+ for (const SCEV *S : Subscripts)
+ dbgs() << "[" << *S << "]";
+ dbgs() << "\n";
+ });
}
//===----------------------------------------------------------------------===//
@@ -10728,6 +11177,8 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
+ PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
+ PendingMerges(std::move(Arg.PendingMerges)),
MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
@@ -10771,6 +11222,8 @@ ScalarEvolution::~ScalarEvolution() {
BTCI.second.clear();
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+ assert(PendingPhiRanges.empty() && "getRangeRef garbage");
+ assert(PendingMerges.empty() && "isImpliedViaMerge garbage");
assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}
@@ -11181,9 +11634,13 @@ ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
-void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
+void
+ScalarEvolution::getUsedLoops(const SCEV *S,
+ SmallPtrSetImpl<const Loop *> &LoopsUsed) {
struct FindUsedLoops {
- SmallPtrSet<const Loop *, 8> LoopsUsed;
+ FindUsedLoops(SmallPtrSetImpl<const Loop *> &LoopsUsed)
+ : LoopsUsed(LoopsUsed) {}
+ SmallPtrSetImpl<const Loop *> &LoopsUsed;
bool follow(const SCEV *S) {
if (auto *AR = dyn_cast<SCEVAddRecExpr>(S))
LoopsUsed.insert(AR->getLoop());
@@ -11193,10 +11650,14 @@ void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
bool isDone() const { return false; }
};
- FindUsedLoops F;
+ FindUsedLoops F(LoopsUsed);
SCEVTraversal<FindUsedLoops>(F).visitAll(S);
+}
- for (auto *L : F.LoopsUsed)
+void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
+ SmallPtrSet<const Loop *, 8> LoopsUsed;
+ getUsedLoops(S, LoopsUsed);
+ for (auto *L : LoopsUsed)
LoopUsers[L].push_back(S);
}
@@ -11472,8 +11933,6 @@ private:
// couldn't create an AddRec for it, or couldn't add the predicate), we just
// return \p Expr.
const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
- if (!VersionUnknown)
- return Expr;
if (!isa<PHINode>(Expr->getValue()))
return Expr;
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
@@ -11481,6 +11940,12 @@ private:
if (!PredicatedRewrite)
return Expr;
for (auto *P : PredicatedRewrite->second){
+ // Wrap predicates from outer loops are not supported.
+ if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) {
+ auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr());
+ if (L != AR->getLoop())
+ return Expr;
+ }
if (!addOverflowAssumption(P))
return Expr;
}
@@ -11786,3 +12251,43 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth + 2) << "--> " << *II->second.second << "\n";
}
}
+
+// Match the mathematical pattern A - (A / B) * B, where A and B can be
+// arbitrary expressions.
+// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is
+// 4; then A / B becomes X / 8).
+bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
+ const SCEV *&RHS) {
+ const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
+ if (Add == nullptr || Add->getNumOperands() != 2)
+ return false;
+
+ const SCEV *A = Add->getOperand(1);
+ const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
+
+ if (Mul == nullptr)
+ return false;
+
+ const auto MatchURemWithDivisor = [&](const SCEV *B) {
+ // (SomeExpr + (-(SomeExpr / B) * B)).
+ if (Expr == getURemExpr(A, B)) {
+ LHS = A;
+ RHS = B;
+ return true;
+ }
+ return false;
+ };
+
+ // (SomeExpr + (-1 * (SomeExpr / B) * B)).
+ if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
+ return MatchURemWithDivisor(Mul->getOperand(1)) ||
+ MatchURemWithDivisor(Mul->getOperand(2));
+
+ // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
+ if (Mul->getNumOperands() == 2)
+ return MatchURemWithDivisor(Mul->getOperand(1)) ||
+ MatchURemWithDivisor(Mul->getOperand(0)) ||
+ MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) ||
+ MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
+ return false;
+}
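Aside, a standalone check of the identity the matcher above is built on (plain C++, no LLVM, illustrative constants; not part of the patch): A - (A / B) * B equals A % B for unsigned values, including the folded shape mentioned in the comment where A is X / 2 and B is 4, so A / B already appears as X / 8.

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t X = 0; X < 1000; ++X) {
    uint64_t A = X, B = 3;
    assert(A - (A / B) * B == A % B);       // the plain pattern
    A = X / 2;
    B = 4;                                  // A / B reaches SCEV already folded to X / 8
    assert(A - (X / 8) * B == A % B);
  }
  return 0;
}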
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 53ce33bacbe9..8f89389c4b5d 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -589,6 +589,12 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
return expand(SE.getAddExpr(Ops));
}
+Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
+ Value *V) {
+ const SCEV *const Ops[1] = {Op};
+ return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
+}
+
/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
@@ -1036,8 +1042,7 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
if (!isa<ConstantInt>(StepV))
GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
GEPPtrTy->getAddressSpace());
- const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
- IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
if (IncV->getType() != PN->getType()) {
IncV = Builder.CreateBitCast(IncV, PN->getType());
rememberInstruction(IncV);
@@ -1051,7 +1056,7 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
return IncV;
}
-/// \brief Hoist the addrec instruction chain rooted in the loop phi above the
+/// Hoist the addrec instruction chain rooted in the loop phi above the
/// position. This routine assumes that this is possible (has been checked).
void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
Instruction *Pos, PHINode *LoopPhi) {
@@ -1067,7 +1072,7 @@ void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
} while (InstToHoist != LoopPhi);
}
-/// \brief Check whether we can cheaply express the requested SCEV in terms of
+/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
const SCEVAddRecExpr *Phi,
@@ -1169,8 +1174,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (!IsMatchingSCEV && !TryNonMatchingSCEV)
continue;
+ // TODO: This can possibly be reworked to avoid the cast altogether.
Instruction *TempIncV =
- cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
+ dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
+ if (!TempIncV)
+ continue;
// Check whether we can reuse this PHI node.
if (LSRMode) {
@@ -1387,7 +1395,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// IVUsers tries to prevent this case, so it is rare. However, it can
// happen when an IVUser outside the loop is not dominated by the latch
// block. Adjusting IVIncInsertPos before expansion begins cannot handle
- // all cases. Consider a phi outide whose operand is replaced during
+ // all cases. Consider a phi outside whose operand is replaced during
// expansion with the value of the postinc user. Without fundamentally
// changing the way postinc users are tracked, the only remedy is
// inserting an extra IV increment. StepV might fold into PostLoopOffset,
@@ -1407,7 +1415,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
}
// We have decided to reuse an induction variable of a dominating loop. Apply
- // truncation and/or invertion of the step.
+ // truncation and/or inversion of the step.
if (TruncTy) {
Type *ResTy = Result->getType();
// Normalize the result type.
@@ -1440,12 +1448,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
if (Result->getType()->isIntegerTy()) {
Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
- const SCEV *const OffsetArray[1] = {SE.getUnknown(Result)};
- Result = expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Base);
+ Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
} else {
- const SCEV *const OffsetArray[1] = {PostLoopOffset};
- Result =
- expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Result);
+ Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
}
} else {
Result = InsertNoopCastOfTo(Result, IntTy);
@@ -1497,9 +1502,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
// comments on expandAddToGEP for details.
const SCEV *Base = S->getStart();
- const SCEV *RestArray[1] = { Rest };
// Dig into the expression to find the pointer base for a GEP.
- ExposePointerBase(Base, RestArray[0], SE);
+ const SCEV *ExposedRest = Rest;
+ ExposePointerBase(Base, ExposedRest, SE);
// If we found a pointer, expand the AddRec with a GEP.
if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
// Make sure the Base isn't something exotic, such as a multiplied
@@ -1508,7 +1513,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
Value *StartV = expand(Base);
assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
- return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
+ return expandAddToGEP(ExposedRest, PTy, Ty, StartV);
}
}
@@ -1862,7 +1867,7 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Phis.push_back(&PN);
if (TTI)
- std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
+ llvm::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
// Put pointers at the back and make sure pointer < pointer = false.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
@@ -2154,8 +2159,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
const SCEV *Step = AR->getStepRecurrence(SE);
const SCEV *Start = AR->getStart();
+ Type *ARTy = AR->getType();
unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
- unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
+ unsigned DstBits = SE.getTypeSizeInBits(ARTy);
// The expression {Start,+,Step} has nusw/nssw if
// Step < 0, Start - |Step| * Backedge <= Start
@@ -2167,11 +2173,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
IntegerType *Ty =
- IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
+ IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+ Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
Value *StepValue = expandCodeFor(Step, Ty, Loc);
Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeFor(Start, Ty, Loc);
+ Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2194,8 +2201,18 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Compute:
// Start + |Step| * Backedge < Start
// Start - |Step| * Backedge > Start
- Value *Add = Builder.CreateAdd(StartValue, MulV);
- Value *Sub = Builder.CreateSub(StartValue, MulV);
+ Value *Add = nullptr, *Sub = nullptr;
+ if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
+ const SCEV *MulS = SE.getSCEV(MulV);
+ const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
+ Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ Sub = Builder.CreateBitCast(
+ expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
+ } else {
+ Add = Builder.CreateAdd(StartValue, MulV);
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }
Value *EndCompareGT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
@@ -2209,7 +2226,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// If the backedge taken count type is larger than the AR type,
// check that we don't drop any bits by truncating it. If we are
- // droping bits, then we have overflow (unless the step is zero).
+ // dropping bits, then we have overflow (unless the step is zero).
if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
auto *BackedgeCheck =
diff --git a/contrib/llvm/lib/Analysis/StratifiedSets.h b/contrib/llvm/lib/Analysis/StratifiedSets.h
index 772df175b384..2f20cd12506c 100644
--- a/contrib/llvm/lib/Analysis/StratifiedSets.h
+++ b/contrib/llvm/lib/Analysis/StratifiedSets.h
@@ -29,7 +29,7 @@ typedef unsigned StratifiedIndex;
/// NOTE: ^ This can't be a short -- bootstrapping clang has a case where
/// ~1M sets exist.
-// \brief Container of information related to a value in a StratifiedSet.
+// Container of information related to a value in a StratifiedSet.
struct StratifiedInfo {
StratifiedIndex Index;
/// For field sensitivity, etc. we can tack fields on here.
@@ -37,7 +37,7 @@ struct StratifiedInfo {
/// A "link" between two StratifiedSets.
struct StratifiedLink {
- /// \brief This is a value used to signify "does not exist" where the
+ /// This is a value used to signify "does not exist" where the
/// StratifiedIndex type is used.
///
/// This is used instead of Optional<StratifiedIndex> because
@@ -63,7 +63,7 @@ struct StratifiedLink {
void clearAbove() { Above = SetSentinel; }
};
-/// \brief These are stratified sets, as described in "Fast algorithms for
+/// These are stratified sets, as described in "Fast algorithms for
/// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M
/// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets
/// of Value*s. If two Value*s are in the same set, or if both sets have
@@ -172,7 +172,7 @@ private:
/// remap has occurred, and use this information so we can defer renumbering set
/// elements until build time.
template <typename T> class StratifiedSetsBuilder {
- /// \brief Represents a Stratified Set, with information about the Stratified
+ /// Represents a Stratified Set, with information about the Stratified
/// Set above it, the set below it, and whether the current set has been
/// remapped to another.
struct BuilderLink {
@@ -263,7 +263,7 @@ template <typename T> class StratifiedSetsBuilder {
StratifiedIndex Remap;
};
- /// \brief This function performs all of the set unioning/value renumbering
+ /// This function performs all of the set unioning/value renumbering
/// that we've been putting off, and generates a vector<StratifiedLink> that
/// may be placed in a StratifiedSets instance.
void finalizeSets(std::vector<StratifiedLink> &StratLinks) {
@@ -302,7 +302,7 @@ template <typename T> class StratifiedSetsBuilder {
}
}
- /// \brief There's a guarantee in StratifiedLink where all bits set in a
+ /// There's a guarantee in StratifiedLink where all bits set in a
/// Link.externals will be set in all Link.externals "below" it.
static void propagateAttrs(std::vector<StratifiedLink> &Links) {
const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) {
@@ -351,7 +351,7 @@ public:
return addAtMerging(Main, NewIndex);
}
- /// \brief Restructures the stratified sets as necessary to make "ToAdd" in a
+ /// Restructures the stratified sets as necessary to make "ToAdd" in a
/// set above "Main". There are some cases where this is not possible (see
/// above), so we merge them such that ToAdd and Main are in the same set.
bool addAbove(const T &Main, const T &ToAdd) {
@@ -364,7 +364,7 @@ public:
return addAtMerging(ToAdd, Above);
}
- /// \brief Restructures the stratified sets as necessary to make "ToAdd" in a
+ /// Restructures the stratified sets as necessary to make "ToAdd" in a
/// set below "Main". There are some cases where this is not possible (see
/// above), so we merge them such that ToAdd and Main are in the same set.
bool addBelow(const T &Main, const T &ToAdd) {
@@ -437,7 +437,7 @@ private:
return *Current;
}
- /// \brief Merges two sets into one another. Assumes that these sets are not
+ /// Merges two sets into one another. Assumes that these sets are not
/// already one in the same.
void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) {
assert(inbounds(Idx1) && inbounds(Idx2));
@@ -458,7 +458,7 @@ private:
mergeDirect(Idx1, Idx2);
}
- /// \brief Merges two sets assuming that the set at `Idx1` is unreachable from
+ /// Merges two sets assuming that the set at `Idx1` is unreachable from
/// traversing above or below the set at `Idx2`.
void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) {
assert(inbounds(Idx1) && inbounds(Idx2));
diff --git a/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp
new file mode 100644
index 000000000000..b085fa274d7f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp
@@ -0,0 +1,113 @@
+//===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities for propagating synthetic counts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/SyntheticCountsUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+// Given an SCC, propagate entry counts along the edge of the SCC nodes.
+template <typename CallGraphType>
+void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
+ const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
+ AddCountTy AddCount) {
+
+ SmallPtrSet<NodeRef, 8> SCCNodes;
+ SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges;
+
+ for (auto &Node : SCC)
+ SCCNodes.insert(Node);
+
+ // Partition the edges coming out of the SCC into those whose destination is
+ // in the SCC and the rest.
+ for (const auto &Node : SCCNodes) {
+ for (auto &E : children_edges<CallGraphType>(Node)) {
+ if (SCCNodes.count(CGT::edge_dest(E)))
+ SCCEdges.emplace_back(Node, E);
+ else
+ NonSCCEdges.emplace_back(Node, E);
+ }
+ }
+
+ // For nodes in the same SCC, update the counts in two steps:
+ // 1. Compute the additional count for each node by propagating the counts
+ // along all incoming edges to the node that originate from within the same
+ // SCC and summing them up.
+ // 2. Add the additional counts to the nodes in the SCC.
+ // This ensures that the order of traversal of nodes within the SCC doesn't
+ // affect the final result.
+
+ DenseMap<NodeRef, uint64_t> AdditionalCounts;
+ for (auto &E : SCCEdges) {
+ auto OptRelFreq = GetRelBBFreq(E.second);
+ if (!OptRelFreq)
+ continue;
+ Scaled64 RelFreq = OptRelFreq.getValue();
+ auto Caller = E.first;
+ auto Callee = CGT::edge_dest(E.second);
+ RelFreq *= Scaled64(GetCount(Caller), 0);
+ uint64_t AdditionalCount = RelFreq.toInt<uint64_t>();
+ AdditionalCounts[Callee] += AdditionalCount;
+ }
+
+ // Update the counts for the nodes in the SCC.
+ for (auto &Entry : AdditionalCounts)
+ AddCount(Entry.first, Entry.second);
+
+ // Now update the counts for nodes outside the SCC.
+ for (auto &E : NonSCCEdges) {
+ auto OptRelFreq = GetRelBBFreq(E.second);
+ if (!OptRelFreq)
+ continue;
+ Scaled64 RelFreq = OptRelFreq.getValue();
+ auto Caller = E.first;
+ auto Callee = CGT::edge_dest(E.second);
+ RelFreq *= Scaled64(GetCount(Caller), 0);
+ AddCount(Callee, RelFreq.toInt<uint64_t>());
+ }
+}
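A small numeric illustration of the two-step update above (numbers invented): take an SCC {A, B} where each node calls the other with relative frequency 0.5 and the current counts are A = 100, B = 60. AdditionalCounts is computed from the old counts only (B gets 0.5 * 100 = 50, A gets 0.5 * 60 = 30) and then applied, giving A = 130, B = 110 no matter which node is visited first. Updating the counts in place while iterating would instead give A = 155, B = 110 or A = 130, B = 125, depending on the visitation order.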
+
+/// Propagate synthetic entry counts on a callgraph \p CG.
+///
+/// This performs a reverse post-order traversal of the callgraph SCCs. For each
+/// SCC, it first propagates the entry counts to the nodes within the SCC
+/// through call edges and updates them in one shot. Then the entry counts are
+/// propagated to nodes outside the SCC. This requires \p GraphTraits
+/// to have a specialization for \p CallGraphType.
+
+template <typename CallGraphType>
+void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
+ GetRelBBFreqTy GetRelBBFreq,
+ GetCountTy GetCount,
+ AddCountTy AddCount) {
+ std::vector<SccTy> SCCs;
+
+ // Collect all the SCCs.
+ for (auto I = scc_begin(CG); !I.isAtEnd(); ++I)
+ SCCs.push_back(*I);
+
+ // The callgraph SCCs need to be visited in top-down order for propagation.
+ // The SCC iterator returns the SCCs in bottom-up order, so reverse them
+ // and call propagateFromSCC.
+ for (auto &SCC : reverse(SCCs))
+ propagateFromSCC(SCC, GetRelBBFreq, GetCount, AddCount);
+}
+
+template class llvm::SyntheticCountsUtils<const CallGraph *>;
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index d18246ac5941..102135fbf313 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -62,6 +62,18 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}) &&
"TargetLibraryInfoImpl function names must be sorted");
+ // Set the IO unlocked variants as unavailable here;
+ // they are set as available per system below.
+ TLI.setUnavailable(LibFunc_getchar_unlocked);
+ TLI.setUnavailable(LibFunc_putc_unlocked);
+ TLI.setUnavailable(LibFunc_putchar_unlocked);
+ TLI.setUnavailable(LibFunc_fputc_unlocked);
+ TLI.setUnavailable(LibFunc_fgetc_unlocked);
+ TLI.setUnavailable(LibFunc_fread_unlocked);
+ TLI.setUnavailable(LibFunc_fwrite_unlocked);
+ TLI.setUnavailable(LibFunc_fputs_unlocked);
+ TLI.setUnavailable(LibFunc_fgets_unlocked);
+
bool ShouldExtI32Param = false, ShouldExtI32Return = false,
ShouldSignExtI32Param = false;
// PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and
@@ -73,8 +85,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
// Mips, on the other hand, needs signext on i32 parameters corresponding
// to both signed and unsigned ints.
- if (T.getArch() == Triple::mips || T.getArch() == Triple::mipsel ||
- T.getArch() == Triple::mips64 || T.getArch() == Triple::mips64el) {
+ if (T.isMIPS()) {
ShouldSignExtI32Param = true;
}
TLI.setShouldExtI32Param(ShouldExtI32Param);
@@ -107,6 +118,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
// All versions of watchOS support it.
if (T.isMacOSX()) {
+ // IO unlocked variants available on Mac OS X.
+ TLI.setAvailable(LibFunc_getc_unlocked);
+ TLI.setAvailable(LibFunc_getchar_unlocked);
+ TLI.setAvailable(LibFunc_putc_unlocked);
+ TLI.setAvailable(LibFunc_putchar_unlocked);
+
if (T.isMacOSXVersionLT(10, 5))
TLI.setUnavailable(LibFunc_memset_pattern16);
} else if (T.isiOS()) {
@@ -245,51 +262,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_tanhf);
}
- // These definitions are due to math-finite.h header on Linux
- TLI.setUnavailable(LibFunc_acos_finite);
- TLI.setUnavailable(LibFunc_acosf_finite);
- TLI.setUnavailable(LibFunc_acosl_finite);
- TLI.setUnavailable(LibFunc_acosh_finite);
- TLI.setUnavailable(LibFunc_acoshf_finite);
- TLI.setUnavailable(LibFunc_acoshl_finite);
- TLI.setUnavailable(LibFunc_asin_finite);
- TLI.setUnavailable(LibFunc_asinf_finite);
- TLI.setUnavailable(LibFunc_asinl_finite);
- TLI.setUnavailable(LibFunc_atan2_finite);
- TLI.setUnavailable(LibFunc_atan2f_finite);
- TLI.setUnavailable(LibFunc_atan2l_finite);
- TLI.setUnavailable(LibFunc_atanh_finite);
- TLI.setUnavailable(LibFunc_atanhf_finite);
- TLI.setUnavailable(LibFunc_atanhl_finite);
- TLI.setUnavailable(LibFunc_cosh_finite);
- TLI.setUnavailable(LibFunc_coshf_finite);
- TLI.setUnavailable(LibFunc_coshl_finite);
- TLI.setUnavailable(LibFunc_exp10_finite);
- TLI.setUnavailable(LibFunc_exp10f_finite);
- TLI.setUnavailable(LibFunc_exp10l_finite);
- TLI.setUnavailable(LibFunc_exp2_finite);
- TLI.setUnavailable(LibFunc_exp2f_finite);
- TLI.setUnavailable(LibFunc_exp2l_finite);
- TLI.setUnavailable(LibFunc_exp_finite);
- TLI.setUnavailable(LibFunc_expf_finite);
- TLI.setUnavailable(LibFunc_expl_finite);
- TLI.setUnavailable(LibFunc_log10_finite);
- TLI.setUnavailable(LibFunc_log10f_finite);
- TLI.setUnavailable(LibFunc_log10l_finite);
- TLI.setUnavailable(LibFunc_log2_finite);
- TLI.setUnavailable(LibFunc_log2f_finite);
- TLI.setUnavailable(LibFunc_log2l_finite);
- TLI.setUnavailable(LibFunc_log_finite);
- TLI.setUnavailable(LibFunc_logf_finite);
- TLI.setUnavailable(LibFunc_logl_finite);
- TLI.setUnavailable(LibFunc_pow_finite);
- TLI.setUnavailable(LibFunc_powf_finite);
- TLI.setUnavailable(LibFunc_powl_finite);
- TLI.setUnavailable(LibFunc_sinh_finite);
- TLI.setUnavailable(LibFunc_sinhf_finite);
- TLI.setUnavailable(LibFunc_sinhl_finite);
-
- // Win32 does *not* provide provide these functions, but they are
+ // Win32 does *not* provide these functions, but they are
// generally available on POSIX-compliant systems:
TLI.setUnavailable(LibFunc_access);
TLI.setUnavailable(LibFunc_bcmp);
@@ -309,7 +282,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_ftello);
TLI.setUnavailable(LibFunc_ftrylockfile);
TLI.setUnavailable(LibFunc_funlockfile);
- TLI.setUnavailable(LibFunc_getc_unlocked);
TLI.setUnavailable(LibFunc_getitimer);
TLI.setUnavailable(LibFunc_getlogin_r);
TLI.setUnavailable(LibFunc_getpwnam);
@@ -441,15 +413,18 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_flsll);
}
- // The following functions are available on at least Linux:
- if (!T.isOSLinux()) {
+ // The following functions are available on Linux,
+ // but Android uses bionic instead of glibc.
+ if (!T.isOSLinux() || T.isAndroid()) {
TLI.setUnavailable(LibFunc_dunder_strdup);
TLI.setUnavailable(LibFunc_dunder_strtok_r);
TLI.setUnavailable(LibFunc_dunder_isoc99_scanf);
TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf);
TLI.setUnavailable(LibFunc_under_IO_getc);
TLI.setUnavailable(LibFunc_under_IO_putc);
- TLI.setUnavailable(LibFunc_memalign);
+ // But Android has memalign.
+ if (!T.isAndroid())
+ TLI.setUnavailable(LibFunc_memalign);
TLI.setUnavailable(LibFunc_fopen64);
TLI.setUnavailable(LibFunc_fseeko64);
TLI.setUnavailable(LibFunc_fstat64);
@@ -460,6 +435,65 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_stat64);
TLI.setUnavailable(LibFunc_statvfs64);
TLI.setUnavailable(LibFunc_tmpfile64);
+
+ // Relaxed math functions are included in math-finite.h on Linux (GLIBC).
+ TLI.setUnavailable(LibFunc_acos_finite);
+ TLI.setUnavailable(LibFunc_acosf_finite);
+ TLI.setUnavailable(LibFunc_acosl_finite);
+ TLI.setUnavailable(LibFunc_acosh_finite);
+ TLI.setUnavailable(LibFunc_acoshf_finite);
+ TLI.setUnavailable(LibFunc_acoshl_finite);
+ TLI.setUnavailable(LibFunc_asin_finite);
+ TLI.setUnavailable(LibFunc_asinf_finite);
+ TLI.setUnavailable(LibFunc_asinl_finite);
+ TLI.setUnavailable(LibFunc_atan2_finite);
+ TLI.setUnavailable(LibFunc_atan2f_finite);
+ TLI.setUnavailable(LibFunc_atan2l_finite);
+ TLI.setUnavailable(LibFunc_atanh_finite);
+ TLI.setUnavailable(LibFunc_atanhf_finite);
+ TLI.setUnavailable(LibFunc_atanhl_finite);
+ TLI.setUnavailable(LibFunc_cosh_finite);
+ TLI.setUnavailable(LibFunc_coshf_finite);
+ TLI.setUnavailable(LibFunc_coshl_finite);
+ TLI.setUnavailable(LibFunc_exp10_finite);
+ TLI.setUnavailable(LibFunc_exp10f_finite);
+ TLI.setUnavailable(LibFunc_exp10l_finite);
+ TLI.setUnavailable(LibFunc_exp2_finite);
+ TLI.setUnavailable(LibFunc_exp2f_finite);
+ TLI.setUnavailable(LibFunc_exp2l_finite);
+ TLI.setUnavailable(LibFunc_exp_finite);
+ TLI.setUnavailable(LibFunc_expf_finite);
+ TLI.setUnavailable(LibFunc_expl_finite);
+ TLI.setUnavailable(LibFunc_log10_finite);
+ TLI.setUnavailable(LibFunc_log10f_finite);
+ TLI.setUnavailable(LibFunc_log10l_finite);
+ TLI.setUnavailable(LibFunc_log2_finite);
+ TLI.setUnavailable(LibFunc_log2f_finite);
+ TLI.setUnavailable(LibFunc_log2l_finite);
+ TLI.setUnavailable(LibFunc_log_finite);
+ TLI.setUnavailable(LibFunc_logf_finite);
+ TLI.setUnavailable(LibFunc_logl_finite);
+ TLI.setUnavailable(LibFunc_pow_finite);
+ TLI.setUnavailable(LibFunc_powf_finite);
+ TLI.setUnavailable(LibFunc_powl_finite);
+ TLI.setUnavailable(LibFunc_sinh_finite);
+ TLI.setUnavailable(LibFunc_sinhf_finite);
+ TLI.setUnavailable(LibFunc_sinhl_finite);
+ }
+
+ if ((T.isOSLinux() && T.isGNUEnvironment()) ||
+ (T.isAndroid() && !T.isAndroidVersionLT(28))) {
+ // IO unlocked variants available on GNU/Linux and Android P or later.
+ TLI.setAvailable(LibFunc_getc_unlocked);
+ TLI.setAvailable(LibFunc_getchar_unlocked);
+ TLI.setAvailable(LibFunc_putc_unlocked);
+ TLI.setAvailable(LibFunc_putchar_unlocked);
+ TLI.setAvailable(LibFunc_fputc_unlocked);
+ TLI.setAvailable(LibFunc_fgetc_unlocked);
+ TLI.setAvailable(LibFunc_fread_unlocked);
+ TLI.setAvailable(LibFunc_fwrite_unlocked);
+ TLI.setAvailable(LibFunc_fputs_unlocked);
+ TLI.setAvailable(LibFunc_fgets_unlocked);
}
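
Downstream passes only see this table through the public TargetLibraryInfo
query interface. A minimal sketch of how a transform might gate itself on the
newly available unlocked variants (the helper name and its caller are invented
for illustration):

  #include "llvm/Analysis/TargetLibraryInfo.h"
  using namespace llvm;

  // Returns true only when initialize() above left the entry available,
  // e.g. on GNU/Linux or on Android P and later.
  static bool canUsePutcUnlocked(const TargetLibraryInfo &TLI) {
    return TLI.has(LibFunc_putc_unlocked);
  }
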
// As currently implemented in clang, NVPTX code has no standard library to
@@ -689,10 +723,12 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_siprintf:
case LibFunc_sprintf:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getParamType(1)->isPointerTy());
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
case LibFunc_snprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getParamType(2)->isPointerTy());
+ FTy.getParamType(2)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
case LibFunc_setitimer:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
@@ -802,6 +838,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_feof:
case LibFunc_fflush:
case LibFunc_fgetc:
+ case LibFunc_fgetc_unlocked:
case LibFunc_fileno:
case LibFunc_flockfile:
case LibFunc_free:
@@ -830,6 +867,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc_fputc:
+ case LibFunc_fputc_unlocked:
case LibFunc_fstat:
case LibFunc_frexp:
case LibFunc_frexpf:
@@ -837,18 +875,22 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_fstatvfs:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
case LibFunc_fgets:
+ case LibFunc_fgets_unlocked:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
case LibFunc_fread:
+ case LibFunc_fread_unlocked:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(3)->isPointerTy());
case LibFunc_fwrite:
+ case LibFunc_fwrite_unlocked:
return (NumParams == 4 && FTy.getReturnType()->isIntegerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy() &&
FTy.getParamType(2)->isIntegerTy() &&
FTy.getParamType(3)->isPointerTy());
case LibFunc_fputs:
+ case LibFunc_fputs_unlocked:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc_fscanf:
@@ -861,6 +903,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc_getchar:
+ case LibFunc_getchar_unlocked:
return (NumParams == 0 && FTy.getReturnType()->isIntegerTy());
case LibFunc_gets:
return (NumParams == 1 && FTy.getParamType(0) == PCharTy);
@@ -873,6 +916,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc_putc:
+ case LibFunc_putc_unlocked:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
case LibFunc_pread:
case LibFunc_pwrite:
@@ -989,8 +1033,26 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_msvc_new_array_int_nothrow:
// new[](unsigned long long, nothrow);
case LibFunc_msvc_new_array_longlong_nothrow:
+ // new(unsigned int, align_val_t)
+ case LibFunc_ZnwjSt11align_val_t:
+ // new(unsigned long, align_val_t)
+ case LibFunc_ZnwmSt11align_val_t:
+ // new[](unsigned int, align_val_t)
+ case LibFunc_ZnajSt11align_val_t:
+ // new[](unsigned long, align_val_t)
+ case LibFunc_ZnamSt11align_val_t:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy());
+ // new(unsigned int, align_val_t, nothrow)
+ case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t:
+ // new(unsigned long, align_val_t, nothrow)
+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
+ // new[](unsigned int, align_val_t, nothrow)
+ case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t:
+ // new[](unsigned long, align_val_t, nothrow)
+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
+ return (NumParams == 3 && FTy.getReturnType()->isPointerTy());
+
// void operator delete[](void*);
case LibFunc_ZdaPv:
// void operator delete(void*);
@@ -1017,6 +1079,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_ZdlPvj:
// void operator delete(void*, unsigned long);
case LibFunc_ZdlPvm:
+ // void operator delete(void*, align_val_t)
+ case LibFunc_ZdlPvSt11align_val_t:
+ // void operator delete[](void*, align_val_t)
+ case LibFunc_ZdaPvSt11align_val_t:
// void operator delete[](void*, unsigned int);
case LibFunc_msvc_delete_array_ptr32_int:
// void operator delete[](void*, nothrow);
@@ -1035,6 +1101,12 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_msvc_delete_ptr64_nothrow:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
+ // void operator delete(void*, align_val_t, nothrow)
+ case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t:
+ // void operator delete[](void*, align_val_t, nothrow)
+ case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t:
+ return (NumParams == 3 && FTy.getParamType(0)->isPointerTy());
+
case LibFunc_memset_pattern16:
return (!FTy.isVarArg() && NumParams == 3 &&
FTy.getParamType(0)->isPointerTy() &&
@@ -1231,6 +1303,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_isascii:
case LibFunc_toascii:
case LibFunc_putchar:
+ case LibFunc_putchar_unlocked:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getReturnType() == FTy.getParamType(0));
@@ -1326,10 +1399,10 @@ static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
- std::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
+ llvm::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
- std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
+ llvm::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
}
void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
@@ -1387,6 +1460,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
{"sinf", "__svml_sinf8", 8},
{"sinf", "__svml_sinf16", 16},
+ {"llvm.sin.f64", "__svml_sin2", 2},
+ {"llvm.sin.f64", "__svml_sin4", 4},
+ {"llvm.sin.f64", "__svml_sin8", 8},
+
+ {"llvm.sin.f32", "__svml_sinf4", 4},
+ {"llvm.sin.f32", "__svml_sinf8", 8},
+ {"llvm.sin.f32", "__svml_sinf16", 16},
+
{"cos", "__svml_cos2", 2},
{"cos", "__svml_cos4", 4},
{"cos", "__svml_cos8", 8},
@@ -1395,6 +1476,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
{"cosf", "__svml_cosf8", 8},
{"cosf", "__svml_cosf16", 16},
+ {"llvm.cos.f64", "__svml_cos2", 2},
+ {"llvm.cos.f64", "__svml_cos4", 4},
+ {"llvm.cos.f64", "__svml_cos8", 8},
+
+ {"llvm.cos.f32", "__svml_cosf4", 4},
+ {"llvm.cos.f32", "__svml_cosf8", 8},
+ {"llvm.cos.f32", "__svml_cosf16", 16},
+
{"pow", "__svml_pow2", 2},
{"pow", "__svml_pow4", 4},
{"pow", "__svml_pow8", 8},
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index b744cae51ed7..9de2f789c89c 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -31,7 +31,7 @@ static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
cl::desc("Recognize reduction patterns."));
namespace {
-/// \brief No-op implementation of the TTI interface using the utility base
+/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
@@ -155,6 +155,14 @@ bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
return TTIImpl->isLSRCostLess(C1, C2);
}
+bool TargetTransformInfo::canMacroFuseCmp() const {
+ return TTIImpl->canMacroFuseCmp();
+}
+
+bool TargetTransformInfo::shouldFavorPostInc() const {
+ return TTIImpl->shouldFavorPostInc();
+}
+
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
return TTIImpl->isLegalMaskedStore(DataType);
}
@@ -207,6 +215,8 @@ bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
return TTIImpl->isProfitableToHoist(I);
}
+bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
+
bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
return TTIImpl->isTypeLegal(Ty);
}
@@ -226,6 +236,10 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const
return TTIImpl->shouldBuildLookupTablesForConstant(C);
}
+bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
+ return TTIImpl->useColdCCForColdCall(F);
+}
+
unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
@@ -326,6 +340,14 @@ unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
return TTIImpl->getMinVectorRegisterBitWidth();
}
+bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
+ return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
+}
+
+unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
+ return TTIImpl->getMinimumVF(ElemWidth);
+}
+
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
return TTIImpl->shouldConsiderAddressTypePromotion(
@@ -547,6 +569,16 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
+bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
+ Type *Ty) const {
+ return TTIImpl->isIndexedLoadLegal(Mode, Ty);
+}
+
+bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
+ Type *Ty) const {
+ return TTIImpl->isIndexedStoreLegal(Mode, Ty);
+}
+
unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}
@@ -598,73 +630,43 @@ int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
-static bool isReverseVectorMask(ArrayRef<int> Mask) {
- for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
- if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
- return false;
- return true;
-}
-
-static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
- bool Vec0 = false;
- bool Vec1 = false;
- for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
- if (Mask[i] >= 0) {
- if ((unsigned)Mask[i] >= NumVecElts)
- Vec1 = true;
- else
- Vec0 = true;
- }
- }
- return !(Vec0 && Vec1);
-}
-
-static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
- for (unsigned i = 0; i < Mask.size(); ++i)
- if (Mask[i] > 0)
- return false;
- return true;
-}
-
-static bool isAlternateVectorMask(ArrayRef<int> Mask) {
- bool isAlternate = true;
- unsigned MaskSize = Mask.size();
-
- // Example: shufflevector A, B, <0,5,2,7>
- for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
- if (Mask[i] < 0)
- continue;
- isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
- }
-
- if (isAlternate)
- return true;
+static TargetTransformInfo::OperandValueKind
+getOperandInfo(Value *V, TargetTransformInfo::OperandValueProperties &OpProps) {
+ TargetTransformInfo::OperandValueKind OpInfo =
+ TargetTransformInfo::OK_AnyValue;
+ OpProps = TargetTransformInfo::OP_None;
- isAlternate = true;
- // Example: shufflevector A, B, <4,1,6,3>
- for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
- if (Mask[i] < 0)
- continue;
- isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().isPowerOf2())
+ OpProps = TargetTransformInfo::OP_PowerOf2;
+ return TargetTransformInfo::OK_UniformConstantValue;
}
- return isAlternate;
-}
-
-static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
- TargetTransformInfo::OperandValueKind OpInfo =
- TargetTransformInfo::OK_AnyValue;
+ const Value *Splat = getSplatValue(V);
- // Check for a splat of a constant or for a non uniform vector of constants.
+ // Check for a splat of a constant or for a non uniform vector of constants
+ // and check if the constant(s) are all powers of two.
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
- if (cast<Constant>(V)->getSplatValue() != nullptr)
+ if (Splat) {
OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+ if (auto *CI = dyn_cast<ConstantInt>(Splat))
+ if (CI->getValue().isPowerOf2())
+ OpProps = TargetTransformInfo::OP_PowerOf2;
+ } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ OpProps = TargetTransformInfo::OP_PowerOf2;
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
+ if (CI->getValue().isPowerOf2())
+ continue;
+ OpProps = TargetTransformInfo::OP_None;
+ break;
+ }
+ }
}
// Check for a splat of a uniform value. This is not loop aware, so return
// true only for the obviously uniform cases (argument, globalvalue)
- const Value *Splat = getSplatValue(V);
if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
OpInfo = TargetTransformInfo::OK_UniformValue;
@@ -994,15 +996,13 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- TargetTransformInfo::OperandValueKind Op1VK =
- getOperandInfo(I->getOperand(0));
- TargetTransformInfo::OperandValueKind Op2VK =
- getOperandInfo(I->getOperand(1));
- SmallVector<const Value*, 2> Operands(I->operand_values());
- return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
- Op2VK, TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None,
- Operands);
+ TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
+ TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
+ Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
+ Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
+ SmallVector<const Value *, 2> Operands(I->operand_values());
+ return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+ Op1VP, Op2VP, Operands);
}
case Instruction::Select: {
const SelectInst *SI = cast<SelectInst>(I);
@@ -1101,31 +1101,30 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
}
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
- unsigned NumVecElems = VecTypOp0->getVectorNumElements();
- SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+ // TODO: Identify and add costs for insert/extract subvector, etc.
+ if (Shuffle->changesLength())
+ return -1;
+
+ if (Shuffle->isIdentity())
+ return 0;
- if (NumVecElems == Mask.size()) {
- if (isReverseVectorMask(Mask))
- return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
- 0, nullptr);
- if (isAlternateVectorMask(Mask))
- return getShuffleCost(TargetTransformInfo::SK_Alternate,
- VecTypOp0, 0, nullptr);
+ Type *Ty = Shuffle->getType();
+ if (Shuffle->isReverse())
+ return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);
- if (isZeroEltBroadcastVectorMask(Mask))
- return getShuffleCost(TargetTransformInfo::SK_Broadcast,
- VecTypOp0, 0, nullptr);
+ if (Shuffle->isSelect())
+ return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);
- if (isSingleSourceVectorMask(Mask))
- return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
- VecTypOp0, 0, nullptr);
+ if (Shuffle->isTranspose())
+ return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);
- return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- VecTypOp0, 0, nullptr);
- }
+ if (Shuffle->isZeroEltSplat())
+ return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);
- return -1;
+ if (Shuffle->isSingleSource())
+ return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);
+
+ return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
}
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
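
The rewritten throughput code above now keys the shuffle cost off the mask
classification helpers on ShuffleVectorInst instead of the removed local mask
scans. A small sketch of the same decision tree in isolation (the function
name is invented; the string results merely label the shuffle kinds used
above):

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static const char *classifyShuffle(const ShuffleVectorInst *SV) {
    if (SV->changesLength())  return "length-changing (no cost modeled)";
    if (SV->isIdentity())     return "identity (free)";
    if (SV->isReverse())      return "SK_Reverse";
    if (SV->isSelect())       return "SK_Select";
    if (SV->isTranspose())    return "SK_Transpose";
    if (SV->isZeroEltSplat()) return "SK_Broadcast";
    if (SV->isSingleSource()) return "SK_PermuteSingleSrc";
    return "SK_PermuteTwoSrc";
  }
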
diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp
index 34c998501a6c..4dec53151ed6 100644
--- a/contrib/llvm/lib/Analysis/Trace.cpp
+++ b/contrib/llvm/lib/Analysis/Trace.cpp
@@ -16,6 +16,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Trace.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Compiler.h"
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 173db399b9d6..25a154edf4ac 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -104,21 +104,6 @@
// If neither node is an ancestor of the other and they have the same root,
// then we say NoAlias.
//
-// TODO: The current metadata format doesn't support struct
-// fields. For example:
-// struct X {
-// double d;
-// int i;
-// };
-// void foo(struct X *x, struct X *y, double *p) {
-// *x = *y;
-// *p = 0.0;
-// }
-// Struct X has a double member, so the store to *x can alias the store to *p.
-// Currently it's not possible to precisely describe all the things struct X
-// aliases, so struct assignments must use conservative TBAA nodes. There's
-// no scheme for attaching metadata to @llvm.memcpy yet either.
-//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
@@ -146,6 +131,17 @@ static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true), cl::Hidden);
namespace {
+/// isNewFormatTypeNode - Return true iff the given type node is in the new
+/// size-aware format.
+static bool isNewFormatTypeNode(const MDNode *N) {
+ if (N->getNumOperands() < 3)
+ return false;
+ // In the old format the first operand is a string.
+ if (!isa<MDNode>(N->getOperand(0)))
+ return false;
+ return true;
+}
+
/// This is a simple wrapper around an MDNode which provides a higher-level
/// interface by hiding the details of how alias analysis information is encoded
/// in its operands.
@@ -160,8 +156,15 @@ public:
/// getNode - Get the MDNode for this TBAANode.
MDNodeTy *getNode() const { return Node; }
+ /// isNewFormat - Return true iff the wrapped type node is in the new
+ /// size-aware format.
+ bool isNewFormat() const { return isNewFormatTypeNode(Node); }
+
/// getParent - Get this TBAANode's Alias tree parent.
TBAANodeImpl<MDNodeTy> getParent() const {
+ if (isNewFormat())
+ return TBAANodeImpl(cast<MDNodeTy>(Node->getOperand(0)));
+
if (Node->getNumOperands() < 2)
return TBAANodeImpl<MDNodeTy>();
MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1));
@@ -196,7 +199,7 @@ using MutableTBAANode = TBAANodeImpl<MDNode>;
/// information is encoded in its operands.
template<typename MDNodeTy>
class TBAAStructTagNodeImpl {
- /// This node should be created with createTBAAStructTagNode.
+ /// This node should be created with createTBAAAccessTag().
MDNodeTy *Node;
public:
@@ -205,6 +208,17 @@ public:
/// Get the MDNode for this TBAAStructTagNode.
MDNodeTy *getNode() const { return Node; }
+ /// isNewFormat - Return true iff the wrapped access tag is in the new
+ /// size-aware format.
+ bool isNewFormat() const {
+ if (Node->getNumOperands() < 4)
+ return false;
+ if (MDNodeTy *AccessType = getAccessType())
+ if (!TBAANodeImpl<MDNodeTy>(AccessType).isNewFormat())
+ return false;
+ return true;
+ }
+
MDNodeTy *getBaseType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(0));
}
@@ -217,13 +231,20 @@ public:
return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
}
+ uint64_t getSize() const {
+ if (!isNewFormat())
+ return UINT64_MAX;
+ return mdconst::extract<ConstantInt>(Node->getOperand(3))->getZExtValue();
+ }
+
/// Test if this TBAAStructTagNode represents a type for objects
/// which are not modified (by any means) in the context where this
/// AliasAnalysis is relevant.
bool isTypeImmutable() const {
- if (Node->getNumOperands() < 4)
+ unsigned OpNo = isNewFormat() ? 4 : 3;
+ if (Node->getNumOperands() < OpNo + 1)
return false;
- ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(OpNo));
if (!CI)
return false;
return CI->getValue()[0];
@@ -241,7 +262,7 @@ using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>;
/// higher-level interface by hiding the details of how alias analysis
/// information is encoded in its operands.
class TBAAStructTypeNode {
- /// This node should be created with createTBAAStructTypeNode.
+ /// This node should be created with createTBAATypeNode().
const MDNode *Node = nullptr;
public:
@@ -251,43 +272,80 @@ public:
/// Get the MDNode for this TBAAStructTypeNode.
const MDNode *getNode() const { return Node; }
+ /// isNewFormat - Return true iff the wrapped type node is in the new
+ /// size-aware format.
+ bool isNewFormat() const { return isNewFormatTypeNode(Node); }
+
+ bool operator==(const TBAAStructTypeNode &Other) const {
+ return getNode() == Other.getNode();
+ }
+
+ /// getId - Return type identifier.
+ Metadata *getId() const {
+ return Node->getOperand(isNewFormat() ? 2 : 0);
+ }
+
+ unsigned getNumFields() const {
+ unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1;
+ unsigned NumOpsPerField = isNewFormat() ? 3 : 2;
+ return (getNode()->getNumOperands() - FirstFieldOpNo) / NumOpsPerField;
+ }
+
+ TBAAStructTypeNode getFieldType(unsigned FieldIndex) const {
+ unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1;
+ unsigned NumOpsPerField = isNewFormat() ? 3 : 2;
+ unsigned OpIndex = FirstFieldOpNo + FieldIndex * NumOpsPerField;
+ auto *TypeNode = cast<MDNode>(getNode()->getOperand(OpIndex));
+ return TBAAStructTypeNode(TypeNode);
+ }
+
/// Get this TBAAStructTypeNode's field in the type DAG with
/// given offset. Update the offset to be relative to the field type.
- TBAAStructTypeNode getParent(uint64_t &Offset) const {
- // Parent can be omitted for the root node.
- if (Node->getNumOperands() < 2)
- return TBAAStructTypeNode();
-
- // Fast path for a scalar type node and a struct type node with a single
- // field.
- if (Node->getNumOperands() <= 3) {
- uint64_t Cur = Node->getNumOperands() == 2
- ? 0
- : mdconst::extract<ConstantInt>(Node->getOperand(2))
- ->getZExtValue();
- Offset -= Cur;
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
- if (!P)
+ TBAAStructTypeNode getField(uint64_t &Offset) const {
+ bool NewFormat = isNewFormat();
+ if (NewFormat) {
+ // New-format root and scalar type nodes have no fields.
+ if (Node->getNumOperands() < 6)
+ return TBAAStructTypeNode();
+ } else {
+ // Parent can be omitted for the root node.
+ if (Node->getNumOperands() < 2)
return TBAAStructTypeNode();
- return TBAAStructTypeNode(P);
+
+ // Fast path for a scalar type node and a struct type node with a single
+ // field.
+ if (Node->getNumOperands() <= 3) {
+ uint64_t Cur = Node->getNumOperands() == 2
+ ? 0
+ : mdconst::extract<ConstantInt>(Node->getOperand(2))
+ ->getZExtValue();
+ Offset -= Cur;
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
+ }
}
// Assume the offsets are in order. We return the previous field if
// the current offset is bigger than the given offset.
+ unsigned FirstFieldOpNo = NewFormat ? 3 : 1;
+ unsigned NumOpsPerField = NewFormat ? 3 : 2;
unsigned TheIdx = 0;
- for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+ for (unsigned Idx = FirstFieldOpNo; Idx < Node->getNumOperands();
+ Idx += NumOpsPerField) {
uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
->getZExtValue();
if (Cur > Offset) {
- assert(Idx >= 3 &&
- "TBAAStructTypeNode::getParent should have an offset match!");
- TheIdx = Idx - 2;
+ assert(Idx >= FirstFieldOpNo + NumOpsPerField &&
+ "TBAAStructTypeNode::getField should have an offset match!");
+ TheIdx = Idx - NumOpsPerField;
break;
}
}
// Move along the last field.
if (TheIdx == 0)
- TheIdx = Node->getNumOperands() - 2;
+ TheIdx = Node->getNumOperands() - NumOpsPerField;
uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
->getZExtValue();
Offset -= Cur;
@@ -403,15 +461,11 @@ bool MDNode::isTBAAVtableAccess() const {
}
// For struct-path aware TBAA, we use the access type of the tag.
- if (getNumOperands() < 2)
- return false;
- MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
- if (!Tag)
- return false;
- if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
- if (Tag1->getString() == "vtable pointer")
+ TBAAStructTagNode Tag(this);
+ TBAAStructTypeNode AccessType(Tag.getAccessType());
+ if (auto *Id = dyn_cast<MDString>(AccessType.getId()))
+ if (Id->getString() == "vtable pointer")
return true;
- }
return false;
}
@@ -485,26 +539,6 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
N.NoAlias = getMetadata(LLVMContext::MD_noalias);
}
-static bool findAccessType(TBAAStructTagNode BaseTag,
- const MDNode *AccessTypeNode,
- uint64_t &OffsetInBase) {
- // Start from the base type, follow the edge with the correct offset in
- // the type DAG and adjust the offset until we reach the access type or
- // until we reach a root node.
- TBAAStructTypeNode BaseType(BaseTag.getBaseType());
- OffsetInBase = BaseTag.getOffset();
-
- while (const MDNode *BaseTypeNode = BaseType.getNode()) {
- if (BaseTypeNode == AccessTypeNode)
- return true;
-
- // Follow the edge with the correct offset, Offset will be adjusted to
- // be relative to the field type.
- BaseType = BaseType.getParent(OffsetInBase);
- }
- return false;
-}
-
static const MDNode *createAccessTag(const MDNode *AccessType) {
// If there is no access type or the access type is the root node, then
// we don't have any useful access tag to return.
@@ -512,12 +546,111 @@ static const MDNode *createAccessTag(const MDNode *AccessType) {
return nullptr;
Type *Int64 = IntegerType::get(AccessType->getContext(), 64);
- auto *ImmutabilityFlag = ConstantAsMetadata::get(ConstantInt::get(Int64, 0));
+ auto *OffsetNode = ConstantAsMetadata::get(ConstantInt::get(Int64, 0));
+
+ if (TBAAStructTypeNode(AccessType).isNewFormat()) {
+ // TODO: Take access ranges into account when matching access tags and
+ // fix this code to generate actual access sizes for generic tags.
+ uint64_t AccessSize = UINT64_MAX;
+ auto *SizeNode =
+ ConstantAsMetadata::get(ConstantInt::get(Int64, AccessSize));
+ Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
+ const_cast<MDNode*>(AccessType),
+ OffsetNode, SizeNode};
+ return MDNode::get(AccessType->getContext(), Ops);
+ }
+
Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
- const_cast<MDNode*>(AccessType), ImmutabilityFlag};
+ const_cast<MDNode*>(AccessType),
+ OffsetNode};
return MDNode::get(AccessType->getContext(), Ops);
}
+static bool hasField(TBAAStructTypeNode BaseType,
+ TBAAStructTypeNode FieldType) {
+ for (unsigned I = 0, E = BaseType.getNumFields(); I != E; ++I) {
+ TBAAStructTypeNode T = BaseType.getFieldType(I);
+ if (T == FieldType || hasField(T, FieldType))
+ return true;
+ }
+ return false;
+}
+
+/// Return true if for two given accesses, one of the accessed objects may be a
+/// subobject of the other. The \p BaseTag and \p SubobjectTag parameters
+/// describe the accesses to the base object and the subobject respectively.
+/// \p CommonType must be the metadata node describing the common type of the
+/// accessed objects. On return, \p MayAlias is set to true iff these accesses
+/// may alias and \p GenericTag, if not null, points to the most generic access
+/// tag for the given two.
+static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
+ TBAAStructTagNode SubobjectTag,
+ const MDNode *CommonType,
+ const MDNode **GenericTag,
+ bool &MayAlias) {
+ // If the base object is of the least common type, then this may be an access
+ // to its subobject.
+ if (BaseTag.getAccessType() == BaseTag.getBaseType() &&
+ BaseTag.getAccessType() == CommonType) {
+ if (GenericTag)
+ *GenericTag = createAccessTag(CommonType);
+ MayAlias = true;
+ return true;
+ }
+
+ // If the access to the base object is through a field of the subobject's
+ // type, then this may be an access to that field. To check for that we start
+ // from the base type, follow the edge with the correct offset in the type DAG
+ // and adjust the offset until we reach the field type or until we reach the
+ // access type.
+ bool NewFormat = BaseTag.isNewFormat();
+ TBAAStructTypeNode BaseType(BaseTag.getBaseType());
+ uint64_t OffsetInBase = BaseTag.getOffset();
+
+ for (;;) {
+ // In the old format there is no distinction between fields and parent
+ // types, so in this case we consider all nodes up to the root.
+ if (!BaseType.getNode()) {
+ assert(!NewFormat && "Did not see access type in access path!");
+ break;
+ }
+
+ if (BaseType.getNode() == SubobjectTag.getBaseType()) {
+ bool SameMemberAccess = OffsetInBase == SubobjectTag.getOffset();
+ if (GenericTag) {
+ *GenericTag = SameMemberAccess ? SubobjectTag.getNode() :
+ createAccessTag(CommonType);
+ }
+ MayAlias = SameMemberAccess;
+ return true;
+ }
+
+ // With new-format nodes we stop at the access type.
+ if (NewFormat && BaseType.getNode() == BaseTag.getAccessType())
+ break;
+
+ // Follow the edge with the correct offset. Offset will be adjusted to
+ // be relative to the field type.
+ BaseType = BaseType.getField(OffsetInBase);
+ }
+
+ // If the base object has a direct or indirect field of the subobject's type,
+ // then this may be an access to that field. We need this to check now that
+ // we support aggregates as access types.
+ if (NewFormat) {
+ // TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType());
+ TBAAStructTypeNode FieldType(SubobjectTag.getBaseType());
+ if (hasField(BaseType, FieldType)) {
+ if (GenericTag)
+ *GenericTag = createAccessTag(CommonType);
+ MayAlias = true;
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// matchTags - Return true if the given couple of accesses are allowed to
/// overlap. If \arg GenericTag is not null, then on return it points to the
/// most generic access descriptor for the given two.
@@ -545,38 +678,26 @@ static bool matchAccessTags(const MDNode *A, const MDNode *B,
const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(),
TagB.getAccessType());
- // TODO: We need to check if AccessType of TagA encloses AccessType of
- // TagB to support aggregate AccessType. If yes, return true.
-
- // Climb the type DAG from base type of A to see if we reach base type of B.
- uint64_t OffsetA;
- if (findAccessType(TagA, TagB.getBaseType(), OffsetA)) {
- bool SameMemberAccess = OffsetA == TagB.getOffset();
+ // If the final access types have different roots, they're part of different
+ // potentially unrelated type systems, so we must be conservative.
+ if (!CommonType) {
if (GenericTag)
- *GenericTag = SameMemberAccess ? TagB.getNode() :
- createAccessTag(CommonType);
- return SameMemberAccess;
+ *GenericTag = nullptr;
+ return true;
}
- // Climb the type DAG from base type of B to see if we reach base type of A.
- uint64_t OffsetB;
- if (findAccessType(TagB, TagA.getBaseType(), OffsetB)) {
- bool SameMemberAccess = OffsetB == TagA.getOffset();
- if (GenericTag)
- *GenericTag = SameMemberAccess ? TagA.getNode() :
- createAccessTag(CommonType);
- return SameMemberAccess;
- }
+ // If one of the accessed objects may be a subobject of the other, then such
+ // accesses may alias.
+ bool MayAlias;
+ if (mayBeAccessToSubobjectOf(/* BaseTag= */ TagA, /* SubobjectTag= */ TagB,
+ CommonType, GenericTag, MayAlias) ||
+ mayBeAccessToSubobjectOf(/* BaseTag= */ TagB, /* SubobjectTag= */ TagA,
+ CommonType, GenericTag, MayAlias))
+ return MayAlias;
+ // Otherwise, we've proved there's no alias.
if (GenericTag)
*GenericTag = createAccessTag(CommonType);
-
- // If the final access types have different roots, they're part of different
- // potentially unrelated type systems, so we must be conservative.
- if (!CommonType)
- return true;
-
- // If they have the same root, then we've proved there's no alias.
return false;
}
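
For reference, the operand layouts the wrappers above have to handle can be
reconstructed from this code (shown schematically; this is a reading of the
patch, not an authoritative spec):

  access tags:
    old format:  !{ !baseType, !accessType, i64 offset [, i64 immutable] }
    new format:  !{ !baseType, !accessType, i64 offset, i64 size
                    [, i64 immutable] }

  type nodes:
    old format:  !{ !"name", !field1Type, i64 field1Offset,
                    !field2Type, i64 field2Offset, ... }
                 (for scalar nodes the single "field" is the parent type)
    new format:  !{ !parent, i64 size, !id,
                    !field1Type, i64 field1Offset, i64 field1Size, ... }

isNewFormatTypeNode() keys off the first operand being a node rather than a
string, which is why the new format can be detected without a version flag.
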
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index 46ac3f451f81..04a7b73c22bf 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -89,7 +89,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- return DL.getPointerTypeSizeInBits(Ty);
+ return DL.getIndexTypeSizeInBits(Ty);
}
namespace {
@@ -190,6 +190,14 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
"LHS and RHS should have the same type");
assert(LHS->getType()->isIntOrIntVectorTy() &&
"LHS and RHS should be integers");
+ // Look for an inverted mask: (X & ~M) op (Y & M).
+ Value *M;
+ if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
+ match(RHS, m_c_And(m_Specific(M), m_Value())))
+ return true;
+ if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
+ match(LHS, m_c_And(m_Specific(M), m_Value())))
+ return true;
IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
KnownBits LHSKnown(IT->getBitWidth());
KnownBits RHSKnown(IT->getBitWidth());
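
A tiny worked illustration of the new inverted-mask special case (values
invented): taking M = 0b1100 in a 4-bit example, X & ~M can only set bits 0-1
while Y & M can only set bits 2-3, so the two operands can never share a set
bit and the function may return true before computing any known bits.
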
@@ -493,6 +501,7 @@ bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
case Intrinsic::sideeffect:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::lifetime_start:
@@ -530,7 +539,7 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
if (Inv->getParent() != CxtI->getParent())
return false;
- // If we have a dom tree, then we now know that the assume doens't dominate
+ // If we have a dom tree, then we now know that the assume doesn't dominate
// the other instruction. If we don't have a dom tree then we can check if
// the assume is first in the BB.
if (!DT) {
@@ -574,7 +583,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (Q.isExcluded(I))
continue;
- // Warning: This loop can end up being somewhat performance sensetive.
+ // Warning: This loop can end up being somewhat performance sensitive.
// We're running this loop for once for each value queried resulting in a
// runtime of ~O(#assumes * #values).
@@ -816,6 +825,14 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
KnownBits RHSKnown(BitWidth);
computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // If the RHS is known zero, then this assumption must be wrong (nothing
+ // is unsigned less than zero). Signal a conflict and get out of here.
+ if (RHSKnown.isZero()) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ break;
+ }
+
// Whatever high bits in c are zero are known to be zero (if c is a power
// of 2, then one more).
if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
@@ -848,7 +865,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known. KZF and KOF are
-/// operator-specific functors that, given the known-zero or known-one bits
+/// operator-specific functions that, given the known-zero or known-one bits
/// respectively, and a shift amount, compute the implied known-zero or
/// known-one bits of the shift operator's result respectively for that shift
/// amount. The results from calling KZF and KOF are conservatively combined for
@@ -966,12 +983,9 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// matching the form add(x, add(x, y)) where y is odd.
// TODO: This could be generalized to clearing any bit set in y where the
// following bit is known to be unset in y.
- Value *Y = nullptr;
+ Value *X = nullptr, *Y = nullptr;
if (!Known.Zero[0] && !Known.One[0] &&
- (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
- m_Value(Y))) ||
- match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
- m_Value(Y))))) {
+ match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) {
Known2.resetAll();
computeKnownBits(Y, Known2, Depth + 1, Q);
if (Known2.countMinTrailingOnes() > 0)
@@ -1064,6 +1078,12 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// leading zero bits.
MaxHighZeros =
std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
+ } else if (SPF == SPF_ABS) {
+ // RHS from matchSelectPattern returns the negation part of abs pattern.
+ // If the negate has an NSW flag we can assume the sign bit of the result
+ // will be 0 because that makes abs(INT_MIN) undefined.
+ if (cast<Instruction>(RHS)->hasNoSignedWrap())
+ MaxHighZeros = 1;
}
// Only known if known in both the LHS and RHS.
@@ -1093,7 +1113,10 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType());
+ Type *ScalarTy = SrcTy->getScalarType();
+ SrcBitWidth = ScalarTy->isPointerTy() ?
+ Q.DL.getIndexTypeSizeInBits(ScalarTy) :
+ Q.DL.getTypeSizeInBits(ScalarTy);
assert(SrcBitWidth && "SrcBitWidth can't be zero");
Known = Known.zextOrTrunc(SrcBitWidth);
@@ -1106,7 +1129,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
}
case Instruction::BitCast: {
Type *SrcTy = I->getOperand(0)->getType();
- if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ if (SrcTy->isIntOrPtrTy() &&
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
@@ -1547,9 +1570,13 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
assert((V->getType()->isIntOrIntVectorTy(BitWidth) ||
V->getType()->isPtrOrPtrVectorTy()) &&
"Not integer or pointer type!");
- assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth &&
- "V and Known should have same BitWidth");
+
+ Type *ScalarTy = V->getType()->getScalarType();
+ unsigned ExpectedWidth = ScalarTy->isPointerTy() ?
+ Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy);
+ assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth");
(void)BitWidth;
+ (void)ExpectedWidth;
const APInt *C;
if (match(V, m_APInt(C))) {
@@ -1646,14 +1673,11 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q) {
assert(Depth <= MaxDepth && "Limit Search Depth");
- if (const Constant *C = dyn_cast<Constant>(V)) {
- if (C->isNullValue())
- return OrZero;
-
- const APInt *ConstIntOrConstSplatInt;
- if (match(C, m_APInt(ConstIntOrConstSplatInt)))
- return ConstIntOrConstSplatInt->isPowerOf2();
- }
+ // Attempt to match against constants.
+ if (OrZero && match(V, m_Power2OrZero()))
+ return true;
+ if (match(V, m_Power2()))
+ return true;
// 1 << X is clearly a power of two if the one is not shifted off the end. If
// it is shifted off the end then the result is undefined.
@@ -1737,7 +1761,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
return false;
}
-/// \brief Test whether a GEP's result is known to be non-null.
+/// Test whether a GEP's result is known to be non-null.
///
/// Uses properties inherent in a GEP to try to determine whether it is known
/// to be non-null.
@@ -1745,7 +1769,12 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
/// Currently this routine does not support vector GEPs.
static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
const Query &Q) {
- if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
+ const Function *F = nullptr;
+ if (const Instruction *I = dyn_cast<Instruction>(GEP))
+ F = I->getFunction();
+
+ if (!GEP->isInBounds() ||
+ NullPointerIsDefined(F, GEP->getPointerAddressSpace()))
return false;
// FIXME: Support vector-GEPs.
@@ -1919,6 +1948,10 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
}
+ // Some of the tests below are recursive, so bail out if we hit the limit.
+ if (Depth++ >= MaxDepth)
+ return false;
+
// Check for pointer simplifications.
if (V->getType()->isPointerTy()) {
// Alloca never returns null, malloc might.
@@ -1935,14 +1968,14 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (LI->getMetadata(LLVMContext::MD_nonnull))
return true;
- if (auto CS = ImmutableCallSite(V))
+ if (auto CS = ImmutableCallSite(V)) {
if (CS.isReturnNonNull())
return true;
+ if (const auto *RP = getArgumentAliasingToReturnedPointer(CS))
+ return isKnownNonZero(RP, Depth, Q);
+ }
}
- // The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ >= MaxDepth)
- return false;
// Check for recursive pointer simplifications.
if (V->getType()->isPointerTy()) {
@@ -2180,7 +2213,7 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
/// (itself), but other cases can give us information. For example, immediately
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
-/// vector element with the mininum number of known sign bits.
+/// vector element with the minimum number of known sign bits.
static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
const Query &Q) {
assert(Depth <= MaxDepth && "Limit Search Depth");
@@ -2189,7 +2222,11 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// in V, so for undef we have to conservatively return 1. We don't have the
// same behavior for poison though -- that's a FIXME today.
- unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType());
+ Type *ScalarTy = V->getType()->getScalarType();
+ unsigned TyBits = ScalarTy->isPointerTy() ?
+ Q.DL.getIndexTypeSizeInBits(ScalarTy) :
+ Q.DL.getTypeSizeInBits(ScalarTy);
+
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -2300,7 +2337,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
case Instruction::Select:
Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp == 1) return 1; // Early out.
+ if (Tmp == 1) break;
Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
return std::min(Tmp, Tmp2);
@@ -2308,7 +2345,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (Tmp == 1) return 1; // Early out.
+ if (Tmp == 1) break;
// Special case decrementing a value (ADD X, -1):
if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
@@ -2328,12 +2365,12 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
}
Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp2 == 1) return 1;
+ if (Tmp2 == 1) break;
return std::min(Tmp, Tmp2)-1;
case Instruction::Sub:
Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp2 == 1) return 1;
+ if (Tmp2 == 1) break;
// Handle NEG.
if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
@@ -2356,15 +2393,15 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (Tmp == 1) return 1; // Early out.
+ if (Tmp == 1) break;
return std::min(Tmp, Tmp2)-1;
case Instruction::Mul: {
// The output of the Mul can be at most twice the valid bits in the inputs.
unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (SignBitsOp0 == 1) return 1; // Early out.
+ if (SignBitsOp0 == 1) break;
unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (SignBitsOp1 == 1) return 1;
+ if (SignBitsOp1 == 1) break;
unsigned OutValidBits =
(TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
@@ -2671,7 +2708,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
return true;
// (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
- if (match(Op, m_FAdd(m_Value(), m_Zero())))
+ if (match(Op, m_FAdd(m_Value(), m_PosZeroFP())))
return true;
// sitofp and uitofp turn into +0.0 for zero.
@@ -2712,6 +2749,24 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
(!SignBitOnly && CFP->getValueAPF().isZero());
}
+ // Handle vector of constants.
+ if (auto *CV = dyn_cast<Constant>(V)) {
+ if (CV->getType()->isVectorTy()) {
+ unsigned NumElts = CV->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
+ if (!CFP)
+ return false;
+ if (CFP->getValueAPF().isNegative() &&
+ (SignBitOnly || !CFP->getValueAPF().isZero()))
+ return false;
+ }
+
+ // All non-negative ConstantFPs.
+ return true;
+ }
+ }
+
if (Depth == MaxDepth)
return false; // Limit search depth.
@@ -2749,6 +2804,12 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
// Widening/narrowing never change sign.
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1);
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and skip
+ // tracking the specific element. But at least we might find information
+ // valid for all elements of the vector.
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+ Depth + 1);
case Instruction::Call:
const auto *CI = cast<CallInst>(I);
Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
@@ -2963,7 +3024,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
if (!V)
return nullptr;
- // Insert the value in the new (sub) aggregrate
+ // Insert the value in the new (sub) aggregate
return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
"tmp", InsertBefore);
}
@@ -2992,9 +3053,9 @@ static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
}
-/// Given an aggregrate and an sequence of indices, see if
-/// the scalar value indexed is already around as a register, for example if it
-/// were inserted directly into the aggregrate.
+/// Given an aggregate and a sequence of indices, see if the scalar value
+/// indexed is already around as a register, for example if it was inserted
+/// directly into the aggregate.
///
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
@@ -3086,7 +3147,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// pointer plus a constant offset. Return the base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const DataLayout &DL) {
- unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
APInt ByteOffset(BitWidth, 0);
// We walk up the defs but use a visited set to handle unreachable code. In
@@ -3104,7 +3165,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
// means when we construct GEPOffset, we need to use the size
// of GEP's pointer type rather than the size of the original
// pointer type.
- APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
+ APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
@@ -3326,7 +3387,8 @@ static uint64_t GetStringLengthH(const Value *V,
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
- if (!V->getType()->isPointerTy()) return 0;
+ if (!V->getType()->isPointerTy())
+ return 0;
SmallPtrSet<const PHINode*, 32> PHIs;
uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
@@ -3335,7 +3397,24 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
return Len == ~0ULL ? 1 : Len;
}
-/// \brief \p PN defines a loop-variant pointer to an object. Check if the
+const Value *llvm::getArgumentAliasingToReturnedPointer(ImmutableCallSite CS) {
+ assert(CS &&
+ "getArgumentAliasingToReturnedPointer only works on nonnull CallSite");
+ if (const Value *RV = CS.getReturnedArgOperand())
+ return RV;
+ // This can be used only as an aliasing property.
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS))
+ return CS.getArgOperand(0);
+ return nullptr;
+}
+
+bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+ ImmutableCallSite CS) {
+ return CS.getIntrinsicID() == Intrinsic::launder_invariant_group ||
+ CS.getIntrinsicID() == Intrinsic::strip_invariant_group;
+}
+
+/// \p PN defines a loop-variant pointer to an object. Check if the
/// previous iteration of the loop was referring to the same object as \p PN.
static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
const LoopInfo *LI) {
@@ -3380,11 +3459,21 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// An alloca can't be further simplified.
return V;
} else {
- if (auto CS = CallSite(V))
- if (Value *RV = CS.getReturnedArgOperand()) {
- V = RV;
+ if (auto CS = CallSite(V)) {
+ // CaptureTracking knows about special capturing properties of some
+ // intrinsics, such as launder.invariant.group, that cannot be expressed
+ // with attributes but still return a pointer aliasing their argument.
+ // Some analyses assume that a nocapture pointer is never returned from
+ // such an intrinsic (otherwise the function would have to carry the
+ // returned attribute), so it is crucial to go through this helper and
+ // stay in sync with CaptureTracking; not doing so may cause subtle
+ // miscompilations where two aliasing pointers are assumed not to alias.
+ if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
+ V = RP;
continue;
}
+ }
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -3658,6 +3747,48 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
return OverflowResult::MayOverflow;
}
+OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
+ const Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ // Multiplying n * m significant bits yields a result of n + m significant
+ // bits. If the total number of significant bits does not exceed the
+ // result bit width (minus 1), there is no overflow.
+ // This means if we have enough leading sign bits in the operands
+ // we can guarantee that the result does not overflow.
+ // Ref: "Hacker's Delight" by Henry Warren
+ unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+
+ // Note that underestimating the number of sign bits gives a more
+ // conservative answer.
+ unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) +
+ ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT);
+
+ // First handle the easy case: if we have enough sign bits there's
+ // definitely no overflow.
+ if (SignBits > BitWidth + 1)
+ return OverflowResult::NeverOverflows;
+
+ // There are two ambiguous cases where there can be no overflow:
+ // SignBits == BitWidth + 1 and
+ // SignBits == BitWidth
+ // The second case is difficult to check, therefore we only handle the
+ // first case.
+ if (SignBits == BitWidth + 1) {
+ // It overflows only when both arguments are negative and the true
+ // product is exactly the minimum negative number.
+ // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
+ // For simplicity we just check if at least one side is not negative.
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
+ return OverflowResult::NeverOverflows;
+ }
+ return OverflowResult::MayOverflow;
+}
+
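
A worked instance of the sign-bit rule above (constant operands chosen for
illustration): for i8 values 0x03 and 0xfe, ComputeNumSignBits returns 6 and
7, so SignBits = 13 > BitWidth + 1 = 9 and the product 3 * -2 = -6 provably
fits in i8; NeverOverflows is returned without consulting known bits at all.
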
OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
const Value *RHS,
const DataLayout &DL,
@@ -3684,7 +3815,7 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
return OverflowResult::MayOverflow;
}
-/// \brief Return true if we can prove that adding the two values of the
+/// Return true if we can prove that adding the two values of the
/// knownbits will not overflow.
/// Otherwise return false.
static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
@@ -3787,6 +3918,47 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
return OverflowResult::MayOverflow;
}
+OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
+ const Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
+ return OverflowResult::NeverOverflows;
+
+ return OverflowResult::MayOverflow;
+}
+
+OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
+ const Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ // If LHS and RHS each have at least two sign bits, the subtraction
+ // cannot overflow.
+ if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
+ ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
+ return OverflowResult::NeverOverflows;
+
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, 0, AC, CxtI, DT);
+
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, 0, AC, CxtI, DT);
+
+ // Subtraction of two 2's complement numbers having identical signs will
+ // never overflow.
+ if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
+ (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
+ return OverflowResult::NeverOverflows;
+
+ // TODO: implement logic similar to checkRippleForAdd
+ return OverflowResult::MayOverflow;
+}
+
bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
const DominatorTree &DT) {
#ifndef NDEBUG
@@ -3928,6 +4100,15 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
return true;
}
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
+ // TODO: This is slightly conservative for invoke instructions since exiting
+ // via an exception *is* normal control flow for them.
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+ return false;
+ return true;
+}
+
bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
const Loop *L) {
// The loop header is guaranteed to be executed for every iteration.
@@ -4180,7 +4361,9 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
if (L.Flavor != R.Flavor)
return {SPF_UNKNOWN, SPNB_NA, false};
- // Match the compare to the min/max operations of the select operands.
+ // We have something like: x Pred y ? min(a, b) : min(c, d).
+ // Try to match the compare to the min/max operations of the select operands.
+ // First, make sure we have the right compare predicate.
switch (L.Flavor) {
case SPF_SMIN:
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
@@ -4218,21 +4401,38 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
return {SPF_UNKNOWN, SPNB_NA, false};
}
- // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
- if (CmpLHS == A && CmpRHS == C && D == B)
- return {L.Flavor, SPNB_NA, false};
+ // If there is a common operand in the already matched min/max and the other
+ // min/max operands match the compare operands (either directly or inverted),
+ // then this is min/max of the same flavor.
+ // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
+ // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
+ if (D == B) {
+ if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
+ match(A, m_Not(m_Specific(CmpRHS)))))
+ return {L.Flavor, SPNB_NA, false};
+ }
// a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
- if (CmpLHS == A && CmpRHS == D && C == B)
- return {L.Flavor, SPNB_NA, false};
-
+ // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
+ if (C == B) {
+ if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
+ match(A, m_Not(m_Specific(CmpRHS)))))
+ return {L.Flavor, SPNB_NA, false};
+ }
// b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
- if (CmpLHS == B && CmpRHS == C && D == A)
- return {L.Flavor, SPNB_NA, false};
-
+ // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
+ if (D == A) {
+ if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
+ match(B, m_Not(m_Specific(CmpRHS)))))
+ return {L.Flavor, SPNB_NA, false};
+ }
// b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
- if (CmpLHS == B && CmpRHS == D && C == A)
- return {L.Flavor, SPNB_NA, false};
+ // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
+ if (C == A) {
+ if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
+ match(B, m_Not(m_Specific(CmpRHS)))))
+ return {L.Flavor, SPNB_NA, false};
+ }
return {SPF_UNKNOWN, SPNB_NA, false};
}
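The fold this matcher justifies, e.g. that "a slt c ? smin(a, b) : smin(c, b)" equals "smin(smin(a, b), smin(c, b))", is easy to sanity-check in plain C++ (this exercises the arithmetic identity only, not the matcher itself):

#include <algorithm>
#include <cassert>

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b)
      for (int c = -2; c <= 2; ++c) {
        // "a pred c ? m(a, b) : m(c, b)" with m = smin and pred = slt.
        int Sel = (a < c) ? std::min(a, b) : std::min(c, b);
        assert(Sel == std::min(std::min(a, b), std::min(c, b)));
      }
  return 0;
}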
@@ -4311,6 +4511,27 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
return {SPF_UNKNOWN, SPNB_NA, false};
}
+bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) {
+ assert(X && Y && "Invalid operand");
+
+ // X = sub (0, Y) || X = sub nsw (0, Y)
+ if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) ||
+ (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y)))))
+ return true;
+
+ // Y = sub (0, X) || Y = sub nsw (0, X)
+ if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) ||
+ (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X)))))
+ return true;
+
+ // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
+ Value *A, *B;
+ return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) &&
+ match(Y, m_Sub(m_Specific(B), m_Specific(A))))) ||
+ (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) &&
+ match(Y, m_NSWSub(m_Specific(B), m_Specific(A)))));
+}
+
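The three accepted shapes are X = sub 0, Y; Y = sub 0, X; and the pair X = sub A, B with Y = sub B, A (each optionally required to carry nsw). In ordinary integer arithmetic all of them reduce to X == -Y, as a trivial standalone illustration shows:

#include <cassert>

int main() {
  int A = 7, B = 19;
  int X = A - B;   // X = sub A, B
  int Y = B - A;   // Y = sub B, A  -> X and Y are negations of each other
  assert(X == -Y);

  int Z = 0 - X;   // Z = sub 0, X  -> Z is the negation of X
  assert(Z == -X);
  return 0;
}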
static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
FastMathFlags FMF,
Value *CmpLHS, Value *CmpRHS,
@@ -4409,25 +4630,49 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
}
}
-
- const APInt *C1;
- if (match(CmpRHS, m_APInt(C1))) {
- if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
- (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
-
- // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
- // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
- if (Pred == ICmpInst::ICMP_SGT &&
- (C1->isNullValue() || C1->isAllOnesValue())) {
- return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
- }
-
- // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
- // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
- if (Pred == ICmpInst::ICMP_SLT &&
- (C1->isNullValue() || C1->isOneValue())) {
- return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
- }
+
+ if (isKnownNegation(TrueVal, FalseVal)) {
+ // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
+ // match against either LHS or sext(LHS).
+ auto MaybeSExtCmpLHS =
+ m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
+ auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
+ auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
+ if (match(TrueVal, MaybeSExtCmpLHS)) {
+ // Set the return values. If the compare uses the negated value (-X >s 0),
+ // swap the return values because the negated value is always 'RHS'.
+ LHS = TrueVal;
+ RHS = FalseVal;
+ if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
+ std::swap(LHS, RHS);
+
+ // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
+ // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
+ if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
+ return {SPF_ABS, SPNB_NA, false};
+
+ // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
+ // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
+ if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
+ return {SPF_NABS, SPNB_NA, false};
+ }
+ else if (match(FalseVal, MaybeSExtCmpLHS)) {
+ // Set the return values. If the compare uses the negated value (-X >s 0),
+ // swap the return values because the negated value is always 'RHS'.
+ LHS = FalseVal;
+ RHS = TrueVal;
+ if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
+ std::swap(LHS, RHS);
+
+ // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
+ // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
+ if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
+ return {SPF_NABS, SPNB_NA, false};
+
+ // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
+ // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
+ if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
+ return {SPF_ABS, SPNB_NA, false};
}
}
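The comparison-constant cases accepted here boil down to four select identities. A quick standalone check of them (plain C++, independent of the matcher):

#include <cassert>
#include <cstdlib>

int main() {
  for (int X = -5; X <= 5; ++X) {
    // (X >s 0) ? X : -X and (X >s -1) ? X : -X both compute ABS(X).
    assert(((X > 0) ? X : -X) == std::abs(X));
    assert(((X > -1) ? X : -X) == std::abs(X));
    // (X <s 0) ? X : -X and (X <s 1) ? X : -X both compute NABS(X) = -ABS(X).
    assert(((X < 0) ? X : -X) == -std::abs(X));
    assert(((X < 1) ? X : -X) == -std::abs(X));
  }
  return 0;
}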
@@ -4449,7 +4694,7 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
///
/// The function processes the case when type of true and false values of a
/// select instruction differs from type of the cmp instruction operands because
-/// of a cast instructon. The function checks if it is legal to move the cast
+/// of a cast instruction. The function checks if it is legal to move the cast
/// operation after "select". If yes, it returns the new second value of
/// "select" (with the assumption that cast is moved):
/// 1. As operand of cast instruction when both values of "select" are same cast
@@ -4602,6 +4847,30 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
LHS, RHS, Depth);
}
+CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
+ if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
+ if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
+ if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
+ if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
+ if (SPF == SPF_FMINNUM)
+ return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
+ if (SPF == SPF_FMAXNUM)
+ return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
+ llvm_unreachable("unhandled!");
+}
+
+SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
+ if (SPF == SPF_SMIN) return SPF_SMAX;
+ if (SPF == SPF_UMIN) return SPF_UMAX;
+ if (SPF == SPF_SMAX) return SPF_SMIN;
+ if (SPF == SPF_UMAX) return SPF_UMIN;
+ llvm_unreachable("unhandled!");
+}
+
+CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
+ return getMinMaxPred(getInverseMinMaxFlavor(SPF));
+}
+
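One identity for which the inverse flavor is handy (this particular use is my own illustration, not something the patch states) is that bitwise NOT is order-reversing, so it swaps smin and smax:

#include <algorithm>
#include <cassert>

int main() {
  for (int a = -3; a <= 3; ++a)
    for (int b = -3; b <= 3; ++b) {
      assert(~std::min(a, b) == std::max(~a, ~b));   // smin <-> smax under ~
      assert(~std::max(a, b) == std::min(~a, ~b));   // smax <-> smin under ~
    }
  return 0;
}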
/// Return true if "icmp Pred LHS RHS" is always true.
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
const Value *RHS, const DataLayout &DL,
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index 2becfbfe8a8d..d73d24736439 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -28,7 +28,7 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-/// \brief Identify if the intrinsic is trivially vectorizable.
+/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
@@ -67,7 +67,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
}
}
-/// \brief Identifies if the intrinsic has a scalar operand. It check for
+/// Identifies if the intrinsic has a scalar operand. It checks for
/// ctlz,cttz and powi special intrinsics whose argument is scalar.
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
unsigned ScalarOpdIdx) {
@@ -81,7 +81,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
}
}
-/// \brief Returns intrinsic ID for call.
+/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its ID; if it does not find one, it returns not_intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
@@ -97,7 +97,7 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
return Intrinsic::not_intrinsic;
}
-/// \brief Find the operand of the GEP that should be checked for consecutive
+/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
@@ -121,7 +121,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
return LastOperand;
}
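As a rough C-level picture of the "trailing indices that have no effect" case (my own illustration, not taken from the patch): an access like A[i].x, where x is the first field, lowers to a GEP whose last index is a constant 0. That trailing index leaves the final address unchanged, so the operand worth checking for consecutiveness is i.

struct Pt { float x, y; };

// The store address is &A[i] plus a zero offset for the leading field, so the
// GEP index that actually varies (and decides consecutiveness) is i.
void clearX(Pt *A, int n) {
  for (int i = 0; i < n; ++i)
    A[i].x = 0.0f;     // gep A, i, 0 -- the trailing constant 0 is ignorable
}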
-/// \brief If the argument is a GEP, then returns the operand identified by
+/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
@@ -140,7 +140,7 @@ Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
return GEP->getOperand(InductionOperand);
}
-/// \brief If a value has only one user that is a CastInst, return it.
+/// If a value has only one user that is a CastInst, return it.
Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
Value *UniqueCast = nullptr;
for (User *U : Ptr->users()) {
@@ -155,7 +155,7 @@ Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
return UniqueCast;
}
-/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
+/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
@@ -163,7 +163,7 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
return nullptr;
// Try to remove a gep instruction to make the pointer (actually index at this
- // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the
+ // point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing the
// pointer, otherwise, we are analyzing the index.
Value *OrigPtr = Ptr;
@@ -230,7 +230,7 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
return Stride;
}
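In C terms, the "a[i*stride]" case the comment refers to looks like the loop below (illustration only): the stride is a run-time value, and recovering it symbolically is what lets a vectorizer version the loop on, say, stride == 1.

// A loop whose memory stride is symbolic; the analysis is about recovering
// `stride` from the pointer expression of a[i * stride].
void scale(float *a, int n, int stride) {
  for (int i = 0; i < n; ++i)
    a[i * stride] *= 2.0f;
}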
-/// \brief Given a vector and an element number, see if the scalar value is
+/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
@@ -280,7 +280,7 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
return nullptr;
}
-/// \brief Get splat value if the input is a splat vector or return nullptr.
+/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcast a single value into a vector.