Diffstat (limited to 'contrib/llvm/lib/Analysis')
67 files changed, 4914 insertions, 2440 deletions
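One user-visible API addition in this import is the raw_ostream printer for AliasResult in AliasAnalysis.cpp, which lets AliasAnalysisEvaluator stream the result directly instead of passing per-result string literals. The following is a minimal usage sketch, not part of the patch; the helper name reportAlias is hypothetical, and it only assumes the operator<<(raw_ostream &, AliasResult) declared by this change.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: prints an alias query result in the same style as the
// updated AAEvaluator, using the new AliasResult stream operator.
static void reportAlias(llvm::AliasResult AR, const llvm::Value &A,
                        const llvm::Value &B) {
  llvm::errs() << "  " << AR << ":\t" << A.getName() << ", " << B.getName()
               << "\n";
}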
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index 55df66714178..a6585df949f8 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -126,7 +126,7 @@ ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { // Early-exit the moment we reach the bottom of the lattice. if (isNoModRef(Result)) - return Result; + return ModRefInfo::NoModRef; } return Result; @@ -162,7 +162,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, // Early-exit the moment we reach the bottom of the lattice. if (isNoModRef(Result)) - return Result; + return ModRefInfo::NoModRef; } // Try to refine the mod-ref info further using other API entry points to the @@ -224,7 +224,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, // Early-exit the moment we reach the bottom of the lattice. if (isNoModRef(Result)) - return Result; + return ModRefInfo::NoModRef; } // Try to refine the mod-ref info further using other API entry points to the @@ -254,85 +254,91 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, // information from CS1's references to the memory referenced by // CS2's arguments. if (onlyAccessesArgPointees(CS2B)) { + if (!doesAccessArgPointees(CS2B)) + return ModRefInfo::NoModRef; ModRefInfo R = ModRefInfo::NoModRef; - if (doesAccessArgPointees(CS2B)) { - bool IsMustAlias = true; - for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); - auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI); - - // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the - // dependence of CS1 on that location is the inverse: - // - If CS2 modifies location, dependence exists if CS1 reads or writes. - // - If CS2 only reads location, dependence exists if CS1 writes. - ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx); - ModRefInfo ArgMask = ModRefInfo::NoModRef; - if (isModSet(ArgModRefCS2)) - ArgMask = ModRefInfo::ModRef; - else if (isRefSet(ArgModRefCS2)) - ArgMask = ModRefInfo::Mod; - - // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use - // above ArgMask to update dependence info. - ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc); - ArgMask = intersectModRef(ArgMask, ModRefCS1); - - // Conservatively clear IsMustAlias unless only MustAlias is found. - IsMustAlias &= isMustSet(ModRefCS1); - - R = intersectModRef(unionModRef(R, ArgMask), Result); - if (R == Result) { - // On early exit, not all args were checked, cannot set Must. - if (I + 1 != E) - IsMustAlias = false; - break; - } + bool IsMustAlias = true; + for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); + auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI); + + // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the + // dependence of CS1 on that location is the inverse: + // - If CS2 modifies location, dependence exists if CS1 reads or writes. + // - If CS2 only reads location, dependence exists if CS1 writes. 
+ ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx); + ModRefInfo ArgMask = ModRefInfo::NoModRef; + if (isModSet(ArgModRefCS2)) + ArgMask = ModRefInfo::ModRef; + else if (isRefSet(ArgModRefCS2)) + ArgMask = ModRefInfo::Mod; + + // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use + // above ArgMask to update dependence info. + ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc); + ArgMask = intersectModRef(ArgMask, ModRefCS1); + + // Conservatively clear IsMustAlias unless only MustAlias is found. + IsMustAlias &= isMustSet(ModRefCS1); + + R = intersectModRef(unionModRef(R, ArgMask), Result); + if (R == Result) { + // On early exit, not all args were checked, cannot set Must. + if (I + 1 != E) + IsMustAlias = false; + break; } - // If Alias found and only MustAlias found above, set Must bit. - R = IsMustAlias ? setMust(R) : clearMust(R); } - return R; + + if (isNoModRef(R)) + return ModRefInfo::NoModRef; + + // If MustAlias found above, set Must bit. + return IsMustAlias ? setMust(R) : clearMust(R); } // If CS1 only accesses memory through arguments, check if CS2 references // any of the memory referenced by CS1's arguments. If not, return NoModRef. if (onlyAccessesArgPointees(CS1B)) { + if (!doesAccessArgPointees(CS1B)) + return ModRefInfo::NoModRef; ModRefInfo R = ModRefInfo::NoModRef; - if (doesAccessArgPointees(CS1B)) { - bool IsMustAlias = true; - for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); - auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI); - - // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might - // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If - // CS1 might Ref, then we care only about a Mod by CS2. - ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx); - ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc); - if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) || - (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2))) - R = intersectModRef(unionModRef(R, ArgModRefCS1), Result); - - // Conservatively clear IsMustAlias unless only MustAlias is found. - IsMustAlias &= isMustSet(ModRefCS2); - - if (R == Result) { - // On early exit, not all args were checked, cannot set Must. - if (I + 1 != E) - IsMustAlias = false; - break; - } + bool IsMustAlias = true; + for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); + auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI); + + // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might + // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If + // CS1 might Ref, then we care only about a Mod by CS2. + ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx); + ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc); + if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) || + (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2))) + R = intersectModRef(unionModRef(R, ArgModRefCS1), Result); + + // Conservatively clear IsMustAlias unless only MustAlias is found. + IsMustAlias &= isMustSet(ModRefCS2); + + if (R == Result) { + // On early exit, not all args were checked, cannot set Must. + if (I + 1 != E) + IsMustAlias = false; + break; } - // If Alias found and only MustAlias found above, set Must bit. - R = IsMustAlias ? 
setMust(R) : clearMust(R); } - return R; + + if (isNoModRef(R)) + return ModRefInfo::NoModRef; + + // If MustAlias found above, set Must bit. + return IsMustAlias ? setMust(R) : clearMust(R); } return Result; @@ -366,6 +372,24 @@ FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) { return Result; } +raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) { + switch (AR) { + case NoAlias: + OS << "NoAlias"; + break; + case MustAlias: + OS << "MustAlias"; + break; + case MayAlias: + OS << "MayAlias"; + break; + case PartialAlias: + OS << "PartialAlias"; + break; + } + return OS; +} + //===----------------------------------------------------------------------===// // Helper method implementation //===----------------------------------------------------------------------===// @@ -515,7 +539,7 @@ ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, return ModRefInfo::ModRef; } -/// \brief Return information about whether a particular call site modifies +/// Return information about whether a particular call site modifies /// or reads the specified memory location \p MemLoc before instruction \p I /// in a BasicBlock. An ordered basic block \p OBB can be used to speed up /// instruction-ordering queries inside the BasicBlock containing \p I. @@ -548,7 +572,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, unsigned ArgNo = 0; ModRefInfo R = ModRefInfo::NoModRef; - bool MustAlias = true; + bool IsMustAlias = true; // Set flag only if no May found and all operands processed. for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end(); CI != CE; ++CI, ++ArgNo) { @@ -566,7 +590,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, // assume that the call could touch the pointer, even though it doesn't // escape. if (AR != MustAlias) - MustAlias = false; + IsMustAlias = false; if (AR == NoAlias) continue; if (CS.doesNotAccessMemory(ArgNo)) @@ -578,7 +602,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, // Not returning MustModRef since we have not seen all the arguments. return ModRefInfo::ModRef; } - return MustAlias ? setMust(R) : clearMust(R); + return IsMustAlias ? 
setMust(R) : clearMust(R); } /// canBasicBlockModify - Return true if it is possible for execution of the diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index f737cecc43d1..764ae9160350 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -41,7 +41,7 @@ static cl::opt<bool> PrintMustModRef("print-mustmodref", cl::ReallyHidden); static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden); -static void PrintResults(const char *Msg, bool P, const Value *V1, +static void PrintResults(AliasResult AR, bool P, const Value *V1, const Value *V2, const Module *M) { if (PrintAll || P) { std::string o1, o2; @@ -50,18 +50,15 @@ static void PrintResults(const char *Msg, bool P, const Value *V1, V1->printAsOperand(os1, true, M); V2->printAsOperand(os2, true, M); } - + if (o2 < o1) std::swap(o1, o2); - errs() << " " << Msg << ":\t" - << o1 << ", " - << o2 << "\n"; + errs() << " " << AR << ":\t" << o1 << ", " << o2 << "\n"; } } -static inline void -PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, - Module *M) { +static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I, + Value *Ptr, Module *M) { if (PrintAll || P) { errs() << " " << Msg << ": Ptr: "; Ptr->printAsOperand(errs(), true, M); @@ -69,21 +66,19 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, } } -static inline void -PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, - Module *M) { +static inline void PrintModRefResults(const char *Msg, bool P, CallSite CSA, + CallSite CSB, Module *M) { if (PrintAll || P) { - errs() << " " << Msg << ": " << *CSA.getInstruction() - << " <-> " << *CSB.getInstruction() << '\n'; + errs() << " " << Msg << ": " << *CSA.getInstruction() << " <-> " + << *CSB.getInstruction() << '\n'; } } -static inline void -PrintLoadStoreResults(const char *Msg, bool P, const Value *V1, - const Value *V2, const Module *M) { +static inline void PrintLoadStoreResults(AliasResult AR, bool P, + const Value *V1, const Value *V2, + const Module *M) { if (PrintAll || P) { - errs() << " " << Msg << ": " << *V1 - << " <-> " << *V2 << '\n'; + errs() << " " << AR << ": " << *V1 << " <-> " << *V2 << '\n'; } } @@ -155,22 +150,22 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy); - switch (AA.alias(*I1, I1Size, *I2, I2Size)) { + AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size); + switch (AR) { case NoAlias: - PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); + PrintResults(AR, PrintNoAlias, *I1, *I2, F.getParent()); ++NoAliasCount; break; case MayAlias: - PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent()); + PrintResults(AR, PrintMayAlias, *I1, *I2, F.getParent()); ++MayAliasCount; break; case PartialAlias: - PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2, - F.getParent()); + PrintResults(AR, PrintPartialAlias, *I1, *I2, F.getParent()); ++PartialAliasCount; break; case MustAlias: - PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); + PrintResults(AR, PrintMustAlias, *I1, *I2, F.getParent()); ++MustAliasCount; break; } @@ -181,26 +176,23 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { // iterate over all pairs of load, store for (Value *Load : Loads) { for (Value *Store : Stores) { - switch 
(AA.alias(MemoryLocation::get(cast<LoadInst>(Load)), - MemoryLocation::get(cast<StoreInst>(Store)))) { + AliasResult AR = AA.alias(MemoryLocation::get(cast<LoadInst>(Load)), + MemoryLocation::get(cast<StoreInst>(Store))); + switch (AR) { case NoAlias: - PrintLoadStoreResults("NoAlias", PrintNoAlias, Load, Store, - F.getParent()); + PrintLoadStoreResults(AR, PrintNoAlias, Load, Store, F.getParent()); ++NoAliasCount; break; case MayAlias: - PrintLoadStoreResults("MayAlias", PrintMayAlias, Load, Store, - F.getParent()); + PrintLoadStoreResults(AR, PrintMayAlias, Load, Store, F.getParent()); ++MayAliasCount; break; case PartialAlias: - PrintLoadStoreResults("PartialAlias", PrintPartialAlias, Load, Store, - F.getParent()); + PrintLoadStoreResults(AR, PrintPartialAlias, Load, Store, F.getParent()); ++PartialAliasCount; break; case MustAlias: - PrintLoadStoreResults("MustAlias", PrintMustAlias, Load, Store, - F.getParent()); + PrintLoadStoreResults(AR, PrintMustAlias, Load, Store, F.getParent()); ++MustAliasCount; break; } @@ -211,26 +203,23 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { - switch (AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)), - MemoryLocation::get(cast<StoreInst>(*I2)))) { + AliasResult AR = AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)), + MemoryLocation::get(cast<StoreInst>(*I2))); + switch (AR) { case NoAlias: - PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, - F.getParent()); + PrintLoadStoreResults(AR, PrintNoAlias, *I1, *I2, F.getParent()); ++NoAliasCount; break; case MayAlias: - PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, - F.getParent()); + PrintLoadStoreResults(AR, PrintMayAlias, *I1, *I2, F.getParent()); ++MayAliasCount; break; case PartialAlias: - PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, - F.getParent()); + PrintLoadStoreResults(AR, PrintPartialAlias, *I1, *I2, F.getParent()); ++PartialAliasCount; break; case MustAlias: - PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, - F.getParent()); + PrintLoadStoreResults(AR, PrintMustAlias, *I1, *I2, F.getParent()); ++MustAliasCount; break; } diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h index 51a85f4e7061..fb93a12420f8 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h +++ b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h @@ -13,7 +13,7 @@ /// Summary-based analysis, also known as bottom-up analysis, is a style of /// interprocedrual static analysis that tries to analyze the callees before the /// callers get analyzed. The key idea of summary-based analysis is to first -/// process each function indepedently, outline its behavior in a condensed +/// process each function independently, outline its behavior in a condensed /// summary, and then instantiate the summary at the callsite when the said /// function is called elsewhere. 
This is often in contrast to another style /// called top-down analysis, in which callers are always analyzed first before diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index c88e0dd7dc44..8aee81b1f1d8 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -126,7 +127,7 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) { } void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, - uint64_t Size, const AAMDNodes &AAInfo, + LocationSize Size, const AAMDNodes &AAInfo, bool KnownMustAlias) { assert(!Entry.hasAliasSet() && "Entry already in set!"); @@ -182,7 +183,7 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { /// aliasesPointer - Return true if the specified pointer "may" (or must) /// alias one of the members in the set. /// -bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, +bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size, const AAMDNodes &AAInfo, AliasAnalysis &AA) const { if (AliasAny) @@ -262,7 +263,7 @@ void AliasSetTracker::clear() { /// alias the pointer. Return the unified set, or nullptr if no set that aliases /// the pointer was found. AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr, - uint64_t Size, + LocationSize Size, const AAMDNodes &AAInfo) { AliasSet *FoundSet = nullptr; for (iterator I = begin(), E = end(); I != E;) { @@ -302,7 +303,8 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { /// getAliasSetForPointer - Return the alias set that the specified pointer /// lives in. 
-AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size, +AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, + LocationSize Size, const AAMDNodes &AAInfo) { AliasSet::PointerRec &Entry = getEntryFor(Pointer); @@ -347,7 +349,8 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size, return AliasSets.back(); } -void AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { +void AliasSetTracker::add(Value *Ptr, LocationSize Size, + const AAMDNodes &AAInfo) { addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess); } @@ -386,7 +389,7 @@ void AliasSetTracker::add(VAArgInst *VAAI) { AliasSet::ModRefAccess); } -void AliasSetTracker::add(MemSetInst *MSI) { +void AliasSetTracker::add(AnyMemSetInst *MSI) { AAMDNodes AAInfo; MSI->getAAMetadata(AAInfo); @@ -399,11 +402,12 @@ void AliasSetTracker::add(MemSetInst *MSI) { AliasSet &AS = addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess); - if (MSI->isVolatile()) + auto *MS = dyn_cast<MemSetInst>(MSI); + if (MS && MS->isVolatile()) AS.setVolatile(); } -void AliasSetTracker::add(MemTransferInst *MTI) { +void AliasSetTracker::add(AnyMemTransferInst *MTI) { AAMDNodes AAInfo; MTI->getAAMetadata(AAInfo); @@ -415,13 +419,15 @@ void AliasSetTracker::add(MemTransferInst *MTI) { AliasSet &ASSrc = addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess); - if (MTI->isVolatile()) - ASSrc.setVolatile(); AliasSet &ASDst = addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess); - if (MTI->isVolatile()) + + auto* MT = dyn_cast<MemTransferInst>(MTI); + if (MT && MT->isVolatile()) { + ASSrc.setVolatile(); ASDst.setVolatile(); + } } void AliasSetTracker::addUnknown(Instruction *Inst) { @@ -461,9 +467,9 @@ void AliasSetTracker::add(Instruction *I) { return add(SI); if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) return add(VAAI); - if (MemSetInst *MSI = dyn_cast<MemSetInst>(I)) + if (AnyMemSetInst *MSI = dyn_cast<AnyMemSetInst>(I)) return add(MSI); - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) + if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(I)) return add(MTI); return addUnknown(I); } @@ -588,7 +594,7 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() { return *AliasAnyAS; } -AliasSet &AliasSetTracker::addPointer(Value *P, uint64_t Size, +AliasSet &AliasSetTracker::addPointer(Value *P, LocationSize Size, const AAMDNodes &AAInfo, AliasSet::AccessLattice E) { AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo); @@ -633,8 +639,12 @@ void AliasSet::print(raw_ostream &OS) const { OS << "\n " << UnknownInsts.size() << " Unknown instructions: "; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { if (i) OS << ", "; - if (auto *I = getUnknownInst(i)) - I->printAsOperand(OS); + if (auto *I = getUnknownInst(i)) { + if (I->hasName()) + I->printAsOperand(OS); + else + I->print(OS); + } } } OS << "\n"; diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index 0e0b5c92a918..30576cf1ae10 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -65,8 +65,10 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeMemoryDependenceWrapperPassPass(Registry); initializeModuleDebugInfoPrinterPass(Registry); initializeModuleSummaryIndexWrapperPassPass(Registry); + initializeMustExecutePrinterPass(Registry); initializeObjCARCAAWrapperPassPass(Registry); initializeOptimizationRemarkEmitterWrapperPassPass(Registry); + initializePhiValuesWrapperPassPass(Registry); 
initializePostDominatorTreeWrapperPassPass(Registry); initializeRegionInfoPassPass(Registry); initializeRegionViewerPass(Registry); diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 537813b6b752..96326347b712 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -85,15 +85,15 @@ const unsigned MaxNumPhiBBsValueReachabilityCheck = 20; // depth otherwise the algorithm in aliasGEP will assert. static const unsigned MaxLookupSearchDepth = 6; -bool BasicAAResult::invalidate(Function &F, const PreservedAnalyses &PA, +bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv) { // We don't care if this analysis itself is preserved, it has no state. But // we need to check that the analyses it depends on have been. Note that we // may be created without handles to some analyses and in that case don't // depend on them. - if (Inv.invalidate<AssumptionAnalysis>(F, PA) || - (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA)) || - (LI && Inv.invalidate<LoopAnalysis>(F, PA))) + if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) || + (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)) || + (LI && Inv.invalidate<LoopAnalysis>(Fn, PA))) return true; // Otherwise this analysis result remains valid. @@ -132,7 +132,10 @@ static bool isNonEscapingLocalObject(const Value *V) { /// Returns true if the pointer is one which would have been considered an /// escape by isNonEscapingLocalObject. static bool isEscapeSource(const Value *V) { - if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V)) + if (ImmutableCallSite(V)) + return true; + + if (isa<Argument>(V)) return true; // The load case works because isNonEscapingLocalObject considers all @@ -147,10 +150,12 @@ static bool isEscapeSource(const Value *V) { /// Returns the size of the object specified by V or UnknownSize if unknown. static uint64_t getObjectSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, + bool NullIsValidLoc, bool RoundToAlign = false) { uint64_t Size; ObjectSizeOpts Opts; Opts.RoundToAlign = RoundToAlign; + Opts.NullIsUnknownSize = NullIsValidLoc; if (getObjectSize(V, Size, DL, &TLI, Opts)) return Size; return MemoryLocation::UnknownSize; @@ -160,7 +165,8 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL, /// Size. static bool isObjectSmallerThan(const Value *V, uint64_t Size, const DataLayout &DL, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + bool NullIsValidLoc) { // Note that the meanings of the "object" are slightly different in the // following contexts: // c1: llvm::getObjectSize() @@ -192,15 +198,16 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, // This function needs to use the aligned object size because we allow // reads a bit past the end given sufficient alignment. - uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true); + uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc, + /*RoundToAlign*/ true); return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size; } /// Returns true if we can prove that the object specified by V has size Size. 
static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, - const TargetLibraryInfo &TLI) { - uint64_t ObjectSize = getObjectSize(V, DL, TLI); + const TargetLibraryInfo &TLI, bool NullIsValidLoc) { + uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc); return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size; } @@ -285,6 +292,19 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, case Instruction::Shl: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + + // We're trying to linearize an expression of the kind: + // shl i8 -128, 36 + // where the shift count exceeds the bitwidth of the type. + // We can't decompose this further (the expression would return + // a poison value). + if (Offset.getBitWidth() < RHS.getLimitedValue() || + Scale.getBitWidth() < RHS.getLimitedValue()) { + Scale = 1; + Offset = 0; + return V; + } + Offset <<= RHS.getLimitedValue(); Scale <<= RHS.getLimitedValue(); // the semantics of nsw and nuw for left shifts don't match those of @@ -414,11 +434,21 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); if (!GEPOp) { - if (auto CS = ImmutableCallSite(V)) - if (const Value *RV = CS.getReturnedArgOperand()) { - V = RV; + if (auto CS = ImmutableCallSite(V)) { + // CaptureTracking can know about special capturing properties of some + // intrinsics like launder.invariant.group, that can't be expressed with + // the attributes, but have properties like returning aliasing pointer. + // Because some analysis may assume that nocaptured pointer is not + // returned from some special intrinsic (because function would have to + // be marked with returns attribute), it is crucial to use this function + // because it should be in sync with CaptureTracking. Not using it may + // cause weird miscompilations where 2 aliasing pointers are assumed to + // noalias. + if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) { + V = RP; continue; } + } // If it's not a GEP, hand it off to SimplifyInstruction to see if it // can come up with something. This matches what GetUnderlyingObject does. @@ -490,6 +520,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits, SExtBits, DL, 0, AC, DT, NSW, NUW); + // All GEP math happens in the width of the pointer type, + // so we can truncate the value to 64-bits as we don't handle + // currently pointers larger than 64 bits and we would crash + // later. TODO: Make `Scale` an APInt to avoid this problem. + if (IndexScale.getBitWidth() > 64) + IndexScale = IndexScale.sextOrTrunc(64); + // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale; @@ -832,8 +869,11 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, IsMustAlias = false; // Early return if we improved mod ref information - if (!isModAndRefSet(Result)) + if (!isModAndRefSet(Result)) { + if (isNoModRef(Result)) + return ModRefInfo::NoModRef; return IsMustAlias ? setMust(Result) : clearMust(Result); + } } // If the CallSite is to malloc or calloc, we can assume that it doesn't @@ -854,7 +894,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // operands, i.e., source and destination of any given memcpy must no-alias. 
// If Loc must-aliases either one of these two locations, then it necessarily // no-aliases the other. - if (auto *Inst = dyn_cast<MemCpyInst>(CS.getInstruction())) { + if (auto *Inst = dyn_cast<AnyMemCpyInst>(CS.getInstruction())) { AliasResult SrcAA, DestAA; if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst), @@ -958,12 +998,12 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, /// Provide ad-hoc rules to disambiguate accesses through two GEP operators, /// both having the exact same pointer operand. static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, - uint64_t V1Size, + LocationSize V1Size, const GEPOperator *GEP2, - uint64_t V2Size, + LocationSize V2Size, const DataLayout &DL) { - assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == - GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && + assert(GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() == + GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() && GEP1->getPointerOperandType() == GEP2->getPointerOperandType() && "Expected GEPs with the same pointer operand"); @@ -1135,8 +1175,8 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, // the highest %f1 can be is (%alloca + 3). This means %random can not be higher // than (%alloca - 1), and so is not inbounds, a contradiction. bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, - const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, - uint64_t ObjectAccessSize) { + const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, + LocationSize ObjectAccessSize) { // If the object access size is unknown, or the GEP isn't inbounds, bail. if (ObjectAccessSize == MemoryLocation::UnknownSize || !GEPOp->isInBounds()) return false; @@ -1153,13 +1193,13 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, DecompObject.OtherOffset; // If the GEP has no variable indices, we know the precise offset - // from the base, then use it. If the GEP has variable indices, we're in - // a bit more trouble: we can't count on the constant offsets that come - // from non-struct sources, since these can be "rewound" by a negative - // variable offset. So use only offsets that came from structs. + // from the base, then use it. If the GEP has variable indices, + // we can't get exact GEP offset to identify pointer alias. So return + // false in that case. + if (!DecompGEP.VarIndices.empty()) + return false; int64_t GEPBaseOffset = DecompGEP.StructOffset; - if (DecompGEP.VarIndices.empty()) - GEPBaseOffset += DecompGEP.OtherOffset; + GEPBaseOffset += DecompGEP.OtherOffset; return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize); } @@ -1170,11 +1210,11 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, /// We know that V1 is a GEP, but we don't know anything about V2. /// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for /// V2. 
-AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, - const AAMDNodes &V1AAInfo, const Value *V2, - uint64_t V2Size, const AAMDNodes &V2AAInfo, - const Value *UnderlyingV1, - const Value *UnderlyingV2) { +AliasResult +BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size, + const AAMDNodes &V1AAInfo, const Value *V2, + LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderlyingV1, const Value *UnderlyingV2) { DecomposedGEP DecompGEP1, DecompGEP2; bool GEP1MaxLookupReached = DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT); @@ -1241,8 +1281,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // If we know the two GEPs are based off of the exact same pointer (and not // just the same underlying object), see if that tells us anything about // the resulting pointers. - if (GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == - GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && + if (GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() == + GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() && GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) { AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL); // If we couldn't find anything interesting, don't abandon just yet. @@ -1403,9 +1443,10 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) { /// Provides a bunch of ad-hoc rules to disambiguate a Select instruction /// against another. -AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize, +AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, + LocationSize SISize, const AAMDNodes &SIAAInfo, - const Value *V2, uint64_t V2Size, + const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, const Value *UnderV2) { // If the values are Selects with the same condition, we can do a more precise @@ -1438,9 +1479,10 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize, /// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against /// another. -AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize, +AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, const AAMDNodes &PNAAInfo, const Value *V2, - uint64_t V2Size, const AAMDNodes &V2AAInfo, + LocationSize V2Size, + const AAMDNodes &V2AAInfo, const Value *UnderV2) { // Track phi nodes we have visited. We use this information when we determine // value equivalence. @@ -1545,9 +1587,9 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize, /// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as /// array references. -AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, +AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, AAMDNodes V1AAInfo, const Value *V2, - uint64_t V2Size, AAMDNodes V2AAInfo, + LocationSize V2Size, AAMDNodes V2AAInfo, const Value *O1, const Value *O2) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. @@ -1555,8 +1597,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, return NoAlias; // Strip off any casts if they exist. 
- V1 = V1->stripPointerCastsAndBarriers(); - V2 = V2->stripPointerCastsAndBarriers(); + V1 = V1->stripPointerCastsAndInvariantGroups(); + V2 = V2->stripPointerCastsAndInvariantGroups(); // If V1 or V2 is undef, the result is NoAlias because we can always pick a // value for undef that aliases nothing in the program. @@ -1585,10 +1627,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, // Null values in the default address space don't point to any object, so they // don't alias any other pointer. if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1)) - if (CPN->getType()->getAddressSpace() == 0) + if (!NullPointerIsDefined(&F, CPN->getType()->getAddressSpace())) return NoAlias; if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2)) - if (CPN->getType()->getAddressSpace() == 0) + if (!NullPointerIsDefined(&F, CPN->getType()->getAddressSpace())) return NoAlias; if (O1 != O2) { @@ -1624,10 +1666,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. + bool NullIsValidLocation = NullPointerIsDefined(&F); if ((V1Size != MemoryLocation::UnknownSize && - isObjectSmallerThan(O2, V1Size, DL, TLI)) || + isObjectSmallerThan(O2, V1Size, DL, TLI, NullIsValidLocation)) || (V2Size != MemoryLocation::UnknownSize && - isObjectSmallerThan(O1, V2Size, DL, TLI))) + isObjectSmallerThan(O1, V2Size, DL, TLI, NullIsValidLocation))) return NoAlias; // Check the cache before climbing up use-def chains. This also terminates @@ -1687,8 +1730,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, if (O1 == O2) if (V1Size != MemoryLocation::UnknownSize && V2Size != MemoryLocation::UnknownSize && - (isObjectSize(O1, V1Size, DL, TLI) || - isObjectSize(O2, V2Size, DL, TLI))) + (isObjectSize(O1, V1Size, DL, TLI, NullIsValidLocation) || + isObjectSize(O2, V2Size, DL, TLI, NullIsValidLocation))) return AliasCache[Locs] = PartialAlias; // Recurse back into the best AA results we have, potentially with refined @@ -1771,8 +1814,8 @@ void BasicAAResult::GetIndexDifference( } bool BasicAAResult::constantOffsetHeuristic( - const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size, - uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC, + const SmallVectorImpl<VariableGEPIndex> &VarIndices, LocationSize V1Size, + LocationSize V2Size, int64_t BaseOffset, AssumptionCache *AC, DominatorTree *DT) { if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize || V2Size == MemoryLocation::UnknownSize) @@ -1832,6 +1875,7 @@ AnalysisKey BasicAA::Key; BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { return BasicAAResult(F.getParent()->getDataLayout(), + F, AM.getResult<TargetLibraryAnalysis>(F), AM.getResult<AssumptionAnalysis>(F), &AM.getResult<DominatorTreeAnalysis>(F), @@ -1864,7 +1908,7 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto &DTWP = getAnalysis<DominatorTreeWrapperPass>(); auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(), + Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(), ACT.getAssumptionCache(F), &DTWP.getDomTree(), LIWP ? 
&LIWP->getLoopInfo() : nullptr)); @@ -1881,6 +1925,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { return BasicAAResult( F.getParent()->getDataLayout(), + F, P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); } diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 7e323022d9ce..3d095068e7ff 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Function.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" @@ -73,7 +74,7 @@ using LoopData = BlockFrequencyInfoImplBase::LoopData; using Weight = BlockFrequencyInfoImplBase::Weight; using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData; -/// \brief Dithering mass distributer. +/// Dithering mass distributer. /// /// This class splits up a single mass into portions by weight, dithering to /// spread out error. No mass is lost. The dithering precision depends on the @@ -155,9 +156,9 @@ static void combineWeight(Weight &W, const Weight &OtherW) { static void combineWeightsBySorting(WeightList &Weights) { // Sort so edges to the same node are adjacent. - std::sort(Weights.begin(), Weights.end(), - [](const Weight &L, - const Weight &R) { return L.TargetNode < R.TargetNode; }); + llvm::sort(Weights.begin(), Weights.end(), + [](const Weight &L, + const Weight &R) { return L.TargetNode < R.TargetNode; }); // Combine adjacent edges. WeightList::iterator O = Weights.begin(); @@ -276,7 +277,7 @@ void BlockFrequencyInfoImplBase::clear() { Loops.clear(); } -/// \brief Clear all memory not needed downstream. +/// Clear all memory not needed downstream. /// /// Releases all memory not used downstream. In particular, saves Freqs. static void cleanup(BlockFrequencyInfoImplBase &BFI) { @@ -315,13 +316,13 @@ bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, #endif if (isLoopHeader(Resolved)) { - DEBUG(debugSuccessor("backedge")); + LLVM_DEBUG(debugSuccessor("backedge")); Dist.addBackedge(Resolved, Weight); return true; } if (Working[Resolved.Index].getContainingLoop() != OuterLoop) { - DEBUG(debugSuccessor(" exit ")); + LLVM_DEBUG(debugSuccessor(" exit ")); Dist.addExit(Resolved, Weight); return true; } @@ -333,7 +334,7 @@ bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, "unhandled irreducible control flow"); // Irreducible backedge. Abort. - DEBUG(debugSuccessor("abort!!!")); + LLVM_DEBUG(debugSuccessor("abort!!!")); return false; } @@ -344,7 +345,7 @@ bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, "unhandled irreducible control flow"); } - DEBUG(debugSuccessor(" local ")); + LLVM_DEBUG(debugSuccessor(" local ")); Dist.addLocal(Resolved, Weight); return true; } @@ -361,10 +362,10 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( return true; } -/// \brief Compute the loop scale for a loop. +/// Compute the loop scale for a loop. void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { // Compute loop scale. 
- DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); + LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); // Infinite loops need special handling. If we give the back edge an infinite // mass, they may saturate all the other scales in the function down to 1, @@ -390,20 +391,21 @@ void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { Loop.Scale = ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse(); - DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() - << " - " << TotalBackedgeMass << ")\n" - << " - scale = " << Loop.Scale << "\n"); + LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" + << BlockMass::getFull() << " - " << TotalBackedgeMass + << ")\n" + << " - scale = " << Loop.Scale << "\n"); } -/// \brief Package up a loop. +/// Package up a loop. void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) { - DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n"); + LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n"); // Clear the subloop exits to prevent quadratic memory usage. for (const BlockNode &M : Loop.Nodes) { if (auto *Loop = Working[M.Index].getPackagedLoop()) Loop->Exits.clear(); - DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n"); + LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n"); } Loop.IsPackaged = true; } @@ -425,7 +427,7 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist) { BlockMass Mass = Working[Source.Index].getMass(); - DEBUG(dbgs() << " => mass: " << Mass << "\n"); + LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n"); // Distribute mass to successors as laid out in Dist. DitheringDistributer D(Dist, Mass); @@ -435,7 +437,7 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, BlockMass Taken = D.takeMass(W.Amount); if (W.Type == Weight::Local) { Working[W.TargetNode.Index].getMass() += Taken; - DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); + LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); continue; } @@ -445,14 +447,14 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, // Check for a backedge. if (W.Type == Weight::Backedge) { OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken; - DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back")); + LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back")); continue; } // This must be an exit. assert(W.Type == Weight::Exit); OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken)); - DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit")); + LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit")); } } @@ -480,28 +482,28 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, } // Translate the floats to integers. 
- DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max - << ", factor = " << ScalingFactor << "\n"); + LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max + << ", factor = " << ScalingFactor << "\n"); for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) { Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor; BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>()); - DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = " - << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled - << ", int = " << BFI.Freqs[Index].Integer << "\n"); + LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = " + << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled + << ", int = " << BFI.Freqs[Index].Integer << "\n"); } } -/// \brief Unwrap a loop package. +/// Unwrap a loop package. /// /// Visits all the members of a loop, adjusting their BlockData according to /// the loop's pseudo-node. static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { - DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop) - << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale - << "\n"); + LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop) + << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale + << "\n"); Loop.Scale *= Loop.Mass.toScaled(); Loop.IsPackaged = false; - DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n"); + LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n"); // Propagate the head scale through the loop. Since members are visited in // RPO, the head scale will be updated by the loop scale first, and then the @@ -511,8 +513,8 @@ static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale : BFI.Freqs[N.Index].Scaled; Scaled64 New = Loop.Scale * F; - DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New - << "\n"); + LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " + << New << "\n"); F = New; } } @@ -544,7 +546,7 @@ void BlockFrequencyInfoImplBase::finalizeMetrics() { cleanup(*this); // Print out the final stats. - DEBUG(dump()); + LLVM_DEBUG(dump()); } BlockFrequency @@ -567,7 +569,7 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, if (!EntryCount) return None; // Use 128 bit APInt to do the arithmetic to avoid overflow. - APInt BlockCount(128, EntryCount.getValue()); + APInt BlockCount(128, EntryCount.getCount()); APInt BlockFreq(128, Freq); APInt EntryFreq(128, getEntryFreq()); BlockCount *= BlockFreq; @@ -669,7 +671,7 @@ template <> struct GraphTraits<IrreducibleGraph> { } // end namespace llvm -/// \brief Find extra irreducible headers. +/// Find extra irreducible headers. /// /// Find entry blocks and other blocks with backedges, which exist when \c G /// contains irreducible sub-SCCs. @@ -694,7 +696,8 @@ static void findIrreducibleHeaders( // This is an entry block. I->second = true; Headers.push_back(Irr.Node); - DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n"); + LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) + << "\n"); break; } } @@ -702,7 +705,7 @@ static void findIrreducibleHeaders( "Expected irreducible CFG; -loop-info is likely invalid"); if (Headers.size() == InSCC.size()) { // Every block is a header. 
- std::sort(Headers.begin(), Headers.end()); + llvm::sort(Headers.begin(), Headers.end()); return; } @@ -725,7 +728,8 @@ static void findIrreducibleHeaders( // Store the extra header. Headers.push_back(Irr.Node); - DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n"); + LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) + << "\n"); break; } if (Headers.back() == Irr.Node) @@ -734,10 +738,10 @@ static void findIrreducibleHeaders( // This is not a header. Others.push_back(Irr.Node); - DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n"); + LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n"); } - std::sort(Headers.begin(), Headers.end()); - std::sort(Others.begin(), Others.end()); + llvm::sort(Headers.begin(), Headers.end()); + llvm::sort(Others.begin(), Others.end()); } static void createIrreducibleLoop( @@ -745,7 +749,7 @@ static void createIrreducibleLoop( LoopData *OuterLoop, std::list<LoopData>::iterator Insert, const std::vector<const IrreducibleGraph::IrrNode *> &SCC) { // Translate the SCC into RPO. - DEBUG(dbgs() << " - found-scc\n"); + LLVM_DEBUG(dbgs() << " - found-scc\n"); LoopData::NodeList Headers; LoopData::NodeList Others; @@ -806,27 +810,28 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { BlockMass LoopMass = BlockMass::getFull(); Distribution Dist; - DEBUG(dbgs() << "adjust-loop-header-mass:\n"); + LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n"); for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { auto &HeaderNode = Loop.Nodes[H]; auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)]; - DEBUG(dbgs() << " - Add back edge mass for node " - << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n"); + LLVM_DEBUG(dbgs() << " - Add back edge mass for node " + << getBlockName(HeaderNode) << ": " << BackedgeMass + << "\n"); if (BackedgeMass.getMass() > 0) Dist.addLocal(HeaderNode, BackedgeMass.getMass()); else - DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n"); + LLVM_DEBUG(dbgs() << " Nothing added. 
Back edge mass is zero\n"); } DitheringDistributer D(Dist, LoopMass); - DEBUG(dbgs() << " Distribute loop mass " << LoopMass - << " to headers using above weights\n"); + LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass + << " to headers using above weights\n"); for (const Weight &W : Dist.Weights) { BlockMass Taken = D.takeMass(W.Amount); assert(W.Type == Weight::Local && "all weights should be local"); Working[W.TargetNode.Index].getMass() = Taken; - DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); + LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); } } @@ -837,6 +842,6 @@ void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) BlockMass Taken = D.takeMass(W.Amount); assert(W.Type == Weight::Local && "all weights should be local"); Working[W.TargetNode.Index].getMass() = Taken; - DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); + LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); } } diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 58ccad89d508..54a657073f0f 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -85,15 +86,17 @@ char BranchProbabilityInfoWrapperPass::ID = 0; // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 static const uint32_t LBH_TAKEN_WEIGHT = 124; static const uint32_t LBH_NONTAKEN_WEIGHT = 4; +// Unlikely edges within a loop are half as likely as other edges +static const uint32_t LBH_UNLIKELY_WEIGHT = 62; -/// \brief Unreachable-terminating branch taken probability. +/// Unreachable-terminating branch taken probability. /// /// This is the probability for a branch being taken to a block that terminates /// (eventually) in unreachable. These are predicted as unlikely as possible. /// All reachable probability will equally share the remaining part. static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); -/// \brief Weight for a branch taken going into a cold block. +/// Weight for a branch taken going into a cold block. /// /// This is the weight for a branch taken toward a block marked /// cold. A block is marked cold if it's postdominated by a @@ -101,7 +104,7 @@ static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); /// are those marked with attribute 'cold'. static const uint32_t CC_TAKEN_WEIGHT = 4; -/// \brief Weight for a branch not-taken into a cold block. +/// Weight for a branch not-taken into a cold block. /// /// This is the weight for a branch not taken toward a block marked /// cold. @@ -116,20 +119,20 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; -/// \brief Invoke-terminating normal branch taken weight +/// Invoke-terminating normal branch taken weight /// /// This is the weight for branching to the normal destination of an invoke /// instruction. We expect this to happen most of the time. Set the weight to an /// absurdly high value so that nested loops subsume it. static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; -/// \brief Invoke-terminating normal branch not-taken weight. +/// Invoke-terminating normal branch not-taken weight. 
/// /// This is the weight for branching to the unwind destination of an invoke /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -/// \brief Add \p BB to PostDominatedByUnreachable set if applicable. +/// Add \p BB to PostDominatedByUnreachable set if applicable. void BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); @@ -160,7 +163,7 @@ BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { PostDominatedByUnreachable.insert(BB); } -/// \brief Add \p BB to PostDominatedByColdCall set if applicable. +/// Add \p BB to PostDominatedByColdCall set if applicable. void BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { assert(!PostDominatedByColdCall.count(BB)); @@ -194,18 +197,16 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { } } -/// \brief Calculate edge weights for successors lead to unreachable. +/// Calculate edge weights for successors lead to unreachable. /// /// Predict that a successor which leads necessarily to an /// unreachable-terminated block as extremely unlikely. bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); + (void) TI; assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); - - // Return false here so that edge weights for InvokeInst could be decided - // in calcInvokeHeuristics(). - if (isa<InvokeInst>(TI)) - return false; + assert(!isa<InvokeInst>(TI) && + "Invokes should have already been handled by calcInvokeHeuristics"); SmallVector<unsigned, 4> UnreachableEdges; SmallVector<unsigned, 4> ReachableEdges; @@ -338,7 +339,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { return true; } -/// \brief Calculate edge weights for edges leading to cold blocks. +/// Calculate edge weights for edges leading to cold blocks. /// /// A cold block is one post-dominated by a block with a call to a /// cold function. Those edges are unlikely to be taken, so we give @@ -348,12 +349,10 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { /// Return false, otherwise. bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); + (void) TI; assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); - - // Return false here so that edge weights for InvokeInst could be decided - // in calcInvokeHeuristics(). - if (isa<InvokeInst>(TI)) - return false; + assert(!isa<InvokeInst>(TI) && + "Invokes should have already been handled by calcInvokeHeuristics"); // Determine which successors are post-dominated by a cold block. SmallVector<unsigned, 4> ColdEdges; @@ -390,7 +389,7 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { return true; } -// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion +// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison // between two pointer or pointer and NULL will fail. 
bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); @@ -457,6 +456,113 @@ static bool isSCCHeader(const BasicBlock *BB, int SccNum, return HeaderMapIt->second; } +// Compute the unlikely successors to the block BB in the loop L, specifically +// those that are unlikely because this is a loop, and add them to the +// UnlikelyBlocks set. +static void +computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, + SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) { + // Sometimes in a loop we have a branch whose condition is made false by + // taking it. This is typically something like + // int n = 0; + // while (...) { + // if (++n >= MAX) { + // n = 0; + // } + // } + // In this sort of situation taking the branch means that at the very least it + // won't be taken again in the next iteration of the loop, so we should + // consider it less likely than a typical branch. + // + // We detect this by looking back through the graph of PHI nodes that sets the + // value that the condition depends on, and seeing if we can reach a successor + // block which can be determined to make the condition false. + // + // FIXME: We currently consider unlikely blocks to be half as likely as other + // blocks, but if we consider the example above the likelyhood is actually + // 1/MAX. We could therefore be more precise in how unlikely we consider + // blocks to be, but it would require more careful examination of the form + // of the comparison expression. + const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return; + + // Check if the branch is based on an instruction compared with a constant + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI || !isa<Instruction>(CI->getOperand(0)) || + !isa<Constant>(CI->getOperand(1))) + return; + + // Either the instruction must be a PHI, or a chain of operations involving + // constants that ends in a PHI which we can then collapse into a single value + // if the PHI value is known. + Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0)); + PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS); + Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1)); + // Collect the instructions until we hit a PHI + SmallVector<BinaryOperator *, 1> InstChain; + while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) && + isa<Constant>(CmpLHS->getOperand(1))) { + // Stop if the chain extends outside of the loop + if (!L->contains(CmpLHS)) + return; + InstChain.push_back(cast<BinaryOperator>(CmpLHS)); + CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0)); + if (CmpLHS) + CmpPHI = dyn_cast<PHINode>(CmpLHS); + } + if (!CmpPHI || !L->contains(CmpPHI)) + return; + + // Trace the phi node to find all values that come from successors of BB + SmallPtrSet<PHINode*, 8> VisitedInsts; + SmallVector<PHINode*, 8> WorkList; + WorkList.push_back(CmpPHI); + VisitedInsts.insert(CmpPHI); + while (!WorkList.empty()) { + PHINode *P = WorkList.back(); + WorkList.pop_back(); + for (BasicBlock *B : P->blocks()) { + // Skip blocks that aren't part of the loop + if (!L->contains(B)) + continue; + Value *V = P->getIncomingValueForBlock(B); + // If the source is a PHI add it to the work list if we haven't + // already visited it. 
+ if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (VisitedInsts.insert(PN).second) + WorkList.push_back(PN); + continue; + } + // If this incoming value is a constant and B is a successor of BB, then + // we can constant-evaluate the compare to see if it makes the branch be + // taken or not. + Constant *CmpLHSConst = dyn_cast<Constant>(V); + if (!CmpLHSConst || + std::find(succ_begin(BB), succ_end(BB), B) == succ_end(BB)) + continue; + // First collapse InstChain + for (Instruction *I : llvm::reverse(InstChain)) { + CmpLHSConst = ConstantExpr::get(I->getOpcode(), CmpLHSConst, + cast<Constant>(I->getOperand(1)), true); + if (!CmpLHSConst) + break; + } + if (!CmpLHSConst) + continue; + // Now constant-evaluate the compare + Constant *Result = ConstantExpr::getCompare(CI->getPredicate(), + CmpLHSConst, CmpConst, true); + // If the result means we don't branch to the block then that block is + // unlikely. + if (Result && + ((Result->isZeroValue() && B == BI->getSuccessor(0)) || + (Result->isOneValue() && B == BI->getSuccessor(1)))) + UnlikelyBlocks.insert(B); + } + } +} + // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, @@ -470,15 +576,22 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, return false; } + SmallPtrSet<const BasicBlock*, 8> UnlikelyBlocks; + if (L) + computeUnlikelySuccessors(BB, L, UnlikelyBlocks); + SmallVector<unsigned, 8> BackEdges; SmallVector<unsigned, 8> ExitingEdges; SmallVector<unsigned, 8> InEdges; // Edges from header to the loop. + SmallVector<unsigned, 8> UnlikelyEdges; for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch // irreducible loops. if (L) { - if (!L->contains(*I)) + if (UnlikelyBlocks.count(*I) != 0) + UnlikelyEdges.push_back(I.getSuccessorIndex()); + else if (!L->contains(*I)) ExitingEdges.push_back(I.getSuccessorIndex()); else if (L->getHeader() == *I) BackEdges.push_back(I.getSuccessorIndex()); @@ -494,42 +607,46 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, } } - if (BackEdges.empty() && ExitingEdges.empty()) + if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty()) return false; // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and // normalize them so that they sum up to one. - BranchProbability Probs[] = {BranchProbability::getZero(), - BranchProbability::getZero(), - BranchProbability::getZero()}; unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + + (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) + (ExitingEdges.empty() ? 
0 : LBH_NONTAKEN_WEIGHT); - if (!BackEdges.empty()) - Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); - if (!InEdges.empty()) - Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); - if (!ExitingEdges.empty()) - Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom); if (uint32_t numBackEdges = BackEdges.size()) { - auto Prob = Probs[0] / numBackEdges; + BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom); + auto Prob = TakenProb / numBackEdges; for (unsigned SuccIdx : BackEdges) setEdgeProbability(BB, SuccIdx, Prob); } if (uint32_t numInEdges = InEdges.size()) { - auto Prob = Probs[1] / numInEdges; + BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom); + auto Prob = TakenProb / numInEdges; for (unsigned SuccIdx : InEdges) setEdgeProbability(BB, SuccIdx, Prob); } if (uint32_t numExitingEdges = ExitingEdges.size()) { - auto Prob = Probs[2] / numExitingEdges; + BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT, + Denom); + auto Prob = NotTakenProb / numExitingEdges; for (unsigned SuccIdx : ExitingEdges) setEdgeProbability(BB, SuccIdx, Prob); } + if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) { + BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT, + Denom); + auto Prob = UnlikelyProb / numUnlikelyEdges; + for (unsigned SuccIdx : UnlikelyEdges) + setEdgeProbability(BB, SuccIdx, Prob); + } + return true; } @@ -752,8 +869,7 @@ BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, if (I != Probs.end()) return I->second; - return {1, - static_cast<uint32_t>(std::distance(succ_begin(Src), succ_end(Src)))}; + return {1, static_cast<uint32_t>(succ_size(Src))}; } BranchProbability @@ -788,8 +904,9 @@ void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src, BranchProbability Prob) { Probs[std::make_pair(Src, IndexInSuccessors)] = Prob; Handles.insert(BasicBlockCallbackVH(Src, this)); - DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors - << " successor probability to " << Prob << "\n"); + LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " + << IndexInSuccessors << " successor probability to " << Prob + << "\n"); } raw_ostream & @@ -814,8 +931,8 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI) { - DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() - << " ----\n\n"); + LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() + << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. assert(PostDominatedByUnreachable.empty()); assert(PostDominatedByColdCall.empty()); @@ -833,18 +950,19 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, if (Scc.size() == 1) continue; - DEBUG(dbgs() << "BPI: SCC " << SccNum << ":"); + LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":"); for (auto *BB : Scc) { - DEBUG(dbgs() << " " << BB->getName()); + LLVM_DEBUG(dbgs() << " " << BB->getName()); SccI.SccNums[BB] = SccNum; } - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); } // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. 
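As a worked instance of the loop-branch normalization above, assume the longstanding weights LBH_TAKEN_WEIGHT = 124 and LBH_NONTAKEN_WEIGHT = 4, plus an LBH_UNLIKELY_WEIGHT of 62 for the newly classified unlikely edges; the exact constants are an assumption of this sketch, not quoted from the patch.

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Taken = 124, NotTaken = 4, Unlikely = 62; // assumed weights
  // A latch block with one back edge, one exiting edge and one unlikely edge:
  const uint32_t Denom = Taken + NotTaken + Unlikely;      // 190
  std::printf("back edge %u/%u (~%.2f)\n", Taken, Denom, double(Taken) / Denom);
  std::printf("exit edge %u/%u (~%.2f)\n", NotTaken, Denom, double(NotTaken) / Denom);
  std::printf("unlikely  %u/%u (~%.2f)\n", Unlikely, Denom, double(Unlikely) / Denom);
  return 0;
}

Each class's share is then split evenly among the edges in that class, mirroring the TakenProb / numBackEdges and UnlikelyProb / numUnlikelyEdges divisions in the code above.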
for (auto BB : post_order(&F.getEntryBlock())) { - DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() + << "\n"); updatePostDominatedByUnreachable(BB); updatePostDominatedByColdCall(BB); // If there is no at least two successors, no sense to set probability. @@ -852,6 +970,8 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, continue; if (calcMetadataWeights(BB)) continue; + if (calcInvokeHeuristics(BB)) + continue; if (calcUnreachableHeuristics(BB)) continue; if (calcColdCallHeuristics(BB)) @@ -864,7 +984,6 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, continue; if (calcFloatingPointHeuristics(BB)) continue; - calcInvokeHeuristics(BB); } PostDominatedByUnreachable.clear(); @@ -879,6 +998,10 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, void BranchProbabilityInfoWrapperPass::getAnalysisUsage( AnalysisUsage &AU) const { + // We require DT so it's available when LI is available. The LI updating code + // asserts that DT is also present so if we don't make sure that we have DT + // here, that assert will trigger. + AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.setPreservesAll(); diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp index fb261755e5d1..fc25cef8ddca 100644 --- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp +++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp @@ -22,6 +22,11 @@ #include "llvm/Support/FileSystem.h" using namespace llvm; +static cl::opt<std::string> CFGFuncName( + "cfg-func-name", cl::Hidden, + cl::desc("The name of a function (or its substring)" + " whose CFG is viewed/printed.")); + namespace { struct CFGViewerLegacyPass : public FunctionPass { static char ID; // Pass identifcation, replacement for typeid @@ -83,6 +88,8 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F, } static void writeCFGToDotFile(Function &F, bool CFGOnly = false) { + if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName)) + return; std::string Filename = ("cfg." + F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; @@ -162,6 +169,8 @@ PreservedAnalyses CFGOnlyPrinterPass::run(Function &F, /// being a 'dot' and 'gv' program in your path. /// void Function::viewCFG() const { + if (!CFGFuncName.empty() && !getName().contains(CFGFuncName)) + return; ViewGraph(this, "cfg" + getName()); } @@ -171,6 +180,8 @@ void Function::viewCFG() const { /// this can make the graph smaller. /// void Function::viewCFGOnly() const { + if (!CFGFuncName.empty() && !getName().contains(CFGFuncName)) + return; ViewGraph(this, "cfg" + getName(), true); } diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index 076a2b205d00..194983418b08 100644 --- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -18,7 +18,7 @@ // // The algorithm used here is based on recursive state machine matching scheme // proposed in "Demand-driven alias analysis for C" by Xin Zheng and Radu -// Rugina. The general idea is to extend the tranditional transitive closure +// Rugina. 
The general idea is to extend the traditional transitive closure // algorithm to perform CFL matching along the way: instead of recording // "whether X is reachable from Y", we keep track of "whether X is reachable // from Y at state Z", where the "state" field indicates where we are in the CFL @@ -337,7 +337,7 @@ public: FunctionInfo(const Function &, const SmallVectorImpl<Value *> &, const ReachabilitySet &, const AliasAttrMap &); - bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const; + bool mayAlias(const Value *, LocationSize, const Value *, LocationSize) const; const AliasSummary &getAliasSummary() const { return Summary; } }; @@ -395,7 +395,7 @@ populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap, } // Sort AliasList for faster lookup - std::sort(AliasList.begin(), AliasList.end()); + llvm::sort(AliasList.begin(), AliasList.end()); } } @@ -479,7 +479,7 @@ static void populateExternalRelations( } // Remove duplicates in ExtRelations - std::sort(ExtRelations.begin(), ExtRelations.end()); + llvm::sort(ExtRelations.begin(), ExtRelations.end()); ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()), ExtRelations.end()); } @@ -516,9 +516,9 @@ CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const { } bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS, - uint64_t LHSSize, + LocationSize LHSSize, const Value *RHS, - uint64_t RHSSize) const { + LocationSize RHSSize) const { assert(LHS && RHS); // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created @@ -645,7 +645,7 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, // relations that are symmetric, we could actually cut the storage by half by // sorting FromNode and ToNode before insertion happens. - // The newly added value alias pair may pontentially generate more memory + // The newly added value alias pair may potentially generate more memory // alias pairs. Check for them here. auto FromNodeBelow = getNodeBelow(Graph, FromNode); auto ToNodeBelow = getNodeBelow(Graph, ToNode); @@ -855,8 +855,9 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA, if (!Fn) { // The only times this is known to happen are when globals + InlineAsm are // involved - DEBUG(dbgs() - << "CFLAndersAA: could not extract parent function information.\n"); + LLVM_DEBUG( + dbgs() + << "CFLAndersAA: could not extract parent function information.\n"); return MayAlias; } } else { diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h index e4e92864061f..86812009da7c 100644 --- a/contrib/llvm/lib/Analysis/CFLGraph.h +++ b/contrib/llvm/lib/Analysis/CFLGraph.h @@ -46,7 +46,7 @@ namespace llvm { namespace cflaa { -/// \brief The Program Expression Graph (PEG) of CFL analysis +/// The Program Expression Graph (PEG) of CFL analysis /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to /// describe flow-insensitive pointer-related behaviors. Given an LLVM function, /// the main purpose of this graph is to abstract away unrelated facts and @@ -154,7 +154,7 @@ public: } }; -///\brief A builder class used to create CFLGraph instance from a given function +///A builder class used to create CFLGraph instance from a given function /// The CFL-AA that uses this builder must provide its own type as a template /// argument. 
This is necessary for interprocedural processing: CFLGraphBuilder /// needs a way of obtaining the summary of other functions when callinsts are @@ -423,17 +423,15 @@ template <typename CFLAA> class CFLGraphBuilder { addNode(Inst); // Check if Inst is a call to a library function that - // allocates/deallocates - // on the heap. Those kinds of functions do not introduce any aliases. + // allocates/deallocates on the heap. Those kinds of functions do not + // introduce any aliases. // TODO: address other common library functions such as realloc(), - // strdup(), - // etc. + // strdup(), etc. if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI)) return; // TODO: Add support for noalias args/all the other fun function - // attributes - // that we can tack on. + // attributes that we can tack on. SmallVector<Function *, 4> Targets; if (getPossibleTargets(CS, Targets)) if (tryInterproceduralAnalysis(CS, Targets)) @@ -515,14 +513,16 @@ template <typename CFLAA> class CFLGraphBuilder { visitGEP(*GEPOp); break; } + case Instruction::PtrToInt: { - auto *Ptr = CE->getOperand(0); - addNode(Ptr, getAttrEscaped()); + addNode(CE->getOperand(0), getAttrEscaped()); break; } - case Instruction::IntToPtr: + + case Instruction::IntToPtr: { addNode(CE, getAttrUnknown()); break; + } case Instruction::BitCast: case Instruction::AddrSpaceCast: @@ -535,48 +535,29 @@ template <typename CFLAA> class CFLGraphBuilder { case Instruction::SIToFP: case Instruction::FPToUI: case Instruction::FPToSI: { - auto *Src = CE->getOperand(0); - addAssignEdge(Src, CE); + addAssignEdge(CE->getOperand(0), CE); break; } + case Instruction::Select: { - auto *TrueVal = CE->getOperand(0); - auto *FalseVal = CE->getOperand(1); - addAssignEdge(TrueVal, CE); - addAssignEdge(FalseVal, CE); - break; - } - case Instruction::InsertElement: { - auto *Vec = CE->getOperand(0); - auto *Val = CE->getOperand(1); - addAssignEdge(Vec, CE); - addStoreEdge(Val, CE); - break; - } - case Instruction::ExtractElement: { - auto *Ptr = CE->getOperand(0); - addLoadEdge(Ptr, CE); + addAssignEdge(CE->getOperand(1), CE); + addAssignEdge(CE->getOperand(2), CE); break; } + + case Instruction::InsertElement: case Instruction::InsertValue: { - auto *Agg = CE->getOperand(0); - auto *Val = CE->getOperand(1); - addAssignEdge(Agg, CE); - addStoreEdge(Val, CE); + addAssignEdge(CE->getOperand(0), CE); + addStoreEdge(CE->getOperand(1), CE); break; } + + case Instruction::ExtractElement: case Instruction::ExtractValue: { - auto *Ptr = CE->getOperand(0); - addLoadEdge(Ptr, CE); - break; - } - case Instruction::ShuffleVector: { - auto *From1 = CE->getOperand(0); - auto *From2 = CE->getOperand(1); - addAssignEdge(From1, CE); - addAssignEdge(From2, CE); + addLoadEdge(CE->getOperand(0), CE); break; } + case Instruction::Add: case Instruction::Sub: case Instruction::FSub: @@ -596,9 +577,11 @@ template <typename CFLAA> class CFLGraphBuilder { case Instruction::AShr: case Instruction::ICmp: case Instruction::FCmp: + case Instruction::ShuffleVector: { addAssignEdge(CE->getOperand(0), CE); addAssignEdge(CE->getOperand(1), CE); break; + } default: llvm_unreachable("Unknown instruction type encountered!"); diff --git a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index eee6d26ba787..30ce13578e54 100644 --- a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -276,8 +276,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, if 
(!MaybeFnA && !MaybeFnB) { // The only times this is known to happen are when globals + InlineAsm are // involved - DEBUG(dbgs() - << "CFLSteensAA: could not extract parent function information.\n"); + LLVM_DEBUG( + dbgs() + << "CFLSteensAA: could not extract parent function information.\n"); return MayAlias; } diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp index ceff94756fe3..b325afb8e7c5 100644 --- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -32,7 +32,7 @@ using namespace llvm; -// Explicit template instantiations and specialization defininitions for core +// Explicit template instantiations and specialization definitions for core // template typedefs. namespace llvm { @@ -75,7 +75,7 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, // If the CGSCC pass wasn't able to provide a valid updated SCC, the // current SCC may simply need to be skipped if invalid. if (UR.InvalidatedSCCs.count(C)) { - DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); + LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); break; } // Check that we didn't miss any update scenario. @@ -96,7 +96,7 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, // ...getContext().yield(); } - // Invaliadtion was handled after each pass in the above loop for the current + // Invalidation was handled after each pass in the above loop for the current // SCC. Therefore, the remaining analysis results in the AnalysisManager are // preserved. We mark this with a set so that we don't need to inspect each // one individually. @@ -353,7 +353,8 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, // Add the current SCC to the worklist as its shape has changed. UR.CWorklist.insert(C); - DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n"); + LLVM_DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist:" << *C + << "\n"); SCC *OldC = C; @@ -372,7 +373,7 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, // We need to propagate an invalidation call to all but the newly current SCC // because the outer pass manager won't do that for us after splitting them. // FIXME: We should accept a PreservedAnalysis from the CG updater so that if - // there are preserved ananalyses we can avoid invalidating them here for + // there are preserved analysis we can avoid invalidating them here for // split-off SCCs. // We know however that this will preserve any FAM proxy so go ahead and mark // that. @@ -389,7 +390,7 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, assert(C != &NewC && "No need to re-visit the current SCC!"); assert(OldC != &NewC && "Already handled the original SCC!"); UR.CWorklist.insert(&NewC); - DEBUG(dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"); + LLVM_DEBUG(dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"); // Ensure new SCCs' function analyses are updated. 
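Most of the mechanical churn in this and the neighbouring files is the rename of the DEBUG macro to LLVM_DEBUG from llvm/Support/Debug.h. A minimal sketch of the new spelling (the DEBUG_TYPE string and helper name are assumptions):

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "example-pass"

static void noteVisit(unsigned NodeId) {
  // Emitted only in asserts builds and only when -debug or
  // -debug-only=example-pass is in effect; compiled out otherwise.
  LLVM_DEBUG(llvm::dbgs() << "visiting node " << NodeId << "\n");
}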
if (NeedFAMProxy) @@ -514,8 +515,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( return false; RC->removeOutgoingEdge(N, *TargetN); - DEBUG(dbgs() << "Deleting outgoing edge from '" << N - << "' to '" << TargetN << "'\n"); + LLVM_DEBUG(dbgs() << "Deleting outgoing edge from '" + << N << "' to '" << TargetN << "'\n"); return true; }), DeadTargets.end()); @@ -546,8 +547,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( assert(NewRC != RC && "Should not encounter the current RefSCC further " "in the postorder list of new RefSCCs."); UR.RCWorklist.insert(NewRC); - DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: " - << *NewRC << "\n"); + LLVM_DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: " + << *NewRC << "\n"); } } @@ -564,8 +565,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( assert(RC->isAncestorOf(TargetRC) && "Cannot potentially form RefSCC cycles here!"); RC->switchOutgoingEdgeToRef(N, *RefTarget); - DEBUG(dbgs() << "Switch outgoing call edge to a ref edge from '" << N - << "' to '" << *RefTarget << "'\n"); + LLVM_DEBUG(dbgs() << "Switch outgoing call edge to a ref edge from '" << N + << "' to '" << *RefTarget << "'\n"); continue; } @@ -593,12 +594,12 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( assert(RC->isAncestorOf(TargetRC) && "Cannot potentially form RefSCC cycles here!"); RC->switchOutgoingEdgeToCall(N, *CallTarget); - DEBUG(dbgs() << "Switch outgoing ref edge to a call edge from '" << N - << "' to '" << *CallTarget << "'\n"); + LLVM_DEBUG(dbgs() << "Switch outgoing ref edge to a call edge from '" << N + << "' to '" << *CallTarget << "'\n"); continue; } - DEBUG(dbgs() << "Switch an internal ref edge to a call edge from '" << N - << "' to '" << *CallTarget << "'\n"); + LLVM_DEBUG(dbgs() << "Switch an internal ref edge to a call edge from '" + << N << "' to '" << *CallTarget << "'\n"); // Otherwise we are switching an internal ref edge to a call edge. This // may merge away some SCCs, and we add those to the UpdateResult. We also @@ -635,7 +636,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // If one of the invalidated SCCs had a cached proxy to a function // analysis manager, we need to create a proxy in the new current SCC as - // the invaliadted SCCs had their functions moved. + // the invalidated SCCs had their functions moved. if (HasFunctionAnalysisProxy) AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G); @@ -661,14 +662,14 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // post-order sequence, and may end up observing more precise context to // optimize the current SCC. UR.CWorklist.insert(C); - DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist: " << *C - << "\n"); + LLVM_DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist: " << *C + << "\n"); // Enqueue in reverse order as we pop off the back of the worklist. 
for (SCC &MovedC : llvm::reverse(make_range(RC->begin() + InitialSCCIndex, RC->begin() + NewSCCIndex))) { UR.CWorklist.insert(&MovedC); - DEBUG(dbgs() << "Enqueuing a newly earlier in post-order SCC: " - << MovedC << "\n"); + LLVM_DEBUG(dbgs() << "Enqueuing a newly earlier in post-order SCC: " + << MovedC << "\n"); } } } diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp index ac3ea2b73fed..7d5d2d2e4496 100644 --- a/contrib/llvm/lib/Analysis/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/CallGraph.cpp @@ -10,6 +10,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Module.h" #include "llvm/IR/Function.h" @@ -96,8 +97,8 @@ void CallGraph::print(raw_ostream &OS) const { for (const auto &I : *this) Nodes.push_back(I.second.get()); - std::sort(Nodes.begin(), Nodes.end(), - [](CallGraphNode *LHS, CallGraphNode *RHS) { + llvm::sort(Nodes.begin(), Nodes.end(), + [](CallGraphNode *LHS, CallGraphNode *RHS) { if (Function *LF = LHS->getFunction()) if (Function *RF = RHS->getFunction()) return LF->getName() < RF->getName(); diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index a2dda58a6a2f..f2211edba216 100644 --- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -120,6 +120,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, bool &DevirtualizedCall) { bool Changed = false; PMDataManager *PM = P->getAsPMDataManager(); + Module &M = CG.getModule(); if (!PM) { CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; @@ -129,8 +130,17 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, } { + unsigned InstrCount = 0; + bool EmitICRemark = M.shouldEmitInstrCountChangedRemark(); TimeRegion PassTimer(getPassTimer(CGSP)); + if (EmitICRemark) + InstrCount = initSizeRemarkInfo(M); Changed = CGSP->runOnSCC(CurSCC); + + // If the pass modified the module, it may have modified the instruction + // count of the module. Try emitting a remark. + if (EmitICRemark) + emitInstrCountChangedRemark(P, M, InstrCount); } // After the CGSCCPass is done, when assertions are enabled, use @@ -162,8 +172,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, // The function pass(es) modified the IR, they may have clobbered the // callgraph. if (Changed && CallGraphUpToDate) { - DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: " - << P->getPassName() << '\n'); + LLVM_DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: " << P->getPassName() + << '\n'); CallGraphUpToDate = false; } return Changed; @@ -181,12 +191,11 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, bool CheckingMode) { DenseMap<Value*, CallGraphNode*> CallSites; - - DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() - << " nodes:\n"; - for (CallGraphNode *CGN : CurSCC) - CGN->dump(); - ); + + LLVM_DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() + << " nodes:\n"; + for (CallGraphNode *CGN + : CurSCC) CGN->dump();); bool MadeChange = false; bool DevirtualizedCall = false; @@ -307,8 +316,8 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // one. 
if (!ExistingNode->getFunction()) { DevirtualizedCall = true; - DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" - << Callee->getName() << "'\n"); + LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" + << Callee->getName() << "'\n"); } } else { CalleeNode = CG.getCallsExternalNode(); @@ -363,17 +372,15 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CallSites.clear(); } - DEBUG(if (MadeChange) { - dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; - for (CallGraphNode *CGN : CurSCC) - CGN->dump(); - if (DevirtualizedCall) - dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; - - } else { - dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; - } - ); + LLVM_DEBUG(if (MadeChange) { + dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; + for (CallGraphNode *CGN : CurSCC) + CGN->dump(); + if (DevirtualizedCall) + dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; + } else { + dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; + }); (void)MadeChange; return DevirtualizedCall; @@ -472,16 +479,17 @@ bool CGPassManager::runOnModule(Module &M) { unsigned Iteration = 0; bool DevirtualizedCall = false; do { - DEBUG(if (Iteration) - dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #" - << Iteration << '\n'); + LLVM_DEBUG(if (Iteration) dbgs() + << " SCCPASSMGR: Re-visiting SCC, iteration #" << Iteration + << '\n'); DevirtualizedCall = false; Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); } while (Iteration++ < MaxIterations && DevirtualizedCall); if (DevirtualizedCall) - DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration - << " times, due to -max-cg-scc-iterations\n"); + LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " + << Iteration + << " times, due to -max-cg-scc-iterations\n"); MaxSCCIterations.updateMax(Iteration); } @@ -648,7 +656,7 @@ Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &OS, bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const { return !SCC.getCallGraph().getModule() .getContext() - .getOptBisect() + .getOptPassGate() .shouldRunPass(this, SCC); } diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp index 3b0026ba10e9..d4f73bdb4361 100644 --- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp +++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -60,7 +61,7 @@ namespace { /// as the given instruction and the use. struct CapturesBefore : public CaptureTracker { - CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT, + CapturesBefore(bool ReturnCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI, OrderedBasicBlock *IC) : OrderedBB(IC), BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {} @@ -140,7 +141,7 @@ namespace { OrderedBasicBlock *OrderedBB; const Instruction *BeforeHere; - DominatorTree *DT; + const DominatorTree *DT; bool ReturnCaptures; bool IncludeI; @@ -184,7 +185,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, /// queries about relative order among instructions in the same basic block. 
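A hedged usage sketch of the two capture queries involved here, written against the const DominatorTree pointer that the definition following this note now takes; the helper names and surrounding setup are assumptions of this sketch.

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Does the alloca's address escape anywhere in its function?
static bool allocaMayEscape(const AllocaInst *AI) {
  return PointerMayBeCaptured(AI, /*ReturnCaptures=*/true,
                              /*StoreCaptures=*/true);
}

// Has it possibly escaped before instruction I (I itself excluded)?
static bool allocaMayEscapeBefore(const AllocaInst *AI, const Instruction *I,
                                  const DominatorTree &DT) {
  return PointerMayBeCapturedBefore(AI, /*ReturnCaptures=*/true,
                                    /*StoreCaptures=*/true, I, &DT,
                                    /*IncludeI=*/false, /*OBB=*/nullptr);
}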
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, - DominatorTree *DT, bool IncludeI, + const DominatorTree *DT, bool IncludeI, OrderedBasicBlock *OBB) { assert(!isa<GlobalValue>(V) && "It doesn't make sense to ask whether a global is captured."); @@ -215,18 +216,22 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); SmallVector<const Use *, Threshold> Worklist; SmallSet<const Use *, Threshold> Visited; - int Count = 0; - for (const Use &U : V->uses()) { - // If there are lots of uses, conservatively say that the value - // is captured to avoid taking too much compile time. - if (Count++ >= Threshold) - return Tracker->tooManyUses(); - - if (!Tracker->shouldExplore(&U)) continue; - Visited.insert(&U); - Worklist.push_back(&U); - } + auto AddUses = [&](const Value *V) { + int Count = 0; + for (const Use &U : V->uses()) { + // If there are lots of uses, conservatively say that the value + // is captured to avoid taking too much compile time. + if (Count++ >= Threshold) + return Tracker->tooManyUses(); + if (!Visited.insert(&U).second) + continue; + if (!Tracker->shouldExplore(&U)) + continue; + Worklist.push_back(&U); + } + }; + AddUses(V); while (!Worklist.empty()) { const Use *U = Worklist.pop_back_val(); @@ -243,6 +248,16 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) break; + // The pointer is not captured if returned pointer is not captured. + // NOTE: CaptureTracking users should not assume that only functions + // marked with nocapture do not capture. This means that places like + // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression + // in BasicAA also need to know about this property. + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS)) { + AddUses(I); + break; + } + // Volatile operations effectively capture the memory location that they // load and store to. if (auto *MI = dyn_cast<MemIntrinsic>(I)) @@ -313,17 +328,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { case Instruction::Select: case Instruction::AddrSpaceCast: // The original value is not captured via this if the new value isn't. - Count = 0; - for (Use &UU : I->uses()) { - // If there are lots of uses, conservatively say that the value - // is captured to avoid taking too much compile time. - if (Count++ >= Threshold) - return Tracker->tooManyUses(); - - if (Visited.insert(&UU).second) - if (Tracker->shouldExplore(&UU)) - Worklist.push_back(&UU); - } + AddUses(I); break; case Instruction::ICmp: { // Don't count comparisons of a no-alias return value against null as diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp index ac7d14ebdaea..46cc87d2b178 100644 --- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp +++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp @@ -61,7 +61,7 @@ static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited, continue; EphValues.insert(V); - DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n"); + LLVM_DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n"); // Append any more operands to consider. 
appendSpeculatableOperands(V, Visited, Worklist); diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index e88b8f14d54e..c5281c57bc19 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -286,7 +286,7 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &DL) { // Trivial case, constant is the global. if ((GV = dyn_cast<GlobalValue>(C))) { - unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); Offset = APInt(BitWidth, 0); return true; } @@ -305,7 +305,7 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, if (!GEP) return false; - unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); APInt TmpOffset(BitWidth, 0); // If the base isn't a global+constant, we aren't either. @@ -320,6 +320,41 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return true; } +Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, + const DataLayout &DL) { + do { + Type *SrcTy = C->getType(); + + // If the type sizes are the same and a cast is legal, just directly + // cast the constant. + if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) { + Instruction::CastOps Cast = Instruction::BitCast; + // If we are going from a pointer to int or vice versa, we spell the cast + // differently. + if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) + Cast = Instruction::IntToPtr; + else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) + Cast = Instruction::PtrToInt; + + if (CastInst::castIsValid(Cast, C, DestTy)) + return ConstantExpr::getCast(Cast, C, DestTy); + } + + // If this isn't an aggregate type, there is nothing we can do to drill down + // and find a bitcastable constant. + if (!SrcTy->isAggregateType()) + return nullptr; + + // We're simulating a load through a pointer that was bitcast to point to + // a different type, so we can try to walk down through the initial + // elements of an aggregate to see if some part of th e aggregate is + // castable to implement the "load" semantic model. + C = C->getAggregateElement(0u); + } while (C); + + return nullptr; +} + namespace { /// Recursive helper to read bits out of global. C is the constant being copied @@ -537,8 +572,8 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, return ConstantInt::get(IntType->getContext(), ResultVal); } -Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, Type *DestTy, - const DataLayout &DL) { +Constant *ConstantFoldLoadThroughBitcastExpr(ConstantExpr *CE, Type *DestTy, + const DataLayout &DL) { auto *SrcPtr = CE->getOperand(0); auto *SrcPtrTy = dyn_cast<PointerType>(SrcPtr->getType()); if (!SrcPtrTy) @@ -549,37 +584,7 @@ Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, Type *DestTy, if (!C) return nullptr; - do { - Type *SrcTy = C->getType(); - - // If the type sizes are the same and a cast is legal, just directly - // cast the constant. - if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) { - Instruction::CastOps Cast = Instruction::BitCast; - // If we are going from a pointer to int or vice versa, we spell the cast - // differently. 
- if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) - Cast = Instruction::IntToPtr; - else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) - Cast = Instruction::PtrToInt; - - if (CastInst::castIsValid(Cast, C, DestTy)) - return ConstantExpr::getCast(Cast, C, DestTy); - } - - // If this isn't an aggregate type, there is nothing we can do to drill down - // and find a bitcastable constant. - if (!SrcTy->isAggregateType()) - return nullptr; - - // We're simulating a load through a pointer that was bitcast to point to - // a different type, so we can try to walk down through the initial - // elements of an aggregate to see if some part of th e aggregate is - // castable to implement the "load" semantic model. - C = C->getAggregateElement(0u); - } while (C); - - return nullptr; + return llvm::ConstantFoldLoadThroughBitcast(C, DestTy, DL); } } // end anonymous namespace @@ -611,7 +616,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, } if (CE->getOpcode() == Instruction::BitCast) - if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, Ty, DL)) + if (Constant *LoadedC = ConstantFoldLoadThroughBitcastExpr(CE, Ty, DL)) return LoadedC; // Instead of loading constant c string, use corresponding integer value @@ -808,26 +813,26 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' for (unsigned i = 1, e = Ops.size(); i != e; ++i) - if (!isa<ConstantInt>(Ops[i])) { - - // If this is "gep i8* Ptr, (sub 0, V)", fold this as: - // "inttoptr (sub (ptrtoint Ptr), V)" - if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { - auto *CE = dyn_cast<ConstantExpr>(Ops[1]); - assert((!CE || CE->getType() == IntPtrTy) && - "CastGEPIndices didn't canonicalize index types!"); - if (CE && CE->getOpcode() == Instruction::Sub && - CE->getOperand(0)->isNullValue()) { - Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); - Res = ConstantExpr::getSub(Res, CE->getOperand(1)); - Res = ConstantExpr::getIntToPtr(Res, ResTy); - if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI)) - Res = FoldedRes; - return Res; + if (!isa<ConstantInt>(Ops[i])) { + + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: + // "inttoptr (sub (ptrtoint Ptr), V)" + if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { + auto *CE = dyn_cast<ConstantExpr>(Ops[1]); + assert((!CE || CE->getType() == IntPtrTy) && + "CastGEPIndices didn't canonicalize index types!"); + if (CE && CE->getOpcode() == Instruction::Sub && + CE->getOperand(0)->isNullValue()) { + Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); + Res = ConstantExpr::getSub(Res, CE->getOperand(1)); + Res = ConstantExpr::getIntToPtr(Res, ResTy); + if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI)) + Res = FoldedRes; + return Res; + } } + return nullptr; } - return nullptr; - } unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy); APInt Offset = @@ -1387,6 +1392,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { case Intrinsic::fma: case Intrinsic::fmuladd: case Intrinsic::copysign: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: case Intrinsic::round: case Intrinsic::masked_load: case Intrinsic::sadd_with_overflow: @@ -1582,16 +1589,37 @@ double getValueAsDouble(ConstantFP *Op) { Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, ArrayRef<Constant *> Operands, - const TargetLibraryInfo *TLI) { 
+ const TargetLibraryInfo *TLI, + ImmutableCallSite CS) { if (Operands.size() == 1) { if (isa<UndefValue>(Operands[0])) { // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN if (IntrinsicID == Intrinsic::cos) return Constant::getNullValue(Ty); if (IntrinsicID == Intrinsic::bswap || - IntrinsicID == Intrinsic::bitreverse) + IntrinsicID == Intrinsic::bitreverse || + IntrinsicID == Intrinsic::launder_invariant_group || + IntrinsicID == Intrinsic::strip_invariant_group) return Operands[0]; } + + if (isa<ConstantPointerNull>(Operands[0])) { + // launder(null) == null == strip(null) iff in addrspace 0 + if (IntrinsicID == Intrinsic::launder_invariant_group || + IntrinsicID == Intrinsic::strip_invariant_group) { + // If instruction is not yet put in a basic block (e.g. when cloning + // a function during inlining), CS caller may not be available. + // So check CS's BB first before querying CS.getCaller. + const Function *Caller = CS.getParent() ? CS.getCaller() : nullptr; + if (Caller && + !NullPointerIsDefined( + Caller, Operands[0]->getType()->getPointerAddressSpace())) { + return Operands[0]; + } + return nullptr; + } + } + if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) { if (IntrinsicID == Intrinsic::convert_to_fp16) { APFloat Val(Op->getValueAPF()); @@ -1988,7 +2016,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, VectorType *VTy, ArrayRef<Constant *> Operands, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + ImmutableCallSite CS) { SmallVector<Constant *, 4> Result(VTy->getNumElements()); SmallVector<Constant *, 4> Lane(Operands.size()); Type *Ty = VTy->getElementType(); @@ -2051,7 +2080,7 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, } // Use the regular scalar folding to simplify this column. 
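The ConstantFoldLoadThroughBitcast overload hoisted to a public function earlier in this file can now be called directly on a constant. A minimal sketch, assuming a default DataLayout and the usual headers; not drawn from the patch itself:

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  const DataLayout &DL = M.getDataLayout();

  // Reinterpret an i64 constant as a double: same bit width, legal bitcast.
  Constant *C = ConstantInt::get(Type::getInt64Ty(Ctx), 42);
  if (Constant *Folded =
          ConstantFoldLoadThroughBitcast(C, Type::getDoubleTy(Ctx), DL)) {
    Folded->print(outs());
    outs() << "\n";
  }
  return 0;
}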
- Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI); + Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, CS); if (!Folded) return nullptr; Result[I] = Folded; @@ -2076,9 +2105,9 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, if (auto *VTy = dyn_cast<VectorType>(Ty)) return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands, - F->getParent()->getDataLayout(), TLI); + F->getParent()->getDataLayout(), TLI, CS); - return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI); + return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI, CS); } bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp index dd5af9d43ef8..4cafb7da16d3 100644 --- a/contrib/llvm/lib/Analysis/Delinearization.cpp +++ b/contrib/llvm/lib/Analysis/Delinearization.cpp @@ -69,16 +69,6 @@ bool Delinearization::runOnFunction(Function &F) { return false; } -static Value *getPointerOperand(Instruction &Inst) { - if (LoadInst *Load = dyn_cast<LoadInst>(&Inst)) - return Load->getPointerOperand(); - else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst)) - return Store->getPointerOperand(); - else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst)) - return Gep->getPointerOperand(); - return nullptr; -} - void Delinearization::print(raw_ostream &O, const Module *) const { O << "Delinearization on function " << F->getName() << ":\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { @@ -93,7 +83,7 @@ void Delinearization::print(raw_ostream &O, const Module *) const { // Delinearize the memory access as analyzed in all the surrounding loops. // Do not analyze memory accesses outside loops. for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { - const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(Inst), L); const SCEVUnknown *BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn)); diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp index de7d21f9f133..58c5bccff65d 100644 --- a/contrib/llvm/lib/Analysis/DemandedBits.cpp +++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp @@ -283,7 +283,7 @@ void DemandedBits::performAnalysis() { if (!isAlwaysLive(&I)) continue; - DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n"); + LLVM_DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n"); // For integer-valued instructions, set up an initial empty set of alive // bits and add the instruction to the work list. 
For other instructions // add their operands to the work list (for integer values operands, mark @@ -313,13 +313,13 @@ void DemandedBits::performAnalysis() { while (!Worklist.empty()) { Instruction *UserI = Worklist.pop_back_val(); - DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI); + LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI); APInt AOut; if (UserI->getType()->isIntegerTy()) { AOut = AliveBits[UserI]; - DEBUG(dbgs() << " Alive Out: " << AOut); + LLVM_DEBUG(dbgs() << " Alive Out: " << AOut); } - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); if (!UserI->getType()->isIntegerTy()) Visited.insert(UserI); diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp index 34eccc07f265..79c2728d5620 100644 --- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -24,8 +24,7 @@ // Both of these are conservative weaknesses; // that is, not a source of correctness problems. // -// The implementation depends on the GEP instruction to differentiate -// subscripts. Since Clang linearizes some array subscripts, the dependence +// Since Clang linearizes some array subscripts, the dependence // analysis is using SCEV->delinearize to recover the representation of multiple // subscripts, and thus avoid the more expensive and less precise MIV tests. The // delinearization is controlled by the flag -da-delinearize. @@ -59,6 +58,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" @@ -108,8 +108,8 @@ STATISTIC(BanerjeeIndependence, "Banerjee independence"); STATISTIC(BanerjeeSuccesses, "Banerjee successes"); static cl::opt<bool> -Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, - cl::desc("Try to delinearize array references.")); + Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Try to delinearize array references.")); //===----------------------------------------------------------------------===// // basics @@ -415,9 +415,9 @@ LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { // PLDI 1991 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { ++DeltaApplications; - DEBUG(dbgs() << "\tintersect constraints\n"); - DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); - DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); + LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); + LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); + LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); assert(!Y->isPoint() && "Y must not be a Point"); if (X->isAny()) { if (Y->isAny()) @@ -433,7 +433,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { } if (X->isDistance() && Y->isDistance()) { - DEBUG(dbgs() << "\t intersect 2 distances\n"); + LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) return false; if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { @@ -460,12 +460,12 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { "We shouldn't ever see X->isPoint() && Y->isPoint()"); if (X->isLine() && Y->isLine()) { - DEBUG(dbgs() << "\t intersect 2 lines\n"); + LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); const SCEV *Prod1 = SE->getMulExpr(X->getA(), 
Y->getB()); const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { // slopes are equal, so lines are parallel - DEBUG(dbgs() << "\t\tsame slope\n"); + LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); Prod1 = SE->getMulExpr(X->getC(), Y->getB()); Prod2 = SE->getMulExpr(X->getB(), Y->getC()); if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) @@ -479,7 +479,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { } if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { // slopes differ, so lines intersect - DEBUG(dbgs() << "\t\tdifferent slopes\n"); + LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); @@ -501,10 +501,10 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { APInt Xbot = A1B2_A2B1->getAPInt(); APInt Ytop = C1A2_C2A1->getAPInt(); APInt Ybot = A2B1_A1B2->getAPInt(); - DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); - DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); - DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); - DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); + LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); + LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); + LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); + LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); APInt Xq = Xtop; // these need to be initialized, even APInt Xr = Xtop; // though they're just going to be overwritten APInt::sdivrem(Xtop, Xbot, Xq, Xr); @@ -516,7 +516,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { ++DeltaSuccesses; return true; } - DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); + LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); if (Xq.slt(0) || Yq.slt(0)) { X->setEmpty(); ++DeltaSuccesses; @@ -525,7 +525,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { if (const SCEVConstant *CUB = collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { const APInt &UpperBound = CUB->getAPInt(); - DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); + LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { X->setEmpty(); ++DeltaSuccesses; @@ -545,7 +545,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); if (X->isPoint() && Y->isLine()) { - DEBUG(dbgs() << "\t intersect Point and Line\n"); + LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); @@ -622,13 +622,38 @@ void Dependence::dump(raw_ostream &OS) const { OS << "!\n"; } +// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their +// underlaying objects. If LocA and LocB are known to not alias (for any reason: +// tbaa, non-overlapping regions etc), then it is known there is no dependecy. +// Otherwise the underlying objects are checked to see if they point to +// different identifiable objects. 
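As a source-level illustration of the "different identifiable objects" case the rewritten helper below checks for; illustrative code, not part of the patch:

void copyFirstN(int N) {
  int A[16], B[16];               // two distinct allocas: identified objects
  for (int i = 0; i < N && i < 16; ++i) {
    A[i] = i;                     // underlying object of this store: A
    B[i] = A[i];                  // underlying object of the store: B
  }
  // Accesses rooted at A can never alias accesses rooted at B, so
  // underlyingObjectsAlias returns NoAlias and no dependence is recorded
  // between the two stores.
}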
static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, - const DataLayout &DL, const Value *A, - const Value *B) { - const Value *AObj = GetUnderlyingObject(A, DL); - const Value *BObj = GetUnderlyingObject(B, DL); - return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), - BObj, DL.getTypeStoreSize(BObj->getType())); + const DataLayout &DL, + const MemoryLocation &LocA, + const MemoryLocation &LocB) { + // Check the original locations (minus size) for noalias, which can happen for + // tbaa, incompatible underlying object locations, etc. + MemoryLocation LocAS(LocA.Ptr, MemoryLocation::UnknownSize, LocA.AATags); + MemoryLocation LocBS(LocB.Ptr, MemoryLocation::UnknownSize, LocB.AATags); + if (AA->alias(LocAS, LocBS) == NoAlias) + return NoAlias; + + // Check the underlying objects are the same + const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL); + const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL); + + // If the underlying objects are the same, they must alias + if (AObj == BObj) + return MustAlias; + + // We may have hit the recursion limit for underlying objects, or have + // underlying objects where we don't know they will alias. + if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) + return MayAlias; + + // Otherwise we know the objects are different and both identified objects so + // must not alias. + return NoAlias; } @@ -644,17 +669,6 @@ bool isLoadOrStore(const Instruction *I) { } -static -Value *getPointerOperand(Instruction *I) { - if (LoadInst *LI = dyn_cast<LoadInst>(I)) - return LI->getPointerOperand(); - if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return SI->getPointerOperand(); - llvm_unreachable("Value is not load or store instruction"); - return nullptr; -} - - // Examines the loop nesting of the Src and Dst // instructions and establishes their shared loops. Sets the variables // CommonLevels, SrcLevels, and MaxLevels. @@ -980,6 +994,57 @@ bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, } } +/// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) +/// with some extra checking if S is an AddRec and we can prove less-than using +/// the loop bounds. +bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { + // First unify to the same type + auto *SType = dyn_cast<IntegerType>(S->getType()); + auto *SizeType = dyn_cast<IntegerType>(Size->getType()); + if (!SType || !SizeType) + return false; + Type *MaxType = + (SType->getBitWidth() >= SizeType->getBitWidth()) ? 
SType : SizeType; + S = SE->getTruncateOrZeroExtend(S, MaxType); + Size = SE->getTruncateOrZeroExtend(Size, MaxType); + + // Special check for addrecs using BE taken count + const SCEV *Bound = SE->getMinusSCEV(S, Size); + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { + if (AddRec->isAffine()) { + const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(BECount)) { + const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); + if (SE->isKnownNegative(Limit)) + return true; + } + } + } + + // Check using normal isKnownNegative + const SCEV *LimitedBound = + SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); + return SE->isKnownNegative(LimitedBound); +} + +bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { + bool Inbounds = false; + if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) + Inbounds = SrcGEP->isInBounds(); + if (Inbounds) { + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + if (AddRec->isAffine()) { + // We know S is for Ptr, the operand on a load/store, so doesn't wrap. + // If both parts are NonNegative, the end result will be NonNegative + if (SE->isKnownNonNegative(AddRec->getStart()) && + SE->isKnownNonNegative(AddRec->getOperand(1))) + return true; + } + } + } + + return SE->isKnownNonNegative(S); +} // All subscripts are all the same type. // Loop bound may be smaller (e.g., a char). @@ -1019,19 +1084,19 @@ const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, // Return true if dependence disproved. bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, FullDependence &Result) const { - DEBUG(dbgs() << " src = " << *Src << "\n"); - DEBUG(dbgs() << " dst = " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); + LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); ++ZIVapplications; if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { - DEBUG(dbgs() << " provably dependent\n"); + LLVM_DEBUG(dbgs() << " provably dependent\n"); return false; // provably dependent } if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { - DEBUG(dbgs() << " provably independent\n"); + LLVM_DEBUG(dbgs() << " provably independent\n"); ++ZIVindependence; return true; // provably independent } - DEBUG(dbgs() << " possibly dependent\n"); + LLVM_DEBUG(dbgs() << " possibly dependent\n"); Result.Consistent = false; return false; // possibly dependent } @@ -1068,25 +1133,25 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, const Loop *CurLoop, unsigned Level, FullDependence &Result, Constraint &NewConstraint) const { - DEBUG(dbgs() << "\tStrong SIV test\n"); - DEBUG(dbgs() << "\t Coeff = " << *Coeff); - DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); - DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); - DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); - DEBUG(dbgs() << "\t DstConst = " << *DstConst); - DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); + LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); + LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); + LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); + LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); + LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); + LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); + LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); ++StrongSIVapplications; assert(0 < Level && Level <= CommonLevels && "level out of range"); Level--; const SCEV *Delta = 
SE->getMinusSCEV(SrcConst, DstConst); - DEBUG(dbgs() << "\t Delta = " << *Delta); - DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); + LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); + LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); // check that |Delta| < iteration count if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { - DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); - DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); + LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); + LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); const SCEV *AbsDelta = SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); const SCEV *AbsCoeff = @@ -1107,8 +1172,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, APInt Distance = ConstDelta; // these need to be initialized APInt Remainder = ConstDelta; APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); - DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); - DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); + LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); // Make sure Coeff divides Delta exactly if (Remainder != 0) { // Coeff doesn't divide Distance, no dependence @@ -1135,7 +1200,7 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, } else { if (Coeff->isOne()) { - DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); + LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); Result.DV[Level].Distance = Delta; // since X/1 == X NewConstraint.setDistance(Delta, CurLoop); } @@ -1204,16 +1269,16 @@ bool DependenceInfo::weakCrossingSIVtest( const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, const Loop *CurLoop, unsigned Level, FullDependence &Result, Constraint &NewConstraint, const SCEV *&SplitIter) const { - DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); - DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); - DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); - DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); + LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); + LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); ++WeakCrossingSIVapplications; assert(0 < Level && Level <= CommonLevels && "Level out of range"); Level--; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); - DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); if (Delta->isZero()) { Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); @@ -1243,7 +1308,7 @@ bool DependenceInfo::weakCrossingSIVtest( SplitIter = SE->getUDivExpr( SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); - DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); + LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); if (!ConstDelta) @@ -1251,8 +1316,8 @@ bool DependenceInfo::weakCrossingSIVtest( // We're certain that ConstCoeff > 0; therefore, // if Delta < 0, then no dependence. 
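Editorial note: the weak-crossing SIV hunk above solves subscripts of the form a*i + c1 against -a*i + c2 and records the crossing iteration in SplitIter so loop splitting can break the dependence. The following standalone sketch shows the same arithmetic on plain integers; the function name, the integer types, and the simple main() are illustrative stand-ins, since the pass itself reasons about SCEVs and APInts.

#include <cstdint>
#include <cstdio>
#include <optional>

// Weak-crossing SIV on plain integers: source subscript a*i + c1, destination
// subscript -a*i + c2, coefficient a > 0, trip count UB.  Returns the crossing
// iteration when a dependence cannot be ruled out.
std::optional<int64_t> weakCrossingSplit(int64_t a, int64_t c1, int64_t c2,
                                         int64_t UB) {
  int64_t Delta = c2 - c1;     // a dependence needs a*(i + i') == Delta
  if (Delta < 0)
    return std::nullopt;       // with a > 0 there is no solution with i, i' >= 0
  if (Delta > 2 * a * UB)
    return std::nullopt;       // the crossing point lies past the last iteration
  // If 2*a does not divide Delta the "=" direction is impossible, but the
  // crossing point below is still where splitting the loop breaks the dependence.
  return Delta / (2 * a);
}

int main() {
  // for (i = 0; i <= 10; ++i) { A[2*i + 1] = ...; ... = A[9 - 2*i]; }
  if (auto S = weakCrossingSplit(2, 1, 9, 10))
    std::printf("split at iteration %lld\n", static_cast<long long>(*S)); // 2
}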
- DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); - DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); + LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); if (SE->isKnownNegative(Delta)) { // No dependence, Delta < 0 ++WeakCrossingSIVindependence; @@ -1263,11 +1328,11 @@ bool DependenceInfo::weakCrossingSIVtest( // We're certain that Delta > 0 and ConstCoeff > 0. // Check Delta/(2*ConstCoeff) against upper loop bound if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { - DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), ConstantTwo); - DEBUG(dbgs() << "\t ML = " << *ML << "\n"); + LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { // Delta too big, no dependence ++WeakCrossingSIVindependence; @@ -1295,19 +1360,19 @@ bool DependenceInfo::weakCrossingSIVtest( APInt Distance = APDelta; // these need to be initialzed APInt Remainder = APDelta; APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); - DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); if (Remainder != 0) { // Coeff doesn't divide Delta, no dependence ++WeakCrossingSIVindependence; ++WeakCrossingSIVsuccesses; return true; } - DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); + LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible APInt Two = APInt(Distance.getBitWidth(), 2, true); Remainder = Distance.srem(Two); - DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); if (Remainder != 0) { // Equal direction isn't possible Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); @@ -1343,7 +1408,7 @@ static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, APInt::sdivrem(G0, G1, Q, R); } G = G1; - DEBUG(dbgs() << "\t GCD = " << G << "\n"); + LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); X = AM.slt(0) ? -A1 : A1; Y = BM.slt(0) ? 
B1 : -B1; @@ -1416,17 +1481,17 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, const Loop *CurLoop, unsigned Level, FullDependence &Result, Constraint &NewConstraint) const { - DEBUG(dbgs() << "\tExact SIV test\n"); - DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); - DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); - DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); - DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); + LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); + LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); + LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); ++ExactSIVapplications; assert(0 < Level && Level <= CommonLevels && "Level out of range"); Level--; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); - DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, CurLoop); const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); @@ -1447,7 +1512,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, return true; } - DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); + LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); // since SCEV construction normalizes, LM = 0 APInt UM(Bits, 1, true); @@ -1456,7 +1521,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, if (const SCEVConstant *CUB = collectConstantUpperBound(CurLoop, Delta->getType())) { UM = CUB->getAPInt(); - DEBUG(dbgs() << "\t UM = " << UM << "\n"); + LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); UMvalid = true; } @@ -1467,18 +1532,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, APInt TMUL = BM.sdiv(G); if (TMUL.sgt(0)) { TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); - DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (UMvalid) { TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); - DEBUG(dbgs() << "\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); - DEBUG(dbgs() << "\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (UMvalid) { TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); - DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } @@ -1486,18 +1551,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, TMUL = AM.sdiv(G); if (TMUL.sgt(0)) { TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); - DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (UMvalid) { TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); - DEBUG(dbgs() << "\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); - DEBUG(dbgs() << "\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (UMvalid) { TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); - DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } if (TL.sgt(TU)) { @@ -1512,15 +1577,15 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // less than APInt SaveTU(TU); // 
save these APInt SaveTL(TL); - DEBUG(dbgs() << "\t exploring LT direction\n"); + LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); TMUL = AM - BM; if (TMUL.sgt(0)) { TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); - DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); - DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { NewDirection |= Dependence::DVEntry::LT; @@ -1530,23 +1595,23 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // equal TU = SaveTU; // restore TL = SaveTL; - DEBUG(dbgs() << "\t exploring EQ direction\n"); + LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); if (TMUL.sgt(0)) { TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); - DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); - DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } TMUL = BM - AM; if (TMUL.sgt(0)) { TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); - DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); - DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { NewDirection |= Dependence::DVEntry::EQ; @@ -1556,14 +1621,14 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // greater than TU = SaveTU; // restore TL = SaveTL; - DEBUG(dbgs() << "\t exploring GT direction\n"); + LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); if (TMUL.sgt(0)) { TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); - DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); - DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { NewDirection |= Dependence::DVEntry::GT; @@ -1607,9 +1672,9 @@ bool isRemainderZero(const SCEVConstant *Dividend, // // If i is not an integer, there's no dependence. // If i < 0 or > UB, there's no dependence. -// If i = 0, the direction is <= and peeling the +// If i = 0, the direction is >= and peeling the // 1st iteration will break the dependence. -// If i = UB, the direction is >= and peeling the +// If i = UB, the direction is <= and peeling the // last iteration will break the dependence. // Otherwise, the direction is *. // @@ -1629,10 +1694,10 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, // For the WeakSIV test, it's possible the loop isn't common to // the Src and Dst loops. If it isn't, then there's no need to // record a direction. 
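Editorial note: the comment being corrected above describes the weak-zero (src) SIV "peel" cases, where one subscript is loop-invariant and the only conflicting iteration is the first or the last one. A minimal C++ illustration of the PeelFirst case follows; the function names are made up for the example and the loop is ordinary source code rather than IR.

#include <vector>

// One subscript is the constant 0, the other is i, so the two references can
// only touch the same element when i == 0.
void beforePeel(std::vector<int> &A, int n) {
  for (int i = 0; i < n; ++i)
    A[i] = A[0] + 1;   // the store to A[i] only meets the load of A[0] at i == 0
}

void afterPeel(std::vector<int> &A, int n) {
  if (n > 0)
    A[0] = A[0] + 1;   // the peeled first iteration carries the dependence
  for (int i = 1; i < n; ++i)
    A[i] = A[0] + 1;   // remaining loop: the stores never overlap the load of A[0]
}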
- DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); - DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); - DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); - DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); + LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); + LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); ++WeakZeroSIVapplications; assert(0 < Level && Level <= MaxLevels && "Level out of range"); Level--; @@ -1640,10 +1705,10 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, CurLoop); - DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { if (Level < CommonLevels) { - Result.DV[Level].Direction &= Dependence::DVEntry::LE; + Result.DV[Level].Direction &= Dependence::DVEntry::GE; Result.DV[Level].PeelFirst = true; ++WeakZeroSIVsuccesses; } @@ -1661,7 +1726,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, // check that Delta/SrcCoeff < iteration count // really check NewDelta < count*AbsCoeff if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { - DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { ++WeakZeroSIVindependence; @@ -1671,7 +1736,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { // dependences caused by last iteration if (Level < CommonLevels) { - Result.DV[Level].Direction &= Dependence::DVEntry::GE; + Result.DV[Level].Direction &= Dependence::DVEntry::LE; Result.DV[Level].PeelLast = true; ++WeakZeroSIVsuccesses; } @@ -1738,10 +1803,10 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, Constraint &NewConstraint) const { // For the WeakSIV test, it's possible the loop isn't common to the // Src and Dst loops. If it isn't, then there's no need to record a direction. 
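Editorial note: both weak-zero variants in this file boil down to the same feasibility question: with one constant subscript and one subscript a*i + c, does an integer iteration i in [0, UB] satisfy a*i == Delta, and if so is it the first or the last one? A standalone sketch of that check, with names and plain-integer types chosen for illustration only:

#include <cstdint>

enum class WeakZeroResult { NoDependence, PeelFirst, PeelLast, Unknown };

// Feasibility of a*i == Delta for an integer i with 0 <= i <= UB.
WeakZeroResult weakZeroSIV(int64_t a, int64_t Delta, int64_t UB) {
  if (a == 0)
    return Delta == 0 ? WeakZeroResult::Unknown : WeakZeroResult::NoDependence;
  if (a < 0) { a = -a; Delta = -Delta; }   // normalize to a positive coefficient
  if (Delta < 0 || Delta > a * UB)
    return WeakZeroResult::NoDependence;   // i would fall outside [0, UB]
  if (Delta % a != 0)
    return WeakZeroResult::NoDependence;   // i would not be an integer
  if (Delta == 0)
    return WeakZeroResult::PeelFirst;      // conflict only with iteration 0
  if (Delta == a * UB)
    return WeakZeroResult::PeelLast;       // conflict only with iteration UB
  return WeakZeroResult::Unknown;          // some interior iteration; direction '*'
}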
- DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); - DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); - DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); - DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); + LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); + LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); ++WeakZeroSIVapplications; assert(0 < Level && Level <= SrcLevels && "Level out of range"); Level--; @@ -1749,7 +1814,7 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, CurLoop); - DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { if (Level < CommonLevels) { Result.DV[Level].Direction &= Dependence::DVEntry::LE; @@ -1770,7 +1835,7 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, // check that Delta/SrcCoeff < iteration count // really check NewDelta < count*AbsCoeff if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { - DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { ++WeakZeroSIVindependence; @@ -1819,15 +1884,15 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, const SCEV *SrcConst, const SCEV *DstConst, const Loop *SrcLoop, const Loop *DstLoop, FullDependence &Result) const { - DEBUG(dbgs() << "\tExact RDIV test\n"); - DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); - DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); - DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); - DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); + LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); + LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); + LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); ++ExactRDIVapplications; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); - DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); @@ -1845,7 +1910,7 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, return true; } - DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); + LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); // since SCEV construction seems to normalize, LM = 0 APInt SrcUM(Bits, 1, true); @@ -1854,7 +1919,7 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, if (const SCEVConstant *UpperBound = collectConstantUpperBound(SrcLoop, Delta->getType())) { SrcUM = UpperBound->getAPInt(); - DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); + LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); SrcUMvalid = true; } @@ -1864,7 +1929,7 @@ bool DependenceInfo::exactRDIVtest(const 
SCEV *SrcCoeff, const SCEV *DstCoeff, if (const SCEVConstant *UpperBound = collectConstantUpperBound(DstLoop, Delta->getType())) { DstUM = UpperBound->getAPInt(); - DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); + LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); DstUMvalid = true; } @@ -1875,18 +1940,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, APInt TMUL = BM.sdiv(G); if (TMUL.sgt(0)) { TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); - DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (SrcUMvalid) { TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); - DEBUG(dbgs() << "\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); - DEBUG(dbgs() << "\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (SrcUMvalid) { TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); - DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } @@ -1894,18 +1959,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, TMUL = AM.sdiv(G); if (TMUL.sgt(0)) { TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); - DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (DstUMvalid) { TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); - DEBUG(dbgs() << "\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); - DEBUG(dbgs() << "\t TU = " << TU << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (DstUMvalid) { TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); - DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } if (TL.sgt(TU)) @@ -1961,27 +2026,27 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, const Loop *Loop1, const Loop *Loop2) const { ++SymbolicRDIVapplications; - DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); - DEBUG(dbgs() << "\t A1 = " << *A1); - DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); - DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); - DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); - DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); + LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); + LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); + LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); + LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); + LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); + LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); - DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); - DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); + LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); + LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); - DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); - DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); + LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); + LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); if (SE->isKnownNonNegative(A1)) { if (SE->isKnownNonNegative(A2)) { // A1 >= 0 && A2 >= 0 if (N1) { // make sure that c2 - c1 <= a1*N1 const SCEV *A1N1 = SE->getMulExpr(A1, N1); - DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); + LLVM_DEBUG(dbgs() << "\t A1*N1 = " << 
*A1N1 << "\n"); if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { ++SymbolicRDIVindependence; return true; @@ -1990,7 +2055,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, if (N2) { // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 const SCEV *A2N2 = SE->getMulExpr(A2, N2); - DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); + LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { ++SymbolicRDIVindependence; return true; @@ -2004,7 +2069,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, const SCEV *A1N1 = SE->getMulExpr(A1, N1); const SCEV *A2N2 = SE->getMulExpr(A2, N2); const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); - DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); + LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { ++SymbolicRDIVindependence; return true; @@ -2025,7 +2090,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, const SCEV *A1N1 = SE->getMulExpr(A1, N1); const SCEV *A2N2 = SE->getMulExpr(A2, N2); const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); - DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); + LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { ++SymbolicRDIVindependence; return true; @@ -2042,7 +2107,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, if (N1) { // make sure that a1*N1 <= c2 - c1 const SCEV *A1N1 = SE->getMulExpr(A1, N1); - DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); + LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { ++SymbolicRDIVindependence; return true; @@ -2051,7 +2116,7 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, if (N2) { // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 const SCEV *A2N2 = SE->getMulExpr(A2, N2); - DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); + LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { ++SymbolicRDIVindependence; return true; @@ -2074,8 +2139,8 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, FullDependence &Result, Constraint &NewConstraint, const SCEV *&SplitIter) const { - DEBUG(dbgs() << " src = " << *Src << "\n"); - DEBUG(dbgs() << " dst = " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); + LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); if (SrcAddRec && DstAddRec) { @@ -2151,8 +2216,8 @@ bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, const SCEV *SrcCoeff, *DstCoeff; const Loop *SrcLoop, *DstLoop; - DEBUG(dbgs() << " src = " << *Src << "\n"); - DEBUG(dbgs() << " dst = " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); + LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); if (SrcAddRec && DstAddRec) { @@ -2208,8 +2273,8 @@ bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, const SmallBitVector &Loops, FullDependence 
&Result) const { - DEBUG(dbgs() << " src = " << *Src << "\n"); - DEBUG(dbgs() << " dst = " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); + LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); Result.Consistent = false; return gcdMIVtest(Src, Dst, Result) || banerjeeMIVtest(Src, Dst, Loops, Result); @@ -2249,7 +2314,7 @@ const SCEVConstant *getConstantPart(const SCEV *Expr) { // to "a common divisor". bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, FullDependence &Result) const { - DEBUG(dbgs() << "starting gcd\n"); + LLVM_DEBUG(dbgs() << "starting gcd\n"); ++GCDapplications; unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); APInt RunningGCD = APInt::getNullValue(BitWidth); @@ -2294,7 +2359,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, APInt ExtraGCD = APInt::getNullValue(BitWidth); const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); - DEBUG(dbgs() << " Delta = " << *Delta << "\n"); + LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { // If Delta is a sum of products, we may be able to make further progress. @@ -2321,11 +2386,11 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, if (!Constant) return false; APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); - DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); + LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); if (ConstDelta == 0) return false; RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); - DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); + LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); APInt Remainder = ConstDelta.srem(RunningGCD); if (Remainder != 0) { ++GCDindependence; @@ -2344,7 +2409,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], // we need to remember that the constant part is 5 and the RunningGCD should // be initialized to ExtraGCD = 30. 
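Editorial note: the GCD MIV test whose hunks begin above rests on one observation: a dependence requires the integer combination of subscript coefficients to equal the constant difference, so the gcd of all coefficients must divide that constant. A standalone sketch of the divisibility check; the real pass accumulates the gcd over SCEV coefficients and APInts, the plain integers here are for illustration only.

#include <cstdint>
#include <numeric>
#include <vector>

// A dependence needs sum(a_k * i_k) - sum(b_k * j_k) == ConstDelta, so the gcd
// of all coefficients must divide ConstDelta; a nonzero remainder disproves it.
bool gcdProvesIndependence(const std::vector<int64_t> &SrcCoeffs,
                           const std::vector<int64_t> &DstCoeffs,
                           int64_t ConstDelta) {
  int64_t G = 0;
  for (int64_t C : SrcCoeffs)
    G = std::gcd(G, C < 0 ? -C : C);
  for (int64_t C : DstCoeffs)
    G = std::gcd(G, C < 0 ? -C : C);
  if (G == 0)                   // every coefficient is zero: nothing to divide by
    return false;
  return ConstDelta % G != 0;
}

// Usage: subscripts 2*i + 4*j and 6*k that differ by a constant 3 can never be
// equal, since gcd(2, 4, 6) == 2 does not divide 3:
//   gcdProvesIndependence({2, 4}, {6}, 3)  ->  true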
- DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); + LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); bool Improved = false; Coefficients = Src; @@ -2399,10 +2464,10 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, continue; APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); - DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); + LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); if (RunningGCD != 0) { Remainder = ConstDelta.srem(RunningGCD); - DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); + LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); if (Remainder != 0) { unsigned Level = mapSrcLoop(CurLoop); Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); @@ -2412,7 +2477,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, } if (Improved) ++GCDsuccesses; - DEBUG(dbgs() << "all done\n"); + LLVM_DEBUG(dbgs() << "all done\n"); return false; } @@ -2453,35 +2518,35 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, const SmallBitVector &Loops, FullDependence &Result) const { - DEBUG(dbgs() << "starting Banerjee\n"); + LLVM_DEBUG(dbgs() << "starting Banerjee\n"); ++BanerjeeApplications; - DEBUG(dbgs() << " Src = " << *Src << '\n'); + LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); const SCEV *A0; CoefficientInfo *A = collectCoeffInfo(Src, true, A0); - DEBUG(dbgs() << " Dst = " << *Dst << '\n'); + LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); const SCEV *B0; CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; const SCEV *Delta = SE->getMinusSCEV(B0, A0); - DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); + LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); // Compute bounds for all the * directions. - DEBUG(dbgs() << "\tBounds[*]\n"); + LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); for (unsigned K = 1; K <= MaxLevels; ++K) { Bound[K].Iterations = A[K].Iterations ? 
A[K].Iterations : B[K].Iterations; Bound[K].Direction = Dependence::DVEntry::ALL; Bound[K].DirSet = Dependence::DVEntry::NONE; findBoundsALL(A, B, Bound, K); #ifndef NDEBUG - DEBUG(dbgs() << "\t " << K << '\t'); + LLVM_DEBUG(dbgs() << "\t " << K << '\t'); if (Bound[K].Lower[Dependence::DVEntry::ALL]) - DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); + LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); else - DEBUG(dbgs() << "-inf\t"); + LLVM_DEBUG(dbgs() << "-inf\t"); if (Bound[K].Upper[Dependence::DVEntry::ALL]) - DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); + LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); else - DEBUG(dbgs() << "+inf\n"); + LLVM_DEBUG(dbgs() << "+inf\n"); #endif } @@ -2537,23 +2602,23 @@ unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, const SCEV *Delta) const { if (Level > CommonLevels) { // record result - DEBUG(dbgs() << "\t["); + LLVM_DEBUG(dbgs() << "\t["); for (unsigned K = 1; K <= CommonLevels; ++K) { if (Loops[K]) { Bound[K].DirSet |= Bound[K].Direction; #ifndef NDEBUG switch (Bound[K].Direction) { case Dependence::DVEntry::LT: - DEBUG(dbgs() << " <"); + LLVM_DEBUG(dbgs() << " <"); break; case Dependence::DVEntry::EQ: - DEBUG(dbgs() << " ="); + LLVM_DEBUG(dbgs() << " ="); break; case Dependence::DVEntry::GT: - DEBUG(dbgs() << " >"); + LLVM_DEBUG(dbgs() << " >"); break; case Dependence::DVEntry::ALL: - DEBUG(dbgs() << " *"); + LLVM_DEBUG(dbgs() << " *"); break; default: llvm_unreachable("unexpected Bound[K].Direction"); @@ -2561,7 +2626,7 @@ unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, #endif } } - DEBUG(dbgs() << " ]\n"); + LLVM_DEBUG(dbgs() << " ]\n"); return 1; } if (Loops[Level]) { @@ -2572,34 +2637,40 @@ unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, findBoundsGT(A, B, Bound, Level); findBoundsEQ(A, B, Bound, Level); #ifndef NDEBUG - DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); - DEBUG(dbgs() << "\t <\t"); + LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); + LLVM_DEBUG(dbgs() << "\t <\t"); if (Bound[Level].Lower[Dependence::DVEntry::LT]) - DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); + LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] + << '\t'); else - DEBUG(dbgs() << "-inf\t"); + LLVM_DEBUG(dbgs() << "-inf\t"); if (Bound[Level].Upper[Dependence::DVEntry::LT]) - DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); + LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] + << '\n'); else - DEBUG(dbgs() << "+inf\n"); - DEBUG(dbgs() << "\t =\t"); + LLVM_DEBUG(dbgs() << "+inf\n"); + LLVM_DEBUG(dbgs() << "\t =\t"); if (Bound[Level].Lower[Dependence::DVEntry::EQ]) - DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); + LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] + << '\t'); else - DEBUG(dbgs() << "-inf\t"); + LLVM_DEBUG(dbgs() << "-inf\t"); if (Bound[Level].Upper[Dependence::DVEntry::EQ]) - DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); + LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] + << '\n'); else - DEBUG(dbgs() << "+inf\n"); - DEBUG(dbgs() << "\t >\t"); + LLVM_DEBUG(dbgs() << "+inf\n"); + LLVM_DEBUG(dbgs() << "\t >\t"); if (Bound[Level].Lower[Dependence::DVEntry::GT]) - DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); + LLVM_DEBUG(dbgs() << 
*Bound[Level].Lower[Dependence::DVEntry::GT] + << '\t'); else - DEBUG(dbgs() << "-inf\t"); + LLVM_DEBUG(dbgs() << "-inf\t"); if (Bound[Level].Upper[Dependence::DVEntry::GT]) - DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); + LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] + << '\n'); else - DEBUG(dbgs() << "+inf\n"); + LLVM_DEBUG(dbgs() << "+inf\n"); #endif } @@ -2846,21 +2917,21 @@ DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, } Constant = Subscript; #ifndef NDEBUG - DEBUG(dbgs() << "\tCoefficient Info\n"); + LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); for (unsigned K = 1; K <= MaxLevels; ++K) { - DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); - DEBUG(dbgs() << "\tPos Part = "); - DEBUG(dbgs() << *CI[K].PosPart); - DEBUG(dbgs() << "\tNeg Part = "); - DEBUG(dbgs() << *CI[K].NegPart); - DEBUG(dbgs() << "\tUpper Bound = "); + LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); + LLVM_DEBUG(dbgs() << "\tPos Part = "); + LLVM_DEBUG(dbgs() << *CI[K].PosPart); + LLVM_DEBUG(dbgs() << "\tNeg Part = "); + LLVM_DEBUG(dbgs() << *CI[K].NegPart); + LLVM_DEBUG(dbgs() << "\tUpper Bound = "); if (CI[K].Iterations) - DEBUG(dbgs() << *CI[K].Iterations); + LLVM_DEBUG(dbgs() << *CI[K].Iterations); else - DEBUG(dbgs() << "+inf"); - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "+inf"); + LLVM_DEBUG(dbgs() << '\n'); } - DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); + LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); #endif return CI; } @@ -2985,8 +3056,8 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, bool &Consistent) { bool Result = false; for (unsigned LI : Loops.set_bits()) { - DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); - DEBUG(Constraints[LI].dump(dbgs())); + LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); + LLVM_DEBUG(Constraints[LI].dump(dbgs())); if (Constraints[LI].isDistance()) Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); else if (Constraints[LI].isLine()) @@ -3007,17 +3078,17 @@ bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, Constraint &CurConstraint, bool &Consistent) { const Loop *CurLoop = CurConstraint.getAssociatedLoop(); - DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); const SCEV *A_K = findCoefficient(Src, CurLoop); if (A_K->isZero()) return false; const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); Src = SE->getMinusSCEV(Src, DA_K); Src = zeroCoefficient(Src, CurLoop); - DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); - DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); - DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); if (!findCoefficient(Dst, CurLoop)->isZero()) Consistent = false; return true; @@ -3036,9 +3107,10 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, const SCEV *A = CurConstraint.getA(); const SCEV *B = CurConstraint.getB(); const SCEV *C = CurConstraint.getC(); - DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); - DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); - DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C + << "\n"); + LLVM_DEBUG(dbgs() << "\t\tSrc = " << 
*Src << "\n"); + LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); if (A->isZero()) { const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); @@ -3094,8 +3166,8 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, if (!findCoefficient(Dst, CurLoop)->isZero()) Consistent = false; } - DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); - DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); + LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); return true; } @@ -3110,13 +3182,13 @@ bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, const SCEV *AP_K = findCoefficient(Dst, CurLoop); const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); - DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); Src = zeroCoefficient(Src, CurLoop); - DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); - DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); Dst = zeroCoefficient(Dst, CurLoop); - DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); return true; } @@ -3124,8 +3196,8 @@ bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, // Update direction vector entry based on the current constraint. void DependenceInfo::updateDirection(Dependence::DVEntry &Level, const Constraint &CurConstraint) const { - DEBUG(dbgs() << "\tUpdate direction, constraint ="); - DEBUG(CurConstraint.dump(dbgs())); + LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); + LLVM_DEBUG(CurConstraint.dump(dbgs())); if (CurConstraint.isAny()) ; // use defaults else if (CurConstraint.isDistance()) { @@ -3177,8 +3249,10 @@ void DependenceInfo::updateDirection(Dependence::DVEntry &Level, /// for each loop level. bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SmallVectorImpl<Subscript> &Pair) { - Value *SrcPtr = getPointerOperand(Src); - Value *DstPtr = getPointerOperand(Dst); + assert(isLoadOrStore(Src) && "instruction is not load or store"); + assert(isLoadOrStore(Dst) && "instruction is not load or store"); + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); Loop *SrcLoop = LI->getLoopFor(Src->getParent()); Loop *DstLoop = LI->getLoopFor(Dst->getParent()); @@ -3230,14 +3304,34 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, int size = SrcSubscripts.size(); - DEBUG({ - dbgs() << "\nSrcSubscripts: "; + // Statically check that the array bounds are in-range. The first subscript we + // don't have a size for and it cannot overflow into another subscript, so is + // always safe. The others need to be 0 <= subscript[i] < bound, for both src + // and dst. + // FIXME: It may be better to record these sizes and add them as constraints + // to the dependency checks. 
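Editorial note: the range checks added in the hunk just below exist because C and C++ let an access overflow one dimension into the next, so per-dimension subscripts are only trustworthy when each is known to stay within its bound. A small self-contained illustration of the hazard (the inner access is formally out of bounds, which is precisely why the analysis must not assume in-bounds behavior):

#include <cstdio>

// With int A[100][100], A[0][9999] and A[99][99] have different per-dimension
// subscripts yet name the same storage under row-major layout: 0*100 + 9999
// and 99*100 + 99 are both element 9999 of the underlying array.
int A[100][100];

int main() {
  A[99][99] = 1;
  std::printf("%d\n", A[0][9999]);   // in practice prints 1: same address
}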
+ for (int i = 1; i < size; ++i) { + if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) + return false; + + if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) + return false; + + if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) + return false; + + if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) + return false; + } + + LLVM_DEBUG({ + dbgs() << "\nSrcSubscripts: "; for (int i = 0; i < size; i++) dbgs() << *SrcSubscripts[i]; dbgs() << "\nDstSubscripts: "; for (int i = 0; i < size; i++) dbgs() << *DstSubscripts[i]; - }); + }); // The delinearization transforms a single-subscript MIV dependence test into // a multi-subscript SIV dependence test that is easier to compute. So we @@ -3248,13 +3342,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, Pair[i].Src = SrcSubscripts[i]; Pair[i].Dst = DstSubscripts[i]; unifySubscriptType(&Pair[i]); - - // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the - // delinearization has found, and add these constraints to the dependence - // check to avoid memory accesses overflow from one dimension into another. - // This is related to the problem of determining the existence of data - // dependences in array accesses using a different number of subscripts: in - // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. } return true; @@ -3299,23 +3386,26 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. - DEBUG(dbgs() << "can only handle simple loads and stores\n"); + LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); return make_unique<Dependence>(Src, Dst); } - Value *SrcPtr = getPointerOperand(Src); - Value *DstPtr = getPointerOperand(Dst); + assert(isLoadOrStore(Src) && "instruction is not load or store"); + assert(isLoadOrStore(Dst) && "instruction is not load or store"); + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); - switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, - SrcPtr)) { + switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), + MemoryLocation::get(Dst), + MemoryLocation::get(Src))) { case MayAlias: case PartialAlias: // cannot analyse objects if we don't understand their aliasing. - DEBUG(dbgs() << "can't analyze may or partial alias\n"); + LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); return make_unique<Dependence>(Src, Dst); case NoAlias: // If the objects noalias, they are distinct, accesses are independent. - DEBUG(dbgs() << "no alias\n"); + LLVM_DEBUG(dbgs() << "no alias\n"); return nullptr; case MustAlias: break; // The underlying objects alias; test accesses for dependence. @@ -3323,56 +3413,24 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, // establish loop nesting levels establishNestingLevels(Src, Dst); - DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); - DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); + LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); + LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); ++TotalArrayPairs; - // See if there are GEPs we can use. 
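Editorial note: the switch in depends() above now feeds MemoryLocations to the rewritten underlyingObjectsAlias() from the first hunk of this file. A standalone sketch of that helper's decision logic; the boolean parameters stand in for the AA query on the size-less locations and for isIdentifiedObject(), since the real code works on MemoryLocations and Values.

enum class AliasKind { NoAlias, MayAlias, MustAlias };

AliasKind underlyingObjectsAliasSketch(bool LocationsNoAlias,
                                       const void *AObj, const void *BObj,
                                       bool AIdentified, bool BIdentified) {
  if (LocationsNoAlias)
    return AliasKind::NoAlias;    // e.g. TBAA already proves the accesses disjoint
  if (AObj == BObj)
    return AliasKind::MustAlias;  // same underlying object: run the subscript tests
  if (!AIdentified || !BIdentified)
    return AliasKind::MayAlias;   // recursion limit hit, or provenance unknown
  return AliasKind::NoAlias;      // distinct identified objects cannot overlap
}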
- bool UsefulGEP = false; - GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); - GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); - if (SrcGEP && DstGEP && - SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { - const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); - const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); - DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); - DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); - - UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && - isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && - (SrcGEP->getNumOperands() == DstGEP->getNumOperands()) && - isKnownPredicate(CmpInst::ICMP_EQ, SrcPtrSCEV, DstPtrSCEV); - } - unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; - SmallVector<Subscript, 4> Pair(Pairs); - if (UsefulGEP) { - DEBUG(dbgs() << " using GEPs\n"); - unsigned P = 0; - for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), - SrcEnd = SrcGEP->idx_end(), - DstIdx = DstGEP->idx_begin(); - SrcIdx != SrcEnd; - ++SrcIdx, ++DstIdx, ++P) { - Pair[P].Src = SE->getSCEV(*SrcIdx); - Pair[P].Dst = SE->getSCEV(*DstIdx); - unifySubscriptType(&Pair[P]); - } - } - else { - DEBUG(dbgs() << " ignoring GEPs\n"); - const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); - const SCEV *DstSCEV = SE->getSCEV(DstPtr); - DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); - DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); - Pair[0].Src = SrcSCEV; - Pair[0].Dst = DstSCEV; - } + unsigned Pairs = 1; + SmallVector<Subscript, 2> Pair(Pairs); + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); + LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; - if (Delinearize && CommonLevels > 1) { + if (Delinearize) { if (tryDelinearize(Src, Dst, Pair)) { - DEBUG(dbgs() << " delinearized GEP\n"); + LLVM_DEBUG(dbgs() << " delinearized\n"); Pairs = Pair.size(); } } @@ -3388,12 +3446,12 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, Pair[P].Loops); Pair[P].GroupLoops = Pair[P].Loops; Pair[P].Group.set(P); - DEBUG(dbgs() << " subscript " << P << "\n"); - DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); - DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); - DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); - DEBUG(dbgs() << "\tloops = "); - DEBUG(dumpSmallBitVector(Pair[P].Loops)); + LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); + LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); + LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); + LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); + LLVM_DEBUG(dbgs() << "\tloops = "); + LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); } SmallBitVector Separable(Pairs); @@ -3498,25 +3556,25 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, } } - DEBUG(dbgs() << " Separable = "); - DEBUG(dumpSmallBitVector(Separable)); - DEBUG(dbgs() << " Coupled = "); - DEBUG(dumpSmallBitVector(Coupled)); + LLVM_DEBUG(dbgs() << " Separable = "); + LLVM_DEBUG(dumpSmallBitVector(Separable)); + LLVM_DEBUG(dbgs() << " Coupled = "); + LLVM_DEBUG(dumpSmallBitVector(Coupled)); Constraint NewConstraint; NewConstraint.setAny(SE); // test separable subscripts for (unsigned SI : Separable.set_bits()) { - DEBUG(dbgs() << "testing subscript " << SI); + LLVM_DEBUG(dbgs() << "testing subscript " << SI); switch 
(Pair[SI].Classification) { case Subscript::ZIV: - DEBUG(dbgs() << ", ZIV\n"); + LLVM_DEBUG(dbgs() << ", ZIV\n"); if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) return nullptr; break; case Subscript::SIV: { - DEBUG(dbgs() << ", SIV\n"); + LLVM_DEBUG(dbgs() << ", SIV\n"); unsigned Level; const SCEV *SplitIter = nullptr; if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, @@ -3525,12 +3583,12 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, break; } case Subscript::RDIV: - DEBUG(dbgs() << ", RDIV\n"); + LLVM_DEBUG(dbgs() << ", RDIV\n"); if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) return nullptr; break; case Subscript::MIV: - DEBUG(dbgs() << ", MIV\n"); + LLVM_DEBUG(dbgs() << ", MIV\n"); if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) return nullptr; break; @@ -3541,20 +3599,20 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (Coupled.count()) { // test coupled subscript groups - DEBUG(dbgs() << "starting on coupled subscripts\n"); - DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); + LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); + LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); SmallVector<Constraint, 4> Constraints(MaxLevels + 1); for (unsigned II = 0; II <= MaxLevels; ++II) Constraints[II].setAny(SE); for (unsigned SI : Coupled.set_bits()) { - DEBUG(dbgs() << "testing subscript group " << SI << " { "); + LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); SmallBitVector Group(Pair[SI].Group); SmallBitVector Sivs(Pairs); SmallBitVector Mivs(Pairs); SmallBitVector ConstrainedLevels(MaxLevels + 1); SmallVector<Subscript *, 4> PairsInGroup; for (unsigned SJ : Group.set_bits()) { - DEBUG(dbgs() << SJ << " "); + LLVM_DEBUG(dbgs() << SJ << " "); if (Pair[SJ].Classification == Subscript::SIV) Sivs.set(SJ); else @@ -3562,15 +3620,15 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, PairsInGroup.push_back(&Pair[SJ]); } unifySubscriptType(PairsInGroup); - DEBUG(dbgs() << "}\n"); + LLVM_DEBUG(dbgs() << "}\n"); while (Sivs.any()) { bool Changed = false; for (unsigned SJ : Sivs.set_bits()) { - DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); + LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); // SJ is an SIV subscript that's part of the current coupled group unsigned Level; const SCEV *SplitIter = nullptr; - DEBUG(dbgs() << "SIV\n"); + LLVM_DEBUG(dbgs() << "SIV\n"); if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, SplitIter)) return nullptr; @@ -3586,15 +3644,15 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, } if (Changed) { // propagate, possibly creating new SIVs and ZIVs - DEBUG(dbgs() << " propagating\n"); - DEBUG(dbgs() << "\tMivs = "); - DEBUG(dumpSmallBitVector(Mivs)); + LLVM_DEBUG(dbgs() << " propagating\n"); + LLVM_DEBUG(dbgs() << "\tMivs = "); + LLVM_DEBUG(dumpSmallBitVector(Mivs)); for (unsigned SJ : Mivs.set_bits()) { // SJ is an MIV subscript that's part of the current coupled group - DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); + LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Constraints, Result.Consistent)) { - DEBUG(dbgs() << "\t Changed\n"); + LLVM_DEBUG(dbgs() << "\t Changed\n"); ++DeltaPropagations; Pair[SJ].Classification = classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), @@ -3602,7 +3660,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, Pair[SJ].Loops); switch (Pair[SJ].Classification) { case 
Subscript::ZIV: - DEBUG(dbgs() << "ZIV\n"); + LLVM_DEBUG(dbgs() << "ZIV\n"); if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) return nullptr; Mivs.reset(SJ); @@ -3625,7 +3683,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, // test & propagate remaining RDIVs for (unsigned SJ : Mivs.set_bits()) { if (Pair[SJ].Classification == Subscript::RDIV) { - DEBUG(dbgs() << "RDIV test\n"); + LLVM_DEBUG(dbgs() << "RDIV test\n"); if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) return nullptr; // I don't yet understand how to propagate RDIV results @@ -3638,7 +3696,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, // Better to somehow test all remaining subscripts simultaneously. for (unsigned SJ : Mivs.set_bits()) { if (Pair[SJ].Classification == Subscript::MIV) { - DEBUG(dbgs() << "MIV test\n"); + LLVM_DEBUG(dbgs() << "MIV test\n"); if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) return nullptr; } @@ -3647,7 +3705,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, } // update Result.DV from constraint vector - DEBUG(dbgs() << " updating\n"); + LLVM_DEBUG(dbgs() << " updating\n"); for (unsigned SJ : ConstrainedLevels.set_bits()) { if (SJ > CommonLevels) break; @@ -3753,51 +3811,27 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); assert(isLoadOrStore(Src)); assert(isLoadOrStore(Dst)); - Value *SrcPtr = getPointerOperand(Src); - Value *DstPtr = getPointerOperand(Dst); - assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, - SrcPtr) == MustAlias); + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); + assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), + MemoryLocation::get(Dst), + MemoryLocation::get(Src)) == MustAlias); // establish loop nesting levels establishNestingLevels(Src, Dst); FullDependence Result(Src, Dst, false, CommonLevels); - // See if there are GEPs we can use. - bool UsefulGEP = false; - GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); - GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); - if (SrcGEP && DstGEP && - SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { - const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); - const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); - UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && - isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && - (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); - } - unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; - SmallVector<Subscript, 4> Pair(Pairs); - if (UsefulGEP) { - unsigned P = 0; - for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), - SrcEnd = SrcGEP->idx_end(), - DstIdx = DstGEP->idx_begin(); - SrcIdx != SrcEnd; - ++SrcIdx, ++DstIdx, ++P) { - Pair[P].Src = SE->getSCEV(*SrcIdx); - Pair[P].Dst = SE->getSCEV(*DstIdx); - } - } - else { - const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); - const SCEV *DstSCEV = SE->getSCEV(DstPtr); - Pair[0].Src = SrcSCEV; - Pair[0].Dst = DstSCEV; - } + unsigned Pairs = 1; + SmallVector<Subscript, 2> Pair(Pairs); + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; - if (Delinearize && CommonLevels > 1) { + if (Delinearize) { if (tryDelinearize(Src, Dst, Pair)) { - DEBUG(dbgs() << " delinearized GEP\n"); + LLVM_DEBUG(dbgs() << " delinearized\n"); Pairs = Pair.size(); } } diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp index ac684ec18466..f5f1874c9303 100644 --- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -77,6 +77,8 @@ #include <vector> using namespace llvm; +#define DEBUG_TYPE "divergence" + namespace { class DivergencePropagator { @@ -299,6 +301,10 @@ bool DivergenceAnalysis::runOnFunction(Function &F) { PDT, DivergentValues); DP.populateWithSourcesOfDivergence(); DP.propagate(); + LLVM_DEBUG( + dbgs() << "\nAfter divergence analysis on " << F.getName() << ":\n"; + print(dbgs(), F.getParent()) + ); return false; } @@ -318,12 +324,17 @@ void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { // Dumps all divergent values in F, arguments and then instructions. for (auto &Arg : F->args()) { - if (DivergentValues.count(&Arg)) - OS << "DIVERGENT: " << Arg << "\n"; + OS << (DivergentValues.count(&Arg) ? "DIVERGENT: " : " "); + OS << Arg << "\n"; } // Iterate instructions using instructions() to ensure a deterministic order. - for (auto &I : instructions(F)) { - if (DivergentValues.count(&I)) - OS << "DIVERGENT:" << I << "\n"; + for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) { + auto &BB = *BI; + OS << "\n " << BB.getName() << ":\n"; + for (auto &I : BB.instructionsWithoutDebug()) { + OS << (DivergentValues.count(&I) ? 
"DIVERGENT: " : " "); + OS << I << "\n"; + } } + OS << "\n"; } diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp index bb8caf4a5174..de7f62cf4ecd 100644 --- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp @@ -9,6 +9,7 @@ #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/DominanceFrontierImpl.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp index b12ae9884e3d..2d35a3fa9118 100644 --- a/contrib/llvm/lib/Analysis/EHPersonalities.cpp +++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp @@ -25,20 +25,21 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) { if (!F) return EHPersonality::Unknown; return StringSwitch<EHPersonality>(F->getName()) - .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) - .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) - .Case("__gxx_personality_seh0",EHPersonality::GNU_CXX) - .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) - .Case("__gcc_personality_v0", EHPersonality::GNU_C) - .Case("__gcc_personality_seh0",EHPersonality::GNU_C) - .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) - .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) - .Case("_except_handler3", EHPersonality::MSVC_X86SEH) - .Case("_except_handler4", EHPersonality::MSVC_X86SEH) - .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) - .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) - .Case("ProcessCLRException", EHPersonality::CoreCLR) - .Case("rust_eh_personality", EHPersonality::Rust) + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_seh0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__gcc_personality_seh0", EHPersonality::GNU_C) + .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("_except_handler3", EHPersonality::MSVC_X86SEH) + .Case("_except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Case("ProcessCLRException", EHPersonality::CoreCLR) + .Case("rust_eh_personality", EHPersonality::Rust) + .Case("__gxx_wasm_personality_v0", EHPersonality::Wasm_CXX) .Default(EHPersonality::Unknown); } @@ -55,6 +56,7 @@ StringRef llvm::getEHPersonalityName(EHPersonality Pers) { case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3"; case EHPersonality::CoreCLR: return "ProcessCLRException"; case EHPersonality::Rust: return "rust_eh_personality"; + case EHPersonality::Wasm_CXX: return "__gxx_wasm_personality_v0"; case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!"); } diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp index 94306d0f54ad..197aee9dacb7 100644 --- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -65,7 +65,7 @@ class GlobalsAAResult::FunctionInfo { /// Build a wrapper struct that has 8-byte alignment. 
All heap allocations /// should provide this much alignment at least, but this makes it clear we /// specifically rely on this amount of alignment. - struct LLVM_ALIGNAS(8) AlignedMap { + struct alignas(8) AlignedMap { AlignedMap() {} AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {} GlobalInfoMapType Map; @@ -584,6 +584,10 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { } else if (Function *Callee = CS.getCalledFunction()) { // The callgraph doesn't include intrinsic calls. if (Callee->isIntrinsic()) { + if (isa<DbgInfoIntrinsic>(I)) + // Don't let dbg intrinsics affect alias info. + continue; + FunctionModRefBehavior Behaviour = AAResultBase::getModRefBehavior(Callee); FI.addModRefInfo(createModRefInfo(Behaviour)); diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index c30feb973e60..609e5e3a1448 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -234,13 +235,13 @@ bool IVUsers::AddUsersImpl(Instruction *I, if (LI->getLoopFor(User->getParent()) != L) { if (isa<PHINode>(User) || Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) { - DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' - << " OF SCEV: " << *ISE << '\n'); + LLVM_DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) { - DEBUG(dbgs() << "FOUND USER: " << *User << '\n' - << " OF SCEV: " << *ISE << '\n'); + LLVM_DEBUG(dbgs() << "FOUND USER: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } @@ -273,14 +274,15 @@ bool IVUsers::AddUsersImpl(Instruction *I, // If we normalized the expression, but denormalization doesn't give the // original one, discard this user. 
if (OriginalISE != DenormalizedISE) { - DEBUG(dbgs() << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): " - << *ISE << '\n'); + LLVM_DEBUG(dbgs() + << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): " + << *ISE << '\n'); IVUses.pop_back(); return false; } } - DEBUG(if (SE->getSCEV(I) != ISE) - dbgs() << " NORMALIZED TO: " << *ISE << '\n'); + LLVM_DEBUG(if (SE->getSCEV(I) != ISE) dbgs() + << " NORMALIZED TO: " << *ISE << '\n'); } } return true; diff --git a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index c11176bbb9c8..4659c0a00629 100644 --- a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -71,19 +71,19 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates( const Instruction *Inst, uint32_t NumVals, uint64_t TotalCount) { ArrayRef<InstrProfValueData> ValueDataRef(ValueDataArray.get(), NumVals); - DEBUG(dbgs() << " \nWork on callsite " << *Inst << " Num_targets: " << NumVals - << "\n"); + LLVM_DEBUG(dbgs() << " \nWork on callsite " << *Inst + << " Num_targets: " << NumVals << "\n"); uint32_t I = 0; uint64_t RemainingCount = TotalCount; for (; I < MaxNumPromotions && I < NumVals; I++) { uint64_t Count = ValueDataRef[I].Count; assert(Count <= RemainingCount); - DEBUG(dbgs() << " Candidate " << I << " Count=" << Count - << " Target_func: " << ValueDataRef[I].Value << "\n"); + LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count + << " Target_func: " << ValueDataRef[I].Value << "\n"); if (!isPromotionProfitable(Count, TotalCount, RemainingCount)) { - DEBUG(dbgs() << " Not promote: Cold target.\n"); + LLVM_DEBUG(dbgs() << " Not promote: Cold target.\n"); return I; } RemainingCount -= Count; diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index b0cb29203a5a..a6cccc3b5910 100644 --- a/contrib/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -135,7 +136,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool ContainsNoDuplicateCall; bool HasReturn; bool HasIndirectBr; - bool HasFrameEscape; + bool HasUninlineableIntrinsic; + bool UsesVarArgs; /// Number of bytes allocated statically by the callee. 
uint64_t AllocatedSize; @@ -280,12 +282,13 @@ public: IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0), - EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + HasUninlineableIntrinsic(false), UsesVarArgs(false), AllocatedSize(0), + NumInstructions(0), NumVectorInstructions(0), VectorBonus(0), + SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -308,12 +311,12 @@ public: } // namespace -/// \brief Test whether the given value is an Alloca-derived function argument. +/// Test whether the given value is an Alloca-derived function argument. bool CallAnalyzer::isAllocaDerivedArg(Value *V) { return SROAArgValues.count(V); } -/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. +/// Lookup the SROA-candidate argument and cost iterator which V maps to. /// Returns false if V does not map to a SROA-candidate. bool CallAnalyzer::lookupSROAArgAndCost( Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) { @@ -329,7 +332,7 @@ bool CallAnalyzer::lookupSROAArgAndCost( return CostIt != SROAArgCosts.end(); } -/// \brief Disable SROA for the candidate marked by this cost iterator. +/// Disable SROA for the candidate marked by this cost iterator. /// /// This marks the candidate as no longer viable for SROA, and adds the cost /// savings associated with it back into the inline cost measurement. @@ -343,7 +346,7 @@ void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { disableLoadElimination(); } -/// \brief If 'V' maps to a SROA candidate, disable SROA for it. +/// If 'V' maps to a SROA candidate, disable SROA for it. void CallAnalyzer::disableSROA(Value *V) { Value *SROAArg; DenseMap<Value *, int>::iterator CostIt; @@ -351,7 +354,7 @@ void CallAnalyzer::disableSROA(Value *V) { disableSROA(CostIt); } -/// \brief Accumulate the given cost for a particular SROA candidate. +/// Accumulate the given cost for a particular SROA candidate. void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, int InstructionCost) { CostIt->second += InstructionCost; @@ -366,12 +369,12 @@ void CallAnalyzer::disableLoadElimination() { } } -/// \brief Accumulate a constant GEP offset into an APInt if possible. +/// Accumulate a constant GEP offset into an APInt if possible. /// /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. 
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - unsigned IntPtrWidth = DL.getPointerSizeInBits(); + unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType()); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); @@ -399,7 +402,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { return true; } -/// \brief Use TTI to check whether a GEP is free. +/// Use TTI to check whether a GEP is free. /// /// Respects any simplified values known during the analysis of this callsite. bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { @@ -450,8 +453,12 @@ bool CallAnalyzer::visitPHI(PHINode &I) { // SROA if it *might* be used in an inappropriate manner. // Phi nodes are always zero-cost. - - APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits()); + // FIXME: Pointer sizes may differ between different address spaces, so do we + // need to use correct address space in the call to getPointerSizeInBits here? + // Or could we skip the getPointerSizeInBits call completely? As far as I can + // see the ZeroOffset is used as a dummy value, so we can probably use any + // bit width for the ZeroOffset? + APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0)); bool CheckSROA = I.getType()->isPointerTy(); // Track the constant or pointer with constant offset we've seen so far. @@ -536,7 +543,7 @@ bool CallAnalyzer::visitPHI(PHINode &I) { return true; } -/// \brief Check we can fold GEPs of constant-offset call site argument pointers. +/// Check we can fold GEPs of constant-offset call site argument pointers. /// This requires target data and inbounds GEPs. /// /// \return true if the specified GEP can be folded. @@ -641,7 +648,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - if (IntegerSize >= DL.getPointerSizeInBits()) { + unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace(); + if (IntegerSize >= DL.getPointerSizeInBits(AS)) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) @@ -674,7 +682,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // modifications provided the integer is not too large. Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (IntegerSize <= DL.getPointerSizeInBits()) { + if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; @@ -913,14 +921,14 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI); if (!Caller->optForSize() && HotCallSiteThreshold) { - DEBUG(dbgs() << "Hot callsite.\n"); + LLVM_DEBUG(dbgs() << "Hot callsite.\n"); // FIXME: This should update the threshold only if it exceeds the // current threshold, but AutoFDO + ThinLTO currently relies on this // behavior to prevent inlining of hot callsites during ThinLTO // compile phase. 
Threshold = HotCallSiteThreshold.getValue(); } else if (isColdCallSite(CS, CallerBFI)) { - DEBUG(dbgs() << "Cold callsite.\n"); + LLVM_DEBUG(dbgs() << "Cold callsite.\n"); // Do not apply bonuses for a cold callsite including the // LastCallToStatic bonus. While this bonus might result in code size // reduction, it can cause the size of a non-cold caller to increase @@ -931,13 +939,13 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // Use callee's global profile information only if we have no way of // determining this via callsite information. if (PSI->isFunctionEntryHot(&Callee)) { - DEBUG(dbgs() << "Hot callee.\n"); + LLVM_DEBUG(dbgs() << "Hot callee.\n"); // If callsite hotness can not be determined, we may still know // that the callee is hot and treat it as a weaker hint for threshold // increase. Threshold = MaxIfValid(Threshold, Params.HintThreshold); } else if (PSI->isFunctionEntryCold(&Callee)) { - DEBUG(dbgs() << "Cold callee.\n"); + LLVM_DEBUG(dbgs() << "Cold callee.\n"); // Do not apply bonuses for a cold callee including the // LastCallToStatic bonus. While this bonus might result in code size // reduction, it can cause the size of a non-cold caller to increase @@ -1155,7 +1163,7 @@ bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { return false; } -/// \brief Try to simplify a call site. +/// Try to simplify a call site. /// /// Takes a concrete function and callsite and tries to actually simplify it by /// analyzing the arguments and call itself with instsimplify. Returns true if @@ -1225,8 +1233,13 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { disableLoadElimination(); // SROA can usually chew through these intrinsics, but they aren't free. return false; + case Intrinsic::icall_branch_funnel: case Intrinsic::localescape: - HasFrameEscape = true; + HasUninlineableIntrinsic = true; + return false; + case Intrinsic::vastart: + case Intrinsic::vaend: + UsesVarArgs = true; return false; } } @@ -1521,7 +1534,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { return false; } -/// \brief Analyze a basic block for its contribution to the inline cost. +/// Analyze a basic block for its contribution to the inline cost. /// /// This method walks the analyzer over every instruction in the given basic /// block and accounts for their cost during inlining at this callsite. It @@ -1562,7 +1575,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr || HasFrameEscape) { + HasIndirectBr || HasUninlineableIntrinsic || UsesVarArgs) { if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", @@ -1598,7 +1611,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, return true; } -/// \brief Compute the base pointer and cumulative constant offsets for V. +/// Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and /// accumulates the total constant offset applied in the returned constant. 
It @@ -1608,7 +1621,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!V->getType()->isPointerTy()) return nullptr; - unsigned IntPtrWidth = DL.getPointerSizeInBits(); + unsigned AS = V->getType()->getPointerAddressSpace(); + unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -1632,11 +1646,11 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); - Type *IntPtrTy = DL.getIntPtrType(V->getContext()); + Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS); return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); } -/// \brief Find dead blocks due to deleted CFG edges during inlining. +/// Find dead blocks due to deleted CFG edges during inlining. /// /// If we know the successor of the current block, \p CurrBB, has to be \p /// NextBB, the other successors of \p CurrBB are dead if these successors have @@ -1674,7 +1688,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { } } -/// \brief Analyze a call site for potential inlining. +/// Analyze a call site for potential inlining. /// /// Returns true if inlining this call is viable, and false if it is not /// viable. It computes the cost and adjusts the threshold based on numerous @@ -1867,7 +1881,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -/// \brief Dump stats about this call's analysis. +/// Dump stats about this call's analysis. LLVM_DUMP_METHOD void CallAnalyzer::dump() { #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" DEBUG_PRINT_STAT(NumConstantArgs); @@ -1887,7 +1901,7 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump() { } #endif -/// \brief Test that there are no attribute conflicts between Caller and Callee +/// Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, @@ -1904,7 +1918,8 @@ int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) { // size of the byval type by the target's pointer size. PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = DL.getPointerSizeInBits(); + unsigned AS = PTy->getAddressSpace(); + unsigned PointerSize = DL.getPointerSizeInBits(AS); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; @@ -1948,6 +1963,19 @@ InlineCost llvm::getInlineCost( if (!Callee) return llvm::InlineCost::getNever(); + // Never inline calls with byval arguments that does not have the alloca + // address space. Since byval arguments can be replaced with a copy to an + // alloca, the inlined code would need to be adjusted to handle that the + // argument is in the alloca address space (so it is a little bit complicated + // to solve). + unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace(); + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) + if (CS.isByValArgument(I)) { + PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + if (PTy->getAddressSpace() != AllocaAS) + return llvm::InlineCost::getNever(); + } + // Calls to functions with always-inline attributes should be inlined // whenever possible. 
if (CS.hasFnAttr(Attribute::AlwaysInline)) { @@ -1966,6 +1994,11 @@ InlineCost llvm::getInlineCost( if (Caller->hasFnAttribute(Attribute::OptimizeNone)) return llvm::InlineCost::getNever(); + // Don't inline a function that treats null pointer as valid into a caller + // that does not have this attribute. + if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined()) + return llvm::InlineCost::getNever(); + // Don't inline functions which can be interposed at link-time. Don't inline // functions marked noinline or call sites marked noinline. // Note: inlining non-exact non-interposable functions is fine, since we know @@ -1974,14 +2007,14 @@ InlineCost llvm::getInlineCost( CS.isNoInline()) return llvm::InlineCost::getNever(); - DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "... (caller:" << Caller->getName() << ")\n"); + LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() + << "... (caller:" << Caller->getName() << ")\n"); CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS, Params); bool ShouldInline = CA.analyzeCall(CS); - DEBUG(CA.dump()); + LLVM_DEBUG(CA.dump()); // Check if there was a reason to force inlining or no inlining. if (!ShouldInline && CA.getCost() < CA.getThreshold()) @@ -2015,12 +2048,21 @@ bool llvm::isInlineViable(Function &F) { cast<CallInst>(CS.getInstruction())->canReturnTwice()) return false; - // Disallow inlining functions that call @llvm.localescape. Doing this - // correctly would require major changes to the inliner. - if (CS.getCalledFunction() && - CS.getCalledFunction()->getIntrinsicID() == - llvm::Intrinsic::localescape) - return false; + if (CS.getCalledFunction()) + switch (CS.getCalledFunction()->getIntrinsicID()) { + default: + break; + // Disallow inlining of @llvm.icall.branch.funnel because current + // backend can't separate call targets from call arguments. + case llvm::Intrinsic::icall_branch_funnel: + // Disallow inlining functions that call @llvm.localescape. Doing this + // correctly would require major changes to the inliner. + case llvm::Intrinsic::localescape: + // Disallow inlining of functions that access VarArgs. + case llvm::Intrinsic::vastart: + case llvm::Intrinsic::vaend: + return false; + } } } diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index c814ff122e44..519d6d67be51 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -62,6 +62,8 @@ static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyCastInst(unsigned, Value *, Type *, const SimplifyQuery &, unsigned); +static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, const SimplifyQuery &, + unsigned); /// For a boolean type or a vector of boolean type, return false or a vector /// with every element false. @@ -90,7 +92,7 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, } /// Does the given value dominate the specified phi node? -static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { +static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast<Instruction>(V); if (!I) // Arguments and constants dominate all instructions. 
@@ -99,7 +101,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { // If we are processing instructions (and/or basic blocks) that have not been // fully added to a function, the parent nodes may still be null. Simply // return the conservative answer in these cases. - if (!I->getParent() || !P->getParent() || !I->getParent()->getParent()) + if (!I->getParent() || !P->getParent() || !I->getFunction()) return false; // If we have a DominatorTree then do a precise test. @@ -108,7 +110,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { // Otherwise, if the instruction is in the entry block and is not an invoke, // then it obviously dominates all phi nodes. - if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() && + if (I->getParent() == &I->getFunction()->getEntryBlock() && !isa<InvokeInst>(I)) return true; @@ -443,13 +445,13 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, if (isa<PHINode>(LHS)) { PI = cast<PHINode>(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(RHS, PI, Q.DT)) + if (!valueDominatesPHI(RHS, PI, Q.DT)) return nullptr; } else { assert(isa<PHINode>(RHS) && "No PHI instruction operand!"); PI = cast<PHINode>(RHS); // Bail out if LHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(LHS, PI, Q.DT)) + if (!valueDominatesPHI(LHS, PI, Q.DT)) return nullptr; } @@ -490,7 +492,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, PHINode *PI = cast<PHINode>(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(RHS, PI, Q.DT)) + if (!valueDominatesPHI(RHS, PI, Q.DT)) return nullptr; // Evaluate the BinOp on the incoming phi values. @@ -525,7 +527,7 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, /// Given operands for an Add, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, +static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) return C; @@ -538,6 +540,10 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (match(Op1, m_Zero())) return Op0; + // If two operands are negative, return 0. + if (isKnownNegation(Op0, Op1)) + return Constant::getNullValue(Op0->getType()); + // X + (Y - X) -> Y // (Y - X) + X -> Y // Eg: X + -X -> 0 @@ -555,10 +561,14 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // add nsw/nuw (xor Y, signmask), signmask --> Y // The no-wrapping add guarantees that the top bit will be set by the add. // Therefore, the xor must be clearing the already set sign bit of Y. - if ((isNSW || isNUW) && match(Op1, m_SignMask()) && + if ((IsNSW || IsNUW) && match(Op1, m_SignMask()) && match(Op0, m_Xor(m_Value(Y), m_SignMask()))) return Y; + // add nuw %x, -1 -> -1, because %x can only be 0. + if (IsNUW && match(Op1, m_AllOnes())) + return Op1; // Which is -1. + /// i1 add -> xor. 
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) @@ -581,12 +591,12 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return nullptr; } -Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, +Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, const SimplifyQuery &Query) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit); + return ::SimplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit); } -/// \brief Compute the base pointer and cumulative constant offsets for V. +/// Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and /// accumulates the total constant offset applied in the returned constant. It @@ -637,7 +647,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, return OffsetIntPtr; } -/// \brief Compute the constant difference between two pointer values. +/// Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, Value *RHS) { @@ -680,14 +690,14 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (match(Op0, m_Zero())) { // 0 - X -> 0 if the sub is NUW. if (isNUW) - return Op0; + return Constant::getNullValue(Op0->getType()); KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. if (isNSW) - return Op0; + return Constant::getNullValue(Op0->getType()); // 0 - X -> X if X is 0 or the minimum signed value. return Op1; @@ -799,12 +809,9 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return C; // X * undef -> 0 - if (match(Op1, m_Undef())) - return Constant::getNullValue(Op0->getType()); - // X * 0 -> 0 - if (match(Op1, m_Zero())) - return Op1; + if (match(Op1, m_CombineOr(m_Undef(), m_Zero()))) + return Constant::getNullValue(Op0->getType()); // X * 1 -> X if (match(Op1, m_One())) @@ -868,13 +875,14 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { if (match(Op1, m_Zero())) return UndefValue::get(Ty); - // If any element of a constant divisor vector is zero, the whole op is undef. + // If any element of a constant divisor vector is zero or undef, the whole op + // is undef. auto *Op1C = dyn_cast<Constant>(Op1); if (Op1C && Ty->isVectorTy()) { unsigned NumElts = Ty->getVectorNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = Op1C->getAggregateElement(i); - if (Elt && Elt->isNullValue()) + if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt))) return UndefValue::get(Ty); } } @@ -887,7 +895,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { // 0 / X -> 0 // 0 % X -> 0 if (match(Op0, m_Zero())) - return Op0; + return Constant::getNullValue(Op0->getType()); // X / X -> 1 // X % X -> 0 @@ -898,7 +906,10 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { // X % 1 -> 0 // If this is a boolean op (single-bit element type), we can't have // division-by-zero or remainder-by-zero, so assume the divisor is 1. 
- if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1)) + // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1. + Value *X; + if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1) || + (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))) return IsDiv ? Op0 : Constant::getNullValue(Ty); return nullptr; @@ -978,18 +989,17 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, bool IsSigned = Opcode == Instruction::SDiv; // (X * Y) / Y -> X if the multiplication does not overflow. - Value *X = nullptr, *Y = nullptr; - if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { - if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 - OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0); - // If the Mul knows it does not overflow, then we are good to go. + Value *X; + if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) { + auto *Mul = cast<OverflowingBinaryOperator>(Op0); + // If the Mul does not overflow, then we are good to go. if ((IsSigned && Mul->hasNoSignedWrap()) || (!IsSigned && Mul->hasNoUnsignedWrap())) return X; - // If X has the form X = A / Y then X * Y cannot overflow. - if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X)) - if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y) - return X; + // If X has the form X = A / Y, then X * Y cannot overflow. + if ((IsSigned && match(X, m_SDiv(m_Value(), m_Specific(Op1)))) || + (!IsSigned && match(X, m_UDiv(m_Value(), m_Specific(Op1))))) + return X; } // (X rem Y) / Y -> 0 @@ -1041,6 +1051,13 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, match(Op0, m_URem(m_Value(), m_Specific(Op1))))) return Op0; + // (X << Y) % X -> 0 + if ((Opcode == Instruction::SRem && + match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) || + (Opcode == Instruction::URem && + match(Op0, m_NUWShl(m_Specific(Op1), m_Value())))) + return Constant::getNullValue(Op0->getType()); + // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) @@ -1064,6 +1081,10 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// If not, this returns null. static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { + // If two operands are negated and no signed overflow, return -1. + if (isKnownNegation(Op0, Op1, /*NeedNSW=*/true)) + return Constant::getAllOnesValue(Op0->getType()); + return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse); } @@ -1086,6 +1107,16 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { /// If not, this returns null. static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { + // If the divisor is 0, the result is undefined, so assume the divisor is -1. + // srem Op0, (sext i1 X) --> srem Op0, -1 --> 0 + Value *X; + if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) + return ConstantInt::getNullValue(Op0->getType()); + + // If the two operands are negated, return 0. 
+ if (isKnownNegation(Op0, Op1)) + return ConstantInt::getNullValue(Op0->getType()); + return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse); } @@ -1140,10 +1171,14 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // 0 shift by X -> 0 if (match(Op0, m_Zero())) - return Op0; + return Constant::getNullValue(Op0->getType()); // X shift by 0 -> X - if (match(Op1, m_Zero())) + // Shift-by-sign-extended bool must be shift-by-0 because shift-by-all-ones + // would be poison. + Value *X; + if (match(Op1, m_Zero()) || + (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))) return Op0; // Fold undefined shifts. @@ -1177,7 +1212,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, return nullptr; } -/// \brief Given operands for an Shl, LShr or AShr, see if we can +/// Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q, @@ -1220,6 +1255,13 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *X; if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) return X; + + // shl nuw i8 C, %x -> C iff C has sign bit set. + if (isNUW && match(Op0, m_Negative())) + return Op0; + // NOTE: could use computeKnownBits() / LazyValueInfo, + // but the cost-benefit analysis suggests it isn't worth it. + return nullptr; } @@ -1257,9 +1299,10 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, MaxRecurse)) return V; - // all ones >>a X -> all ones + // all ones >>a X -> -1 + // Do not return Op0 because it may contain undef elements if it's a vector. if (match(Op0, m_AllOnes())) - return Op0; + return Constant::getAllOnesValue(Op0->getType()); // (X << A) >> A -> X Value *X; @@ -1295,7 +1338,7 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, ICmpInst::isUnsigned(UnsignedPred)) ; else if (match(UnsignedICmp, - m_ICmp(UnsignedPred, m_Value(Y), m_Specific(X))) && + m_ICmp(UnsignedPred, m_Specific(Y), m_Value(X))) && ICmpInst::isUnsigned(UnsignedPred)) UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); else @@ -1413,6 +1456,43 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; } +static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1, + bool IsAnd) { + ICmpInst::Predicate P0 = Cmp0->getPredicate(), P1 = Cmp1->getPredicate(); + if (!match(Cmp0->getOperand(1), m_Zero()) || + !match(Cmp1->getOperand(1), m_Zero()) || P0 != P1) + return nullptr; + + if ((IsAnd && P0 != ICmpInst::ICMP_NE) || (!IsAnd && P1 != ICmpInst::ICMP_EQ)) + return nullptr; + + // We have either "(X == 0 || Y == 0)" or "(X != 0 && Y != 0)". + Value *X = Cmp0->getOperand(0); + Value *Y = Cmp1->getOperand(0); + + // If one of the compares is a masked version of a (not) null check, then + // that compare implies the other, so we eliminate the other. Optionally, look + // through a pointer-to-int cast to match a null check of a pointer type. + + // (X == 0) || (([ptrtoint] X & ?) == 0) --> ([ptrtoint] X & ?) == 0 + // (X == 0) || ((? & [ptrtoint] X) == 0) --> (? & [ptrtoint] X) == 0 + // (X != 0) && (([ptrtoint] X & ?) != 0) --> ([ptrtoint] X & ?) != 0 + // (X != 0) && ((? & [ptrtoint] X) != 0) --> (? 
& [ptrtoint] X) != 0 + if (match(Y, m_c_And(m_Specific(X), m_Value())) || + match(Y, m_c_And(m_PtrToInt(m_Specific(X)), m_Value()))) + return Cmp1; + + // (([ptrtoint] Y & ?) == 0) || (Y == 0) --> ([ptrtoint] Y & ?) == 0 + // ((? & [ptrtoint] Y) == 0) || (Y == 0) --> (? & [ptrtoint] Y) == 0 + // (([ptrtoint] Y & ?) != 0) && (Y != 0) --> ([ptrtoint] Y & ?) != 0 + // ((? & [ptrtoint] Y) != 0) && (Y != 0) --> (? & [ptrtoint] Y) != 0 + if (match(X, m_c_And(m_Specific(Y), m_Value())) || + match(X, m_c_And(m_PtrToInt(m_Specific(Y)), m_Value()))) + return Cmp0; + + return nullptr; +} + static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { // (icmp (add V, C0), C1) & (icmp V, C0) ICmpInst::Predicate Pred0, Pred1; @@ -1473,6 +1553,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) return X; + if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true)) + return X; + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1)) return X; if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0)) @@ -1541,6 +1624,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) return X; + if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false)) + return X; + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1)) return X; if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0)) @@ -1638,7 +1724,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, // X & 0 = 0 if (match(Op1, m_Zero())) - return Op1; + return Constant::getNullValue(Op0->getType()); // X & -1 = X if (match(Op1, m_AllOnes())) @@ -1733,21 +1819,16 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return C; // X | undef -> -1 - if (match(Op1, m_Undef())) + // X | -1 = -1 + // Do not return Op1 because it may contain undef elements if it's a vector. + if (match(Op1, m_Undef()) || match(Op1, m_AllOnes())) return Constant::getAllOnesValue(Op0->getType()); // X | X = X - if (Op0 == Op1) - return Op0; - // X | 0 = X - if (match(Op1, m_Zero())) + if (Op0 == Op1 || match(Op1, m_Zero())) return Op0; - // X | -1 = -1 - if (match(Op1, m_AllOnes())) - return Op1; - // A | ~A = ~A | A = -1 if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0)))) @@ -2051,9 +2132,12 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset); ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset); uint64_t LHSSize, RHSSize; + ObjectSizeOpts Opts; + Opts.NullIsUnknownSize = + NullPointerIsDefined(cast<AllocaInst>(LHS)->getFunction()); if (LHSOffsetCI && RHSOffsetCI && - getObjectSize(LHS, LHSSize, DL, TLI) && - getObjectSize(RHS, RHSSize, DL, TLI)) { + getObjectSize(LHS, LHSSize, DL, TLI, Opts) && + getObjectSize(RHS, RHSSize, DL, TLI, Opts)) { const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); if (!LHSOffsetValue.isNegative() && @@ -2442,6 +2526,20 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, Value *RHS) { + Type *ITy = GetCompareTy(RHS); // The return type. 
+ + Value *X; + // Sign-bit checks can be optimized to true/false after unsigned + // floating-point casts: + // icmp slt (bitcast (uitofp X)), 0 --> false + // icmp sgt (bitcast (uitofp X)), -1 --> true + if (match(LHS, m_BitCast(m_UIToFP(m_Value(X))))) { + if (Pred == ICmpInst::ICMP_SLT && match(RHS, m_Zero())) + return ConstantInt::getFalse(ITy); + if (Pred == ICmpInst::ICMP_SGT && match(RHS, m_AllOnes())) + return ConstantInt::getTrue(ITy); + } + const APInt *C; if (!match(RHS, m_APInt(C))) return nullptr; @@ -2449,9 +2547,9 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, // Rule out tautological comparisons (eg., ult 0 or uge 0). ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C); if (RHS_CR.isEmptySet()) - return ConstantInt::getFalse(GetCompareTy(RHS)); + return ConstantInt::getFalse(ITy); if (RHS_CR.isFullSet()) - return ConstantInt::getTrue(GetCompareTy(RHS)); + return ConstantInt::getTrue(ITy); // Find the range of possible values for binary operators. unsigned Width = C->getBitWidth(); @@ -2469,9 +2567,9 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, if (!LHS_CR.isFullSet()) { if (RHS_CR.contains(LHS_CR)) - return ConstantInt::getTrue(GetCompareTy(RHS)); + return ConstantInt::getTrue(ITy); if (RHS_CR.inverse().contains(LHS_CR)) - return ConstantInt::getFalse(GetCompareTy(RHS)); + return ConstantInt::getFalse(ITy); } return nullptr; @@ -3008,8 +3106,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Type *ITy = GetCompareTy(LHS); // The return type. // icmp X, X -> true/false - // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false - // because X could be 0. + // icmp X, undef -> true/false because undef could be X. if (LHS == RHS || isa<UndefValue>(RHS)) return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); @@ -3309,6 +3406,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(RetTy); } + // NaN is unordered; NaN is not ordered. + assert((FCmpInst::isOrdered(Pred) || FCmpInst::isUnordered(Pred)) && + "Comparison must be either ordered or unordered"); + if (match(RHS, m_NaN())) + return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred)); + // fcmp pred x, undef and fcmp pred undef, x // fold to true if unordered, false if ordered if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) { @@ -3328,15 +3431,6 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Handle fcmp with constant RHS. const APFloat *C; if (match(RHS, m_APFloat(C))) { - // If the constant is a nan, see if we can fold the comparison based on it. - if (C->isNaN()) { - if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" - return getFalse(RetTy); - assert(FCmpInst::isUnordered(Pred) && - "Comparison must be either ordered or unordered!"); - // True if unordered. - return getTrue(RetTy); - } // Check whether the constant is an infinity. if (C->isInfinity()) { if (C->isNegative()) { @@ -3475,6 +3569,17 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, } } + // Same for GEPs. + if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { + if (MaxRecurse) { + SmallVector<Value *, 8> NewOps(GEP->getNumOperands()); + transform(GEP->operands(), NewOps.begin(), + [&](Value *V) { return V == Op ? RepOp : V; }); + return SimplifyGEPInst(GEP->getSourceElementType(), NewOps, Q, + MaxRecurse - 1); + } + } + // TODO: We could hand off more cases to instsimplify here. 
// If all operands are constant after substituting Op for RepOp then we can @@ -3581,24 +3686,6 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, TrueVal, FalseVal)) return V; - if (CondVal->hasOneUse()) { - const APInt *C; - if (match(CmpRHS, m_APInt(C))) { - // X < MIN ? T : F --> F - if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue()) - return FalseVal; - // X < MIN ? T : F --> F - if (Pred == ICmpInst::ICMP_ULT && C->isMinValue()) - return FalseVal; - // X > MAX ? T : F --> F - if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue()) - return FalseVal; - // X > MAX ? T : F --> F - if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue()) - return FalseVal; - } - } - // If we have an equality comparison, then we know the value in one of the // arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. @@ -3631,37 +3718,38 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, /// Given operands for a SelectInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, - Value *FalseVal, const SimplifyQuery &Q, - unsigned MaxRecurse) { - // select true, X, Y -> X - // select false, X, Y -> Y - if (Constant *CB = dyn_cast<Constant>(CondVal)) { - if (Constant *CT = dyn_cast<Constant>(TrueVal)) - if (Constant *CF = dyn_cast<Constant>(FalseVal)) - return ConstantFoldSelectInstruction(CB, CT, CF); - if (CB->isAllOnesValue()) +static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (auto *CondC = dyn_cast<Constant>(Cond)) { + if (auto *TrueC = dyn_cast<Constant>(TrueVal)) + if (auto *FalseC = dyn_cast<Constant>(FalseVal)) + return ConstantFoldSelectInstruction(CondC, TrueC, FalseC); + + // select undef, X, Y -> X or Y + if (isa<UndefValue>(CondC)) + return isa<Constant>(FalseVal) ? FalseVal : TrueVal; + + // TODO: Vector constants with undef elements don't simplify. + + // select true, X, Y -> X + if (CondC->isAllOnesValue()) return TrueVal; - if (CB->isNullValue()) + // select false, X, Y -> Y + if (CondC->isNullValue()) return FalseVal; } - // select C, X, X -> X + // select ?, X, X -> X if (TrueVal == FalseVal) return TrueVal; - if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y - if (isa<Constant>(FalseVal)) - return FalseVal; - return TrueVal; - } - if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X + if (isa<UndefValue>(TrueVal)) // select ?, undef, X -> X return FalseVal; - if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X + if (isa<UndefValue>(FalseVal)) // select ?, X, undef -> X return TrueVal; if (Value *V = - simplifySelectWithICmpCond(CondVal, TrueVal, FalseVal, Q, MaxRecurse)) + simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse)) return V; return nullptr; @@ -3712,7 +3800,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, // The following transforms are only safe if the ptrtoint cast // doesn't truncate the pointers. 
if (Ops[1]->getType()->getScalarSizeInBits() == - Q.DL.getPointerSizeInBits(AS)) { + Q.DL.getIndexSizeInBits(AS)) { auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { if (match(P, m_Zero())) return Constant::getNullValue(GEPTy); @@ -3752,10 +3840,10 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, if (Q.DL.getTypeAllocSize(LastType) == 1 && all_of(Ops.slice(1).drop_back(1), [](Value *Idx) { return match(Idx, m_Zero()); })) { - unsigned PtrWidth = - Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); - if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth) { - APInt BasePtrOffset(PtrWidth, 0); + unsigned IdxWidth = + Q.DL.getIndexSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); + if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == IdxWidth) { + APInt BasePtrOffset(IdxWidth, 0); Value *StrippedBasePtr = Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, BasePtrOffset); @@ -3946,7 +4034,7 @@ static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) - return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr; + return valueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr; return CommonValue; } @@ -4123,6 +4211,28 @@ Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } +static Constant *propagateNaN(Constant *In) { + // If the input is a vector with undef elements, just return a default NaN. + if (!In->isNaN()) + return ConstantFP::getNaN(In->getType()); + + // Propagate the existing NaN constant when possible. + // TODO: Should we quiet a signaling NaN? + return In; +} + +static Constant *simplifyFPBinop(Value *Op0, Value *Op1) { + if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) + return ConstantFP::getNaN(Op0->getType()); + + if (match(Op0, m_NaN())) + return propagateNaN(cast<Constant>(Op0)); + if (match(Op1, m_NaN())) + return propagateNaN(cast<Constant>(Op1)); + + return nullptr; +} + /// Given operands for an FAdd, see if we can fold the result. If not, this /// returns null. static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, @@ -4130,29 +4240,28 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; + if (Constant *C = simplifyFPBinop(Op0, Op1)) + return C; + // fadd X, -0 ==> X - if (match(Op1, m_NegZero())) + if (match(Op1, m_NegZeroFP())) return Op0; // fadd X, 0 ==> X, when we know X is not -0 - if (match(Op1, m_Zero()) && + if (match(Op1, m_PosZeroFP()) && (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) return Op0; - // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 - // where nnan and ninf have to occur at least once somewhere in this - // expression - Value *SubOp = nullptr; - if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) - SubOp = Op1; - else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) - SubOp = Op0; - if (SubOp) { - Instruction *FSub = cast<Instruction>(SubOp); - if ((FMF.noNaNs() || FSub->hasNoNaNs()) && - (FMF.noInfs() || FSub->hasNoInfs())) - return Constant::getNullValue(Op0->getType()); - } + // With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant) + // We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN. 
+ // Negative zeros are allowed because we always end up with positive zero: + // X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0 + // X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0 + // X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0 + // X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0 + if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) || + match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))) + return ConstantFP::getNullValue(Op0->getType()); return nullptr; } @@ -4164,23 +4273,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; - // fsub X, 0 ==> X - if (match(Op1, m_Zero())) + if (Constant *C = simplifyFPBinop(Op0, Op1)) + return C; + + // fsub X, +0 ==> X + if (match(Op1, m_PosZeroFP())) return Op0; // fsub X, -0 ==> X, when we know X is not -0 - if (match(Op1, m_NegZero()) && + if (match(Op1, m_NegZeroFP()) && (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) return Op0; // fsub -0.0, (fsub -0.0, X) ==> X Value *X; - if (match(Op0, m_NegZero()) && match(Op1, m_FSub(m_NegZero(), m_Value(X)))) + if (match(Op0, m_NegZeroFP()) && + match(Op1, m_FSub(m_NegZeroFP(), m_Value(X)))) return X; // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored. - if (FMF.noSignedZeros() && match(Op0, m_AnyZero()) && - match(Op1, m_FSub(m_AnyZero(), m_Value(X)))) + if (FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()) && + match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X)))) return X; // fsub nnan x, x ==> 0.0 @@ -4196,13 +4309,25 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) return C; + if (Constant *C = simplifyFPBinop(Op0, Op1)) + return C; + // fmul X, 1.0 ==> X if (match(Op1, m_FPOne())) return Op0; // fmul nnan nsz X, 0 ==> 0 - if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) - return Op1; + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP())) + return ConstantFP::getNullValue(Op0->getType()); + + // sqrt(X) * sqrt(X) --> X, if we can: + // 1. Remove the intermediate rounding (reassociate). + // 2. Ignore non-zero negative numbers because sqrt would produce NAN. + // 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0. + Value *X; + if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && + FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros()) + return X; return nullptr; } @@ -4228,13 +4353,8 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; - // undef / X -> undef (the undef could be a snan). - if (match(Op0, m_Undef())) - return Op0; - - // X / undef -> undef - if (match(Op1, m_Undef())) - return Op1; + if (Constant *C = simplifyFPBinop(Op0, Op1)) + return C; // X / 1.0 -> X if (match(Op1, m_FPOne())) @@ -4243,14 +4363,20 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, // 0 / X -> 0 // Requires that NaNs are off (X could be zero) and signed zeroes are // ignored (X could be positive or negative, so the output sign is unknown). - if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) - return Op0; + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP())) + return ConstantFP::getNullValue(Op0->getType()); if (FMF.noNaNs()) { // X / X -> 1.0 is legal when NaNs are ignored. 
+ // We can ignore infinities because INF/INF is NaN. if (Op0 == Op1) return ConstantFP::get(Op0->getType(), 1.0); + // (X * Y) / Y --> X if we can reassociate to the above form. + Value *X; + if (FMF.allowReassoc() && match(Op0, m_c_FMul(m_Value(X), m_Specific(Op1)))) + return X; + // -X / X -> -1.0 and // X / -X -> -1.0 are legal when NaNs are ignored. // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored. @@ -4274,19 +4400,20 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; - // undef % X -> undef (the undef could be a snan). - if (match(Op0, m_Undef())) - return Op0; - - // X % undef -> undef - if (match(Op1, m_Undef())) - return Op1; + if (Constant *C = simplifyFPBinop(Op0, Op1)) + return C; - // 0 % X -> 0 - // Requires that NaNs are off (X could be zero) and signed zeroes are - // ignored (X could be positive or negative, so the output sign is unknown). - if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) - return Op0; + // Unlike fdiv, the result of frem always matches the sign of the dividend. + // The constant match may include undef elements in a vector, so return a full + // zero constant as the result. + if (FMF.noNaNs()) { + // +0 % X -> 0 + if (match(Op0, m_PosZeroFP())) + return ConstantFP::getNullValue(Op0->getType()); + // -0 % X -> -0 + if (match(Op0, m_NegZeroFP())) + return ConstantFP::getNegativeZero(Op0->getType()); + } return nullptr; } @@ -4515,28 +4642,28 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, } case Intrinsic::exp: { // exp(log(x)) -> x - if (Q.CxtI->isFast() && + if (Q.CxtI->hasAllowReassoc() && match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X; return nullptr; } case Intrinsic::exp2: { // exp2(log2(x)) -> x - if (Q.CxtI->isFast() && + if (Q.CxtI->hasAllowReassoc() && match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X; return nullptr; } case Intrinsic::log: { // log(exp(x)) -> x - if (Q.CxtI->isFast() && + if (Q.CxtI->hasAllowReassoc() && match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X; return nullptr; } case Intrinsic::log2: { // log2(exp2(x)) -> x - if (Q.CxtI->isFast() && + if (Q.CxtI->hasAllowReassoc() && match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) { return X; } @@ -4606,6 +4733,14 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, return LHS; } return nullptr; + case Intrinsic::maxnum: + case Intrinsic::minnum: + // If one argument is NaN, return the other argument. + if (match(LHS, m_NaN())) + return RHS; + if (match(RHS, m_NaN())) + return LHS; + return nullptr; default: return nullptr; } @@ -4843,7 +4978,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, return Result == I ? UndefValue::get(I->getType()) : Result; } -/// \brief Implementation of recursive simplification through an instruction's +/// Implementation of recursive simplification through an instruction's /// uses. /// /// This is the common implementation of the recursive simplification routines. 
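The new floating-point folds in the InstructionSimplify.cpp diff above lean on two IEEE-754 facts: frem, like C's fmod, keeps the sign of the dividend, and (+/-0.0 - X) + X produces +0.0 under the default round-to-nearest mode, which is why the nnan fadd fold returns a plain null (positive-zero) constant. A minimal standalone sketch checking those assumptions (illustrative names only, no LLVM headers, not part of the patch):

// Standalone check of the IEEE-754 assumptions behind the frem and fadd folds.
// All names below are illustrative; this is not part of the patch itself.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  // frem, like C's fmod, keeps the sign of the dividend, so "+0 % X -> 0" and
  // "-0 % X -> -0" are safe once NaNs have been excluded.
  assert(!std::signbit(std::fmod(+0.0, 3.0))); // +0.0 frem 3.0 == +0.0
  assert(std::signbit(std::fmod(-0.0, 3.0)));  // -0.0 frem 3.0 == -0.0

  // (+/-0.0 - X) + X produces +0.0 for the zero cases enumerated in the fadd
  // comment, and for finite nonzero X as well under round-to-nearest, which is
  // why the nnan fold can return a plain (positive) zero constant.
  const double Xs[] = {+0.0, -0.0, 3.0, -3.0};
  for (double X : Xs) {
    double PosZeroCase = (+0.0 - X) + X;
    double NegZeroCase = (-0.0 - X) + X;
    assert(PosZeroCase == 0.0 && !std::signbit(PosZeroCase));
    assert(NegZeroCase == 0.0 && !std::signbit(NegZeroCase));
  }
  std::puts("IEEE-754 assumptions hold");
  return 0;
}

Infinite X would give NaN in the last check, but the fold is only performed under nnan, so that case is excluded by assumption.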
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp index 3992657417c5..e7751d32aab3 100644 --- a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp @@ -21,15 +21,20 @@ template <class NodeTy, bool IsPostDom> void IDFCalculator<NodeTy, IsPostDom>::calculate( SmallVectorImpl<BasicBlock *> &PHIBlocks) { // Use a priority queue keyed on dominator tree level so that inserted nodes - // are handled from the bottom of the dominator tree upwards. - typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; + // are handled from the bottom of the dominator tree upwards. We also augment + // the level with a DFS number to ensure that the blocks are ordered in a + // deterministic way. + typedef std::pair<DomTreeNode *, std::pair<unsigned, unsigned>> + DomTreeNodePair; typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, less_second> IDFPriorityQueue; IDFPriorityQueue PQ; + DT.updateDFSNumbers(); + for (BasicBlock *BB : *DefBlocks) { if (DomTreeNode *Node = DT.getNode(BB)) - PQ.push({Node, Node->getLevel()}); + PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())}); } SmallVector<DomTreeNode *, 32> Worklist; @@ -40,7 +45,7 @@ void IDFCalculator<NodeTy, IsPostDom>::calculate( DomTreeNodePair RootPair = PQ.top(); PQ.pop(); DomTreeNode *Root = RootPair.first; - unsigned RootLevel = RootPair.second; + unsigned RootLevel = RootPair.second.first; // Walk all dominator tree children of Root, inspecting their CFG edges with // targets elsewhere on the dominator tree. Only targets whose level is at @@ -77,7 +82,8 @@ void IDFCalculator<NodeTy, IsPostDom>::calculate( PHIBlocks.emplace_back(SuccBB); if (!DefBlocks->count(SuccBB)) - PQ.push(std::make_pair(SuccNode, SuccLevel)); + PQ.push(std::make_pair( + SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn()))); } for (auto DomChild : *Node) { diff --git a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp index a8178ecc0a24..93c23bca96af 100644 --- a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LazyBranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Dominators.h" using namespace llvm; @@ -41,6 +42,10 @@ void LazyBlockFrequencyInfoPass::print(raw_ostream &OS, const Module *) const { void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU); + // We require DT so it's available when LI is available. The LI updating code + // asserts that DT is also present so if we don't make sure that we have DT + // here, that assert will trigger. 
+ AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.setPreservesAll(); } diff --git a/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp index e2884d0a4564..429b78c3a47e 100644 --- a/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LazyBranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Dominators.h" using namespace llvm; @@ -42,6 +43,10 @@ void LazyBranchProbabilityInfoPass::print(raw_ostream &OS, } void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { + // We require DT so it's available when LI is available. The LI updating code + // asserts that DT is also present so if we don't make sure that we have DT + // here, that assert will trigger. + AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.setPreservesAll(); diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp index 54299d078be5..b1d585bfc683 100644 --- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -65,15 +66,15 @@ static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges, if (!EdgeIndexMap.insert({&N, Edges.size()}).second) return; - DEBUG(dbgs() << " Added callable function: " << N.getName() << "\n"); + LLVM_DEBUG(dbgs() << " Added callable function: " << N.getName() << "\n"); Edges.emplace_back(LazyCallGraph::Edge(N, EK)); } LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() { assert(!Edges && "Must not have already populated the edges for this node!"); - DEBUG(dbgs() << " Adding functions called by '" << getName() - << "' to the graph.\n"); + LLVM_DEBUG(dbgs() << " Adding functions called by '" << getName() + << "' to the graph.\n"); Edges = EdgeSequence(); @@ -151,8 +152,8 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { } LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { - DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() - << "\n"); + LLVM_DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() + << "\n"); for (Function &F : M) { if (F.isDeclaration()) continue; @@ -167,8 +168,8 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { // External linkage defined functions have edges to them from other // modules. 
- DEBUG(dbgs() << " Adding '" << F.getName() - << "' to entry set of the graph.\n"); + LLVM_DEBUG(dbgs() << " Adding '" << F.getName() + << "' to entry set of the graph.\n"); addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref); } @@ -180,8 +181,9 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { if (Visited.insert(GV.getInitializer()).second) Worklist.push_back(GV.getInitializer()); - DEBUG(dbgs() << " Adding functions referenced by global initializers to the " - "entry set.\n"); + LLVM_DEBUG( + dbgs() << " Adding functions referenced by global initializers to the " + "entry set.\n"); visitReferences(Worklist, Visited, [&](Function &F) { addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), LazyCallGraph::Edge::Ref); @@ -427,7 +429,7 @@ bool LazyCallGraph::RefSCC::isAncestorOf(const RefSCC &RC) const { /// source to target. /// /// This helper routine, in addition to updating the postorder sequence itself -/// will also update a map from SCCs to indices within that sequecne. +/// will also update a map from SCCs to indices within that sequence. /// /// The sequence and the map must operate on pointers to the SCC type. /// @@ -713,7 +715,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { // // However, we specially handle the target node. The target node is known to // reach all other nodes in the original SCC by definition. This means that - // we want the old SCC to be replaced with an SCC contaning that node as it + // we want the old SCC to be replaced with an SCC containing that node as it // will be the root of whatever SCC DAG results from the DFS. Assumptions // about an SCC such as the set of functions called will continue to hold, // etc. @@ -822,7 +824,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { // Cleared the DFS early, start another round. break; - // We've finished processing N and its descendents, put it on our pending + // We've finished processing N and its descendants, put it on our pending // SCC stack to eventually get merged into an SCC of nodes. PendingSCCStack.push_back(N); @@ -1234,7 +1236,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, ++I; } - // We've finished processing N and its descendents, put it on our pending + // We've finished processing N and its descendants, put it on our pending // stack to eventually get merged into a RefSCC. PendingRefSCCStack.push_back(N); @@ -1271,8 +1273,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, // the removal hasn't changed the structure at all. This is an important // special case and we can directly exit the entire routine more // efficiently as soon as we discover it. - if (std::distance(RefSCCNodes.begin(), RefSCCNodes.end()) == - NumRefSCCNodes) { + if (llvm::size(RefSCCNodes) == NumRefSCCNodes) { // Clear out the low link field as we won't need it. for (Node *N : RefSCCNodes) N->LowLink = -1; @@ -1294,7 +1295,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, // Otherwise we create a collection of new RefSCC nodes and build // a radix-sort style map from postorder number to these new RefSCCs. We then - // append SCCs to each of these RefSCCs in the order they occured in the + // append SCCs to each of these RefSCCs in the order they occurred in the // original SCCs container. 
for (int i = 0; i < PostOrderNumber; ++i) Result.push_back(G->createRefSCC(*G)); @@ -1617,7 +1618,7 @@ void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin, ++I; } - // We've finished processing N and its descendents, put it on our pending + // We've finished processing N and its descendants, put it on our pending // SCC stack to eventually get merged into an SCC of nodes. PendingSCCStack.push_back(N); @@ -1738,7 +1739,7 @@ static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) { } static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) { - ptrdiff_t Size = std::distance(C.begin(), C.end()); + ptrdiff_t Size = size(C); OS << " SCC with " << Size << " functions:\n"; for (LazyCallGraph::Node &N : C) @@ -1746,7 +1747,7 @@ static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) { } static void printRefSCC(raw_ostream &OS, LazyCallGraph::RefSCC &C) { - ptrdiff_t Size = std::distance(C.begin(), C.end()); + ptrdiff_t Size = size(C); OS << " RefSCC with " << Size << " call SCCs:\n"; for (LazyCallGraph::SCC &InnerC : C) diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index d7da669f6e79..435b6f205199 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -392,8 +392,8 @@ namespace { if (!BlockValueSet.insert(BV).second) return false; // It's already in the stack. - DEBUG(dbgs() << "PUSH: " << *BV.second << " in " << BV.first->getName() - << "\n"); + LLVM_DEBUG(dbgs() << "PUSH: " << *BV.second << " in " + << BV.first->getName() << "\n"); BlockValueStack.push_back(BV); return true; } @@ -401,6 +401,7 @@ namespace { AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls. const DataLayout &DL; ///< A mandatory DataLayout DominatorTree *DT; ///< An optional DT pointer. + DominatorTree *DisabledDT; ///< Stores DT if it's disabled. ValueLatticeElement getBlockValue(Value *Val, BasicBlock *BB); bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, @@ -463,13 +464,30 @@ namespace { TheCache.eraseBlock(BB); } + /// Disables use of the DominatorTree within LVI. + void disableDT() { + if (DT) { + assert(!DisabledDT && "Both DT and DisabledDT are not nullptr!"); + std::swap(DT, DisabledDT); + } + } + + /// Enables use of the DominatorTree within LVI. Does nothing if the class + /// instance was initialized without a DT pointer. + void enableDT() { + if (DisabledDT) { + assert(!DT && "Both DT and DisabledDT are not nullptr!"); + std::swap(DT, DisabledDT); + } + } + /// This is the update interface to inform the cache that an edge from /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL, DominatorTree *DT = nullptr) - : AC(AC), DL(DL), DT(DT) {} + : AC(AC), DL(DL), DT(DT), DisabledDT(nullptr) {} }; } // end anonymous namespace @@ -490,7 +508,8 @@ void LazyValueInfoImpl::solve() { // PredicateInfo is used in LVI or CVP, we should be able to make the // overdefined cache global, and remove this throttle. 
if (processedCount > MaxProcessedPerValue) { - DEBUG(dbgs() << "Giving up on stack because we are getting too deep\n"); + LLVM_DEBUG( + dbgs() << "Giving up on stack because we are getting too deep\n"); // Fill in the original values while (!StartingStack.empty()) { std::pair<BasicBlock *, Value *> &e = StartingStack.back(); @@ -511,8 +530,9 @@ void LazyValueInfoImpl::solve() { assert(TheCache.hasCachedValueInfo(e.second, e.first) && "Result should be in cache!"); - DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName() - << " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n"); + LLVM_DEBUG( + dbgs() << "POP " << *e.second << " in " << e.first->getName() << " = " + << TheCache.getCachedValueInfo(e.second, e.first) << "\n"); BlockValueStack.pop_back(); BlockValueSet.erase(e); @@ -563,8 +583,8 @@ bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) { if (TheCache.hasCachedValueInfo(Val, BB)) { // If we have a cached value, use that. - DEBUG(dbgs() << " reuse BB '" << BB->getName() - << "' val=" << TheCache.getCachedValueInfo(Val, BB) << '\n'); + LLVM_DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" + << TheCache.getCachedValueInfo(Val, BB) << '\n'); // Since we're reusing a cached value, we don't need to update the // OverDefinedCache. The cache will have been properly updated whenever the @@ -619,8 +639,8 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, return solveBlockValueBinaryOp(Res, BO, BB); } - DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - unknown inst def found.\n"); + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - unknown inst def found.\n"); Res = getFromRangeMetadata(BBI); return true; } @@ -684,9 +704,11 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); // Before giving up, see if we can prove the pointer non-null local to // this particular block. - if (Val->getType()->isPointerTy() && - (isKnownNonZero(Val, DL) || isObjectDereferencedInBlock(Val, BB))) { - PointerType *PTy = cast<PointerType>(Val->getType()); + PointerType *PTy = dyn_cast<PointerType>(Val->getType()); + if (PTy && + (isKnownNonZero(Val, DL) || + (isObjectDereferencedInBlock(Val, BB) && + !NullPointerIsDefined(BB->getParent(), PTy->getAddressSpace())))) { Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); } else { Result = ValueLatticeElement::getOverdefined(); @@ -715,13 +737,13 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, // If we hit overdefined, exit early. The BlockVals entry is already set // to overdefined. if (Result.isOverdefined()) { - DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined because of pred (non local).\n"); + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred (non local).\n"); // Before giving up, see if we can prove the pointer non-null local to // this particular block. - if (Val->getType()->isPointerTy() && - isObjectDereferencedInBlock(Val, BB)) { - PointerType *PTy = cast<PointerType>(Val->getType()); + PointerType *PTy = dyn_cast<PointerType>(Val->getType()); + if (PTy && isObjectDereferencedInBlock(Val, BB) && + !NullPointerIsDefined(BB->getParent(), PTy->getAddressSpace())) { Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); } @@ -759,8 +781,8 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(ValueLatticeElement &BBLV, // If we hit overdefined, exit early. 
The BlockVals entry is already set // to overdefined. if (Result.isOverdefined()) { - DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined because of pred (local).\n"); + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred (local).\n"); BBLV = Result; return true; @@ -950,8 +972,8 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV, break; default: // Unhandled instructions are overdefined. - DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined (unknown cast).\n"); + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown cast).\n"); BBLV = ValueLatticeElement::getOverdefined(); return true; } @@ -1009,8 +1031,8 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV, break; default: // Unhandled instructions are overdefined. - DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined (unknown binary operator).\n"); + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown binary operator).\n"); BBLV = ValueLatticeElement::getOverdefined(); return true; }; @@ -1127,9 +1149,17 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, (!isTrueDest && BO->getOpcode() != BinaryOperator::Or)) return ValueLatticeElement::getOverdefined(); - auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited); - auto LHS = getValueFromCondition(Val, BO->getOperand(1), isTrueDest, Visited); - return intersect(RHS, LHS); + // Prevent infinite recursion if Cond references itself as in this example: + // Cond: "%tmp4 = and i1 %tmp4, undef" + // BL: "%tmp4 = and i1 %tmp4, undef" + // BR: "i1 undef" + Value *BL = BO->getOperand(0); + Value *BR = BO->getOperand(1); + if (BL == Cond || BR == Cond) + return ValueLatticeElement::getOverdefined(); + + return intersect(getValueFromCondition(Val, BL, isTrueDest, Visited), + getValueFromCondition(Val, BR, isTrueDest, Visited)); } static ValueLatticeElement @@ -1196,7 +1226,7 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op, return ValueLatticeElement::getOverdefined(); } -/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if +/// Compute the value of Val on the edge BBFrom -> BBTo. Returns false if /// Val is not constrained on the edge. Result is unspecified if return value /// is false. static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, @@ -1321,7 +1351,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, return false; } -/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at +/// Compute the value of Val on the edge BBFrom -> BBTo or the value at /// the basic block if the edge does not constrain Val. 
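The guard added to getValueFromConditionImpl above handles degenerate IR in which the condition appears among its own operands (for example "%tmp4 = and i1 %tmp4, undef"); recursing into such an operand would never terminate. A generic sketch of the same pattern, with hypothetical stand-in types rather than the real LVI lattice:

// Hypothetical Node/Lattice types; only the self-reference check mirrors the
// change above, the rest is a generic recursive evaluator.
enum class Lattice { Known, Overdefined };

struct Node {
  const Node *Lhs = nullptr; // both null for a leaf
  const Node *Rhs = nullptr;
};

Lattice evaluate(const Node *N) {
  // Leaves (and malformed nodes) contribute a known value in this sketch.
  if (!N->Lhs || !N->Rhs)
    return Lattice::Known;

  // A node that lists itself as an operand would recurse forever; give the
  // conservative answer instead of descending into it.
  if (N->Lhs == N || N->Rhs == N)
    return Lattice::Overdefined;

  Lattice L = evaluate(N->Lhs);
  Lattice R = evaluate(N->Rhs);
  return (L == Lattice::Overdefined || R == Lattice::Overdefined)
             ? Lattice::Overdefined
             : Lattice::Known;
}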
bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, BasicBlock *BBTo, @@ -1373,8 +1403,8 @@ bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, Instruction *CxtI) { - DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" - << BB->getName() << "'\n"); + LLVM_DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" + << BB->getName() << "'\n"); assert(BlockValueStack.empty() && BlockValueSet.empty()); if (!hasBlockValue(V, BB)) { @@ -1384,13 +1414,13 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, ValueLatticeElement Result = getBlockValue(V, BB); intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); - DEBUG(dbgs() << " Result = " << Result << "\n"); + LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) { - DEBUG(dbgs() << "LVI Getting value " << *V << " at '" - << CxtI->getName() << "'\n"); + LLVM_DEBUG(dbgs() << "LVI Getting value " << *V << " at '" << CxtI->getName() + << "'\n"); if (auto *C = dyn_cast<Constant>(V)) return ValueLatticeElement::get(C); @@ -1400,15 +1430,16 @@ ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) { Result = getFromRangeMetadata(I); intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); - DEBUG(dbgs() << " Result = " << Result << "\n"); + LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } ValueLatticeElement LazyValueInfoImpl:: getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { - DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" - << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); + LLVM_DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" + << FromBB->getName() << "' to '" << ToBB->getName() + << "'\n"); ValueLatticeElement Result; if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) { @@ -1418,7 +1449,7 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, assert(WasFastQuery && "More work to do after problem solved?"); } - DEBUG(dbgs() << " Result = " << Result << "\n"); + LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } @@ -1791,6 +1822,16 @@ void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) } } +void LazyValueInfo::disableDT() { + if (PImpl) + getImpl(PImpl, AC, DL, DT).disableDT(); +} + +void LazyValueInfo::enableDT() { + if (PImpl) + getImpl(PImpl, AC, DL, DT).enableDT(); +} + // Print the LVI for the function arguments at the start of each basic block. void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( const BasicBlock *BB, formatted_raw_ostream &OS) { @@ -1807,7 +1848,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( // This function prints the LVI analysis for the instruction I at the beginning // of various basic blocks. It relies on calculated values that are stored in -// the LazyValueInfoCache, and in the absence of cached values, recalculte the +// the LazyValueInfoCache, and in the absence of cached values, recalculate the // LazyValueInfo for `I`, and print that info. 
void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( const Instruction *I, formatted_raw_ostream &OS) { @@ -1830,7 +1871,7 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( }; printResult(ParentBB); - // Print the LVI analysis results for the the immediate successor blocks, that + // Print the LVI analysis results for the immediate successor blocks, that // are dominated by `ParentBB`. for (auto *BBSucc : successors(ParentBB)) if (DT.dominates(ParentBB, BBSucc)) diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 0e3f498cb14c..db919bd233bf 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -165,13 +165,13 @@ namespace { } } - /// \brief A check failed, so printout out the condition and the message. + /// A check failed, so printout out the condition and the message. /// /// This provides a nice place to put a breakpoint if you want to see why /// something is not correct. void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } - /// \brief A check failed (with values to print). + /// A check failed (with values to print). /// /// This calls the Message-only version so that the above is easier to set /// a breakpoint on. @@ -323,9 +323,9 @@ void Lint::visitCallSite(CallSite CS) { MemCpyInst *MCI = cast<MemCpyInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, - MCI->getAlignment(), nullptr, MemRef::Write); + MCI->getDestAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, - MCI->getAlignment(), nullptr, MemRef::Read); + MCI->getSourceAlignment(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. Known partial @@ -345,16 +345,16 @@ void Lint::visitCallSite(CallSite CS) { MemMoveInst *MMI = cast<MemMoveInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, - MMI->getAlignment(), nullptr, MemRef::Write); + MMI->getDestAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, - MMI->getAlignment(), nullptr, MemRef::Read); + MMI->getSourceAlignment(), nullptr, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast<MemSetInst>(&I); // TODO: If the size is known, use it. 
visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, - MSI->getAlignment(), nullptr, MemRef::Write); + MSI->getDestAlignment(), nullptr, MemRef::Write); break; } diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index 834727c9224d..d319d4c249d3 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -80,7 +80,7 @@ static bool isDereferenceableAndAlignedPointer( if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { const Value *Base = GEP->getPointerOperand(); - APInt Offset(DL.getPointerTypeSizeInBits(GEP->getType()), 0); + APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() || !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue()) return false; @@ -108,8 +108,8 @@ static bool isDereferenceableAndAlignedPointer( DL, CtxI, DT, Visited); if (auto CS = ImmutableCallSite(V)) - if (const Value *RV = CS.getReturnedArgOperand()) - return isDereferenceableAndAlignedPointer(RV, Align, Size, DL, CtxI, DT, + if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) + return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT, Visited); // If we don't know, assume the worst. @@ -146,7 +146,7 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, SmallPtrSet<const Value *, 32> Visited; return ::isDereferenceableAndAlignedPointer( - V, Align, APInt(DL.getTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL, + V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL, CtxI, DT, Visited); } @@ -156,7 +156,7 @@ bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT); } -/// \brief Test if A and B will obviously have the same value. +/// Test if A and B will obviously have the same value. /// /// This includes recognizing that %t0 and %t1 will have the same /// value in code like this: @@ -187,7 +187,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } -/// \brief Check if executing a load of this pointer value cannot trap. +/// Check if executing a load of this pointer value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive /// analysis and returns true if it is safe to load immediately before ScanFrom. diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index e141d6c58b65..c6175bf9bee9 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -92,7 +92,7 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold( cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); unsigned VectorizerParams::RuntimeMemoryCheckThreshold; -/// \brief The maximum iterations used to merge memory checks +/// The maximum iterations used to merge memory checks static cl::opt<unsigned> MemoryCheckMergeThreshold( "memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " @@ -102,7 +102,7 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold( /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; -/// \brief We collect dependences up to this threshold. +/// We collect dependences up to this threshold. 
static cl::opt<unsigned> MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " @@ -124,7 +124,7 @@ static cl::opt<bool> EnableMemAccessVersioning( "enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning")); -/// \brief Enable store-to-load forwarding conflict detection. This option can +/// Enable store-to-load forwarding conflict detection. This option can /// be disabled for correctness testing. static cl::opt<bool> EnableForwardingConflictDetection( "store-to-load-forwarding-conflict-detection", cl::Hidden, @@ -165,8 +165,8 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, PSE.addPredicate(*SE->getEqualPredicate(U, CT)); auto *Expr = PSE.getSCEV(Ptr); - DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr - << "\n"); + LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV + << " by: " << *Expr << "\n"); return Expr; } @@ -490,23 +490,23 @@ void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { namespace { -/// \brief Analyses memory accesses in a loop. +/// Analyses memory accesses in a loop. /// /// Checks whether run time pointer checks are needed and builds sets for data /// dependence checking. class AccessAnalysis { public: - /// \brief Read or write access location. + /// Read or write access location. typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; - AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA, + AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA, + LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, PredicatedScalarEvolution &PSE) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), - PSE(PSE) {} + : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), + IsRTCheckAnalysisNeeded(false), PSE(PSE) {} - /// \brief Register a load and whether it is only read from. + /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { Value *Ptr = const_cast<Value*>(Loc.Ptr); AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags); @@ -515,14 +515,14 @@ public: ReadOnlyPtr.insert(Ptr); } - /// \brief Register a store. + /// Register a store. void addStore(MemoryLocation &Loc) { Value *Ptr = const_cast<Value*>(Loc.Ptr); AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags); Accesses.insert(MemAccessInfo(Ptr, true)); } - /// \brief Check if we can emit a run-time no-alias check for \p Access. + /// Check if we can emit a run-time no-alias check for \p Access. /// /// Returns true if we can emit a run-time no alias check for \p Access. /// If we can check this access, this also adds it to a dependence set and @@ -537,7 +537,7 @@ public: unsigned ASId, bool ShouldCheckStride, bool Assume); - /// \brief Check whether we can check the pointers at runtime for + /// Check whether we can check the pointers at runtime for /// non-intersection. /// /// Returns true if we need no check or if we do and we can generate them @@ -546,13 +546,13 @@ public: Loop *TheLoop, const ValueToValueMap &Strides, bool ShouldCheckWrap = false); - /// \brief Goes over all memory accesses, checks whether a RT check is needed + /// Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. 
void buildDependenceSets() { processMemAccesses(); } - /// \brief Initial processing of memory accesses determined that we need to + /// Initial processing of memory accesses determined that we need to /// perform dependency checking. /// /// Note that this can later be cleared if we retry memcheck analysis without @@ -570,7 +570,7 @@ public: private: typedef SetVector<MemAccessInfo> PtrAccessSet; - /// \brief Go over all memory access and check whether runtime pointer checks + /// Go over all memory access and check whether runtime pointer checks /// are needed and build sets of dependency check candidates. void processMemAccesses(); @@ -579,6 +579,9 @@ private: const DataLayout &DL; + /// The loop being checked. + const Loop *TheLoop; + /// List of accesses that need a further dependence check. MemAccessInfoList CheckDeps; @@ -596,7 +599,7 @@ private: /// dependence check. MemoryDepChecker::DepCandidates &DepCands; - /// \brief Initial processing of memory accesses determined that we may need + /// Initial processing of memory accesses determined that we may need /// to add memchecks. Perform the analysis to determine the necessary checks. /// /// Note that, this is different from isDependencyCheckNeeded. When we retry @@ -611,7 +614,7 @@ private: } // end anonymous namespace -/// \brief Check whether a pointer can participate in a runtime bounds check. +/// Check whether a pointer can participate in a runtime bounds check. /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr /// by adding run-time checks (overflow checks) if necessary. static bool hasComputableBounds(PredicatedScalarEvolution &PSE, @@ -634,7 +637,7 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE, return AR->isAffine(); } -/// \brief Check whether a pointer address cannot wrap. +/// Check whether a pointer address cannot wrap. 
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L) { const SCEV *PtrScev = PSE.getSCEV(Ptr); @@ -684,7 +687,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, bool IsWrite = Access.getInt(); RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); - DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); return true; } @@ -729,7 +732,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, ShouldCheckWrap, false)) { - DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); + LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); Retries.push_back(Access); CanDoAliasSetRT = false; } @@ -791,8 +794,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, unsigned ASi = PtrI->getType()->getPointerAddressSpace(); unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); if (ASi != ASj) { - DEBUG(dbgs() << "LAA: Runtime check would require comparison between" - " different address spaces\n"); + LLVM_DEBUG( + dbgs() << "LAA: Runtime check would require comparison between" + " different address spaces\n"); return false; } } @@ -801,8 +805,8 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, if (NeedRTCheck && CanDoRT) RtCheck.generateChecks(DepCands, IsDepCheckNeeded); - DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() - << " pointer comparisons.\n"); + LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() + << " pointer comparisons.\n"); RtCheck.Need = NeedRTCheck; @@ -817,10 +821,10 @@ void AccessAnalysis::processMemAccesses() { // process read-only pointers. This allows us to skip dependence tests for // read-only pointers. - DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); - DEBUG(dbgs() << " AST: "; AST.dump()); - DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n"); - DEBUG({ + LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); + LLVM_DEBUG(dbgs() << " AST: "; AST.dump()); + LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n"); + LLVM_DEBUG({ for (auto A : Accesses) dbgs() << "\t" << *A.getPointer() << " (" << (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ? @@ -904,11 +908,15 @@ void AccessAnalysis::processMemAccesses() { ValueVector TempObjects; GetUnderlyingObjects(Ptr, TempObjects, DL, LI); - DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); + LLVM_DEBUG(dbgs() + << "Underlying objects for pointer " << *Ptr << "\n"); for (Value *UnderlyingObj : TempObjects) { // nullptr never alias, don't join sets for pointer that have "null" // in their UnderlyingObjects list. 
- if (isa<ConstantPointerNull>(UnderlyingObj)) + if (isa<ConstantPointerNull>(UnderlyingObj) && + !NullPointerIsDefined( + TheLoop->getHeader()->getParent(), + UnderlyingObj->getType()->getPointerAddressSpace())) continue; UnderlyingObjToAccessMap::iterator Prev = @@ -917,7 +925,7 @@ void AccessAnalysis::processMemAccesses() { DepCands.unionSets(Access, Prev->second); ObjToLastAccess[UnderlyingObj] = Access; - DEBUG(dbgs() << " " << *UnderlyingObj << "\n"); + LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n"); } } } @@ -931,7 +939,7 @@ static bool isInBoundsGep(Value *Ptr) { return false; } -/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping, +/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping, /// i.e. monotonically increasing/decreasing. static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L) { @@ -979,7 +987,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, return false; } -/// \brief Check whether the access through \p Ptr has a constant stride. +/// Check whether the access through \p Ptr has a constant stride. int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap, bool Assume, bool ShouldCheckWrap) { @@ -989,8 +997,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, // Make sure that the pointer does not point to aggregate types. auto *PtrTy = cast<PointerType>(Ty); if (PtrTy->getElementType()->isAggregateType()) { - DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr - << "\n"); + LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" + << *Ptr << "\n"); return 0; } @@ -1001,15 +1009,15 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, AR = PSE.getAsAddRec(Ptr); if (!AR) { - DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr - << " SCEV: " << *PtrScev << "\n"); + LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr + << " SCEV: " << *PtrScev << "\n"); return 0; } // The accesss function must stride over the innermost loop. 
if (Lp != AR->getLoop()) { - DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << - *Ptr << " SCEV: " << *AR << "\n"); + LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " + << *Ptr << " SCEV: " << *AR << "\n"); return 0; } @@ -1024,18 +1032,20 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, bool IsNoWrapAddRec = !ShouldCheckWrap || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || isNoWrapAddRec(Ptr, AR, PSE, Lp); - bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; - if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { + if (!IsNoWrapAddRec && !IsInBoundsGEP && + NullPointerIsDefined(Lp->getHeader()->getParent(), + PtrTy->getAddressSpace())) { if (Assume) { PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); IsNoWrapAddRec = true; - DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n" - << "LAA: Pointer: " << *Ptr << "\n" - << "LAA: SCEV: " << *AR << "\n" - << "LAA: Added an overflow assumption\n"); + LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n" + << "LAA: Pointer: " << *Ptr << "\n" + << "LAA: SCEV: " << *AR << "\n" + << "LAA: Added an overflow assumption\n"); } else { - DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " - << *Ptr << " SCEV: " << *AR << "\n"); + LLVM_DEBUG( + dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " + << *Ptr << " SCEV: " << *AR << "\n"); return 0; } } @@ -1046,8 +1056,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); if (!C) { - DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << - " SCEV: " << *AR << "\n"); + LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr + << " SCEV: " << *AR << "\n"); return 0; } @@ -1070,15 +1080,16 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, // If the SCEV could wrap but we have an inbounds gep with a unit stride we // know we can't "wrap around the address space". In case of address space // zero we know that this won't happen without triggering undefined behavior. - if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) && - Stride != 1 && Stride != -1) { + if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 && + (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(), + PtrTy->getAddressSpace()))) { if (Assume) { // We can avoid this case by adding a run-time check. - DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either " - << "inbouds or in address space 0 may wrap:\n" - << "LAA: Pointer: " << *Ptr << "\n" - << "LAA: SCEV: " << *AR << "\n" - << "LAA: Added an overflow assumption\n"); + LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either " + << "inbouds or in address space 0 may wrap:\n" + << "LAA: Pointer: " << *Ptr << "\n" + << "LAA: SCEV: " << *AR << "\n" + << "LAA: Added an overflow assumption\n"); PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); } else return 0; @@ -1087,14 +1098,65 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, return Stride; } -/// Take the pointer operand from the Load/Store instruction. -/// Returns NULL if this is not a valid Load/Store instruction. 
-static Value *getPointerOperand(Value *I) { - if (auto *LI = dyn_cast<LoadInst>(I)) - return LI->getPointerOperand(); - if (auto *SI = dyn_cast<StoreInst>(I)) - return SI->getPointerOperand(); - return nullptr; +bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, + ScalarEvolution &SE, + SmallVectorImpl<unsigned> &SortedIndices) { + assert(llvm::all_of( + VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && + "Expected list of pointer operands."); + SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs; + OffValPairs.reserve(VL.size()); + + // Walk over the pointers, and map each of them to an offset relative to + // first pointer in the array. + Value *Ptr0 = VL[0]; + const SCEV *Scev0 = SE.getSCEV(Ptr0); + Value *Obj0 = GetUnderlyingObject(Ptr0, DL); + + llvm::SmallSet<int64_t, 4> Offsets; + for (auto *Ptr : VL) { + // TODO: Outline this code as a special, more time consuming, version of + // computeConstantDifference() function. + if (Ptr->getType()->getPointerAddressSpace() != + Ptr0->getType()->getPointerAddressSpace()) + return false; + // If a pointer refers to a different underlying object, bail - the + // pointers are by definition incomparable. + Value *CurrObj = GetUnderlyingObject(Ptr, DL); + if (CurrObj != Obj0) + return false; + + const SCEV *Scev = SE.getSCEV(Ptr); + const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0)); + // The pointers may not have a constant offset from each other, or SCEV + // may just not be smart enough to figure out they do. Regardless, + // there's nothing we can do. + if (!Diff) + return false; + + // Check if the pointer with the same offset is found. + int64_t Offset = Diff->getAPInt().getSExtValue(); + if (!Offsets.insert(Offset).second) + return false; + OffValPairs.emplace_back(Offset, Ptr); + } + SortedIndices.clear(); + SortedIndices.resize(VL.size()); + std::iota(SortedIndices.begin(), SortedIndices.end(), 0); + + // Sort the memory accesses and keep the order of their uses in UseOrder. + std::stable_sort(SortedIndices.begin(), SortedIndices.end(), + [&OffValPairs](unsigned Left, unsigned Right) { + return OffValPairs[Left].first < OffValPairs[Right].first; + }); + + // Check if the order is consecutive already. + if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) { + return I == SortedIndices[I]; + })) + SortedIndices.clear(); + + return true; } /// Take the address space operand from the Load/Store instruction. @@ -1110,8 +1172,8 @@ static unsigned getAddressSpaceOperand(Value *I) { /// Returns true if the memory operations \p A and \p B are consecutive. 
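The new llvm::sortPtrAccesses above reduces to a small index-sorting pattern: compute each pointer's offset from the first one, sort an index vector with std::iota plus std::stable_sort, and clear it again when the order was already increasing. A simplified sketch with plain integer offsets standing in for the SCEV-computed differences (the real function additionally bails out for mismatched address spaces, different underlying objects, and non-constant or duplicate offsets):

// Simplified, non-LLVM sketch of the index-sorting pattern in sortPtrAccesses.
#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

// SortedIndices is left empty when the input is already in increasing-offset
// order, mirroring the LLVM helper's contract.
void sortByOffset(const std::vector<int64_t> &Offsets,
                  std::vector<unsigned> &SortedIndices) {
  SortedIndices.resize(Offsets.size());
  std::iota(SortedIndices.begin(), SortedIndices.end(), 0u);

  // Stable sort keeps the original order of equal offsets (the real code
  // rejects duplicates before getting here).
  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
                   [&](unsigned L, unsigned R) {
                     return Offsets[L] < Offsets[R];
                   });

  // An identity permutation means the accesses were already consecutive.
  if (std::all_of(SortedIndices.begin(), SortedIndices.end(),
                  [&](unsigned I) { return I == SortedIndices[I]; }))
    SortedIndices.clear();
}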
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType) { - Value *PtrA = getPointerOperand(A); - Value *PtrB = getPointerOperand(B); + Value *PtrA = getLoadStorePointerOperand(A); + Value *PtrB = getLoadStorePointerOperand(B); unsigned ASA = getAddressSpaceOperand(A); unsigned ASB = getAddressSpaceOperand(B); @@ -1127,11 +1189,11 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, if (CheckType && PtrA->getType() != PtrB->getType()) return false; - unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); + unsigned IdxWidth = DL.getIndexSizeInBits(ASA); Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); - APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty)); + APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); - APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); @@ -1242,8 +1304,9 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, } if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) { - DEBUG(dbgs() << "LAA: Distance " << Distance - << " that could cause a store-load forwarding conflict\n"); + LLVM_DEBUG( + dbgs() << "LAA: Distance " << Distance + << " that could cause a store-load forwarding conflict\n"); return true; } @@ -1321,7 +1384,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, return false; } -/// \brief Check the dependence for two accesses with the same stride \p Stride. +/// Check the dependence for two accesses with the same stride \p Stride. /// \p Distance is the positive distance and \p TypeByteSize is type size in /// bytes. /// @@ -1395,16 +1458,16 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); - DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); - DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " - << *InstMap[BIdx] << ": " << *Dist << "\n"); + LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink + << "(Induction step: " << StrideAPtr << ")\n"); + LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " + << *InstMap[BIdx] << ": " << *Dist << "\n"); // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in // the address space. if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ - DEBUG(dbgs() << "Pointer access with non-constant stride\n"); + LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return Dependence::Unknown; } @@ -1421,7 +1484,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, TypeByteSize)) return Dependence::NoDep; - DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); + LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); ShouldRetryWithRuntimeCheck = true; return Dependence::Unknown; } @@ -1432,7 +1495,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Attempt to prove strided accesses independent. 
if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { - DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); + LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); return Dependence::NoDep; } @@ -1442,11 +1505,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (IsTrueDataDependence && EnableForwardingConflictDetection && (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) || ATy != BTy)) { - DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); + LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); return Dependence::ForwardButPreventsForwarding; } - DEBUG(dbgs() << "LAA: Dependence is negative\n"); + LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n"); return Dependence::Forward; } @@ -1455,15 +1518,17 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (Val == 0) { if (ATy == BTy) return Dependence::Forward; - DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); + LLVM_DEBUG( + dbgs() << "LAA: Zero dependence difference but different types\n"); return Dependence::Unknown; } assert(Val.isStrictlyPositive() && "Expect a positive value"); if (ATy != BTy) { - DEBUG(dbgs() << - "LAA: ReadWrite-Write positive dependency with different types\n"); + LLVM_DEBUG( + dbgs() + << "LAA: ReadWrite-Write positive dependency with different types\n"); return Dependence::Unknown; } @@ -1504,15 +1569,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, uint64_t MinDistanceNeeded = TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) { - DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance - << '\n'); + LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance " + << Distance << '\n'); return Dependence::Backward; } // Unsafe if the minimum distance needed is greater than max safe distance. 
if (MinDistanceNeeded > MaxSafeDepDistBytes) { - DEBUG(dbgs() << "LAA: Failure because it needs at least " - << MinDistanceNeeded << " size in bytes"); + LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least " + << MinDistanceNeeded << " size in bytes"); return Dependence::Backward; } @@ -1541,8 +1606,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, return Dependence::BackwardVectorizableButPreventsForwarding; uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride); - DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() - << " with max VF = " << MaxVF << '\n'); + LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() + << " with max VF = " << MaxVF << '\n'); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits); return Dependence::BackwardVectorizable; @@ -1600,7 +1665,8 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, if (Dependences.size() >= MaxDependences) { RecordDependences = false; Dependences.clear(); - DEBUG(dbgs() << "Too many dependences, stopped recording\n"); + LLVM_DEBUG(dbgs() + << "Too many dependences, stopped recording\n"); } } if (!RecordDependences && !SafeForVectorization) @@ -1612,7 +1678,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, } } - DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); + LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); return SafeForVectorization; } @@ -1642,20 +1708,21 @@ void MemoryDepChecker::Dependence::print( bool LoopAccessInfo::canAnalyzeLoop() { // We need to have a loop header. - DEBUG(dbgs() << "LAA: Found a loop in " - << TheLoop->getHeader()->getParent()->getName() << ": " - << TheLoop->getHeader()->getName() << '\n'); + LLVM_DEBUG(dbgs() << "LAA: Found a loop in " + << TheLoop->getHeader()->getParent()->getName() << ": " + << TheLoop->getHeader()->getName() << '\n'); // We can only analyze innermost loops. if (!TheLoop->empty()) { - DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); + LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop"; return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { - DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); + LLVM_DEBUG( + dbgs() << "LAA: loop control flow is not understood by analyzer\n"); recordAnalysis("CFGNotUnderstood") << "loop control flow is not understood by analyzer"; return false; @@ -1663,7 +1730,8 @@ bool LoopAccessInfo::canAnalyzeLoop() { // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { - DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); + LLVM_DEBUG( + dbgs() << "LAA: loop control flow is not understood by analyzer\n"); recordAnalysis("CFGNotUnderstood") << "loop control flow is not understood by analyzer"; return false; @@ -1673,7 +1741,8 @@ bool LoopAccessInfo::canAnalyzeLoop() { // checked at the end of each iteration. With that we can assume that all // instructions in the loop are executed the same number of times. 
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { - DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); + LLVM_DEBUG( + dbgs() << "LAA: loop control flow is not understood by analyzer\n"); recordAnalysis("CFGNotUnderstood") << "loop control flow is not understood by analyzer"; return false; @@ -1684,7 +1753,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { if (ExitCount == PSE->getSE()->getCouldNotCompute()) { recordAnalysis("CantComputeNumberOfIterations") << "could not determine number of loop iterations"; - DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); + LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } @@ -1734,7 +1803,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { recordAnalysis("NonSimpleLoad", Ld) << "read with atomic ordering or volatile read"; - DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); + LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); CanVecMem = false; return; } @@ -1758,7 +1827,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, if (!St->isSimple() && !IsAnnotatedParallel) { recordAnalysis("NonSimpleStore", St) << "write with atomic ordering or volatile write"; - DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); + LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); CanVecMem = false; return; } @@ -1777,14 +1846,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, // Check if we see any stores. If there are no stores, then we don't // care if the pointers are *restrict*. if (!Stores.size()) { - DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); + LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); CanVecMem = true; return; } MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses, *PSE); + TheLoop, AA, LI, DependentAccesses, *PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1814,9 +1883,9 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, } if (IsAnnotatedParallel) { - DEBUG(dbgs() - << "LAA: A loop annotated parallel, ignore memory dependency " - << "checks.\n"); + LLVM_DEBUG( + dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " + << "checks.\n"); CanVecMem = true; return; } @@ -1851,7 +1920,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, // If we write (or read-write) to a single destination and there are no // other reads in this loop then is it safe to vectorize. 
if (NumReadWrites == 1 && NumReads == 0) { - DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); + LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); CanVecMem = true; return; } @@ -1866,23 +1935,24 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, TheLoop, SymbolicStrides); if (!CanDoRTIfNeeded) { recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds"; - DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " - << "the array bounds.\n"); + LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " + << "the array bounds.\n"); CanVecMem = false; return; } - DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); + LLVM_DEBUG( + dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { - DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); + LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); CanVecMem = DepChecker->areDepsSafe( DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { - DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); + LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); // Clear the dependency checks. We assume they are not needed. Accesses.resetDepChecks(*DepChecker); @@ -1898,7 +1968,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, if (!CanDoRTIfNeeded) { recordAnalysis("CantCheckMemDepsAtRunTime") << "cannot check memory dependencies at runtime"; - DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); + LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); CanVecMem = false; return; } @@ -1908,16 +1978,17 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, } if (CanVecMem) - DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We" - << (PtrRtChecking->Need ? "" : " don't") - << " need runtime memory checks.\n"); + LLVM_DEBUG( + dbgs() << "LAA: No unsafe dependent memory operations in loop. We" + << (PtrRtChecking->Need ? "" : " don't") + << " need runtime memory checks.\n"); else { recordAnalysis("UnsafeMemDep") << "unsafe dependent memory operations in loop. Use " "#pragma loop distribute(enable) to allow loop distribution " "to attempt to isolate the offending operations into a separate " "loop"; - DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); + LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); } } @@ -1974,7 +2045,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, namespace { -/// \brief IR Values for the lower and upper bounds of a pointer evolution. We +/// IR Values for the lower and upper bounds of a pointer evolution. We /// need to use value-handles because SCEV expansion can invalidate previously /// expanded values. Thus expansion of a pointer can invalidate the bounds for /// a previous one. @@ -1985,7 +2056,7 @@ struct PointerBounds { } // end anonymous namespace -/// \brief Expand code for the lower and upper bound of the pointer group \p CG +/// Expand code for the lower and upper bound of the pointer group \p CG /// in \p TheLoop. \return the values for the bounds. 
static PointerBounds expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, @@ -2001,8 +2072,8 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr - << "\n"); + LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" + << *Ptr << "\n"); // Ptr could be in the loop body. If so, expand a new one at the correct // location. Instruction *Inst = dyn_cast<Instruction>(Ptr); @@ -2015,15 +2086,16 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, return {NewPtr, NewPtrPlusOne}; } else { Value *Start = nullptr, *End = nullptr; - DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); - DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); + LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High + << "\n"); return {Start, End}; } } -/// \brief Turns a collection of checks into a collection of expanded upper and +/// Turns a collection of checks into a collection of expanded upper and /// lower bounds for both pointers in the check. static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds( const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks, @@ -2136,9 +2208,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { if (!Stride) return; - DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " - "versioning:"); - DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " + "versioning:"); + LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); // Avoid adding the "Stride == 1" predicate when we know that // Stride >= Trip-Count. Such a predicate will effectively optimize a single @@ -2174,12 +2246,13 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { // "Stride >= TripCount" is equivalent to checking: // Stride - BETakenCount > 0 if (SE->isKnownPositive(StrideMinusBETaken)) { - DEBUG(dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " - "Stride==1 predicate will imply that the loop executes " - "at most once.\n"); + LLVM_DEBUG( + dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " + "Stride==1 predicate will imply that the loop executes " + "at most once.\n"); return; - } - DEBUG(dbgs() << "LAA: Found a strided access that we can version."); + } + LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version."); SymbolicStrides[Ptr] = Stride; StrideSet.insert(Stride); diff --git a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp index ea7a62d179c4..074023a7e1e2 100644 --- a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp +++ b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp @@ -24,7 +24,7 @@ cl::opt<bool> EnableMSSALoopDependency( "enable-mssa-loop-dependency", cl::Hidden, cl::init(false), cl::desc("Enable MemorySSA dependency for loop pass manager")); -// Explicit template instantiations and specialization defininitions for core +// Explicit template instantiations and specialization definitions for core // template typedefs. 
template class AllAnalysesOn<Loop>; template class AnalysisManager<Loop, LoopStandardAnalysisResults &>; diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 9e54d60779a0..3f78456b3586 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" @@ -377,69 +378,6 @@ Loop::LocRange Loop::getLocRange() const { return LocRange(); } -bool Loop::hasDedicatedExits() const { - // Each predecessor of each exit block of a normal loop is contained - // within the loop. - SmallVector<BasicBlock *, 4> ExitBlocks; - getExitBlocks(ExitBlocks); - for (BasicBlock *BB : ExitBlocks) - for (BasicBlock *Predecessor : predecessors(BB)) - if (!contains(Predecessor)) - return false; - // All the requirements are met. - return true; -} - -void Loop::getUniqueExitBlocks( - SmallVectorImpl<BasicBlock *> &ExitBlocks) const { - assert(hasDedicatedExits() && - "getUniqueExitBlocks assumes the loop has canonical form exits!"); - - SmallVector<BasicBlock *, 32> SwitchExitBlocks; - for (BasicBlock *BB : this->blocks()) { - SwitchExitBlocks.clear(); - for (BasicBlock *Successor : successors(BB)) { - // If block is inside the loop then it is not an exit block. - if (contains(Successor)) - continue; - - pred_iterator PI = pred_begin(Successor); - BasicBlock *FirstPred = *PI; - - // If current basic block is this exit block's first predecessor - // then only insert exit block in to the output ExitBlocks vector. - // This ensures that same exit block is not inserted twice into - // ExitBlocks vector. - if (BB != FirstPred) - continue; - - // If a terminator has more then two successors, for example SwitchInst, - // then it is possible that there are multiple edges from current block - // to one exit block. - if (std::distance(succ_begin(BB), succ_end(BB)) <= 2) { - ExitBlocks.push_back(Successor); - continue; - } - - // In case of multiple edges from current block to exit block, collect - // only one edge in ExitBlocks. Use switchExitBlocks to keep track of - // duplicate edges. - if (!is_contained(SwitchExitBlocks, Successor)) { - SwitchExitBlocks.push_back(Successor); - ExitBlocks.push_back(Successor); - } - } - } -} - -BasicBlock *Loop::getUniqueExitBlock() const { - SmallVector<BasicBlock *, 8> UniqueExitBlocks; - getUniqueExitBlocks(UniqueExitBlocks); - if (UniqueExitBlocks.size() == 1) - return UniqueExitBlocks[0]; - return nullptr; -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); } diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index 9af717bafdca..07a151ce0fce 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -142,8 +142,17 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { void LPPassManager::markLoopAsDeleted(Loop &L) { assert((&L == CurrentLoop || CurrentLoop->contains(&L)) && "Must not delete loop outside the current loop tree!"); - if (&L == CurrentLoop) + // If this loop appears elsewhere within the queue, we also need to remove it + // there. However, we have to be careful to not remove the back of the queue + // as that is assumed to match the current loop. 
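// The hand-written Loop::hasDedicatedExits() and getUniqueExitBlocks() bodies
// are deleted from LoopInfo.cpp above; the hunk shows only the removal, not
// where the logic moves to. For reference, the dedicated-exits property the
// removed code checked is the following (a standalone sketch mirroring that
// code, not the in-tree replacement):
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
using namespace llvm;

// A loop has "dedicated" exits when every predecessor of every exit block is
// itself contained in the loop.
static bool hasDedicatedExitsSketch(const Loop &L) {
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L.getExitBlocks(ExitBlocks);
  for (BasicBlock *BB : ExitBlocks)
    for (BasicBlock *Pred : predecessors(BB))
      if (!L.contains(Pred))
        return false;
  return true;
}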
+ assert(LQ.back() == CurrentLoop && "Loop queue back isn't the current loop!"); + LQ.erase(std::remove(LQ.begin(), LQ.end(), &L), LQ.end()); + + if (&L == CurrentLoop) { CurrentLoopDeleted = true; + // Add this loop back onto the back of the queue to preserve our invariants. + LQ.push_back(&L); + } } /// run - Execute all of the passes scheduled for execution. Keep track of @@ -151,7 +160,10 @@ void LPPassManager::markLoopAsDeleted(Loop &L) { bool LPPassManager::runOnFunction(Function &F) { auto &LIWP = getAnalysis<LoopInfoWrapperPass>(); LI = &LIWP.getLoopInfo(); + Module &M = *F.getParent(); +#if 0 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +#endif bool Changed = false; // Collect inherited analysis from Module level pass manager. @@ -181,6 +193,8 @@ bool LPPassManager::runOnFunction(Function &F) { } // Walk Loops + unsigned InstrCount = 0; + bool EmitICRemark = M.shouldEmitInstrCountChangedRemark(); while (!LQ.empty()) { CurrentLoopDeleted = false; CurrentLoop = LQ.back(); @@ -198,8 +212,11 @@ bool LPPassManager::runOnFunction(Function &F) { { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); TimeRegion PassTimer(getPassTimer(P)); - + if (EmitICRemark) + InstrCount = initSizeRemarkInfo(M); Changed |= P->runOnLoop(CurrentLoop, *this); + if (EmitICRemark) + emitInstrCountChangedRemark(P, M, InstrCount); } if (Changed) @@ -225,8 +242,12 @@ bool LPPassManager::runOnFunction(Function &F) { // is that LPPassManager might run passes which do not require LCSSA // form (LoopPassPrinter for example). We should skip verification for // such passes. + // FIXME: Loop-sink currently break LCSSA. Fix it and reenable the + // verification! +#if 0 if (mustPreserveAnalysisID(LCSSAVerificationPass::ID)) - CurrentLoop->isRecursivelyLCSSAForm(*DT, *LI); + assert(CurrentLoop->isRecursivelyLCSSAForm(*DT, *LI)); +#endif // Then call the regular verifyAnalysis functions. verifyPreservedAnalysis(P); @@ -351,13 +372,13 @@ bool LoopPass::skipLoop(const Loop *L) const { return false; // Check the opt bisect limit. LLVMContext &Context = F->getContext(); - if (!Context.getOptBisect().shouldRunPass(this, *L)) + if (!Context.getOptPassGate().shouldRunPass(this, *L)) return true; // Check for the OptimizeNone attribute. if (F->hasFnAttribute(Attribute::OptimizeNone)) { // FIXME: Report this to dbgs() only once per function. - DEBUG(dbgs() << "Skipping pass '" << getPassName() - << "' in function " << F->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() << "' in function " + << F->getName() << "\n"); // FIXME: Delete loop from pass manager's queue? return true; } diff --git a/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp index 0da90dae3d9a..c8b91a7a1a51 100644 --- a/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -17,7 +17,7 @@ using namespace llvm; -/// \brief Try to simplify instruction \param I using its SCEV expression. +/// Try to simplify instruction \param I using its SCEV expression. /// /// The idea is that some AddRec expressions become constants, which then /// could trigger folding of other instructions. 
However, that only happens diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 24fedfed772c..686ad294378c 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -40,7 +41,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" #include <cassert> #include <cstdint> #include <iterator> @@ -75,12 +75,24 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = { {LibFunc_valloc, {MallocLike, 1, 0, -1}}, {LibFunc_Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int) {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) + {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new(unsigned int, align_val_t) + {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, // new(unsigned int, align_val_t, nothrow) + {MallocLike, 3, 0, -1}}, {LibFunc_Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long) {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow) + {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new(unsigned long, align_val_t) + {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, // new(unsigned long, align_val_t, nothrow) + {MallocLike, 3, 0, -1}}, {LibFunc_Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int) {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) + {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new[](unsigned int, align_val_t) + {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, // new[](unsigned int, align_val_t, nothrow) + {MallocLike, 3, 0, -1}}, {LibFunc_Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long) {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow) + {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new[](unsigned long, align_val_t) + {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, // new[](unsigned long, align_val_t, nothrow) + {MallocLike, 3, 0, -1}}, {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned int) {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long) @@ -112,10 +124,9 @@ static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast IsNoBuiltin = CS.isNoBuiltin(); - const Function *Callee = CS.getCalledFunction(); - if (!Callee || !Callee->isDeclaration()) - return nullptr; - return Callee; + if (const Function *Callee = CS.getCalledFunction()) + return Callee; + return nullptr; } /// Returns the allocation data for the given value if it's either a call to a @@ -206,7 +217,7 @@ static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { return CS && CS.hasRetAttr(Attribute::NoAlias); } -/// \brief Tests if a value is a call or invoke to a library function that +/// Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). 
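// The AllocationFnData rows added above map Itanium-mangled names to
// allocation kinds. As a source-level reference (derived from the comments in
// this hunk, with 'm' = unsigned long and 'j' = unsigned int), the new rows
// cover the C++17 aligned-allocation overloads:
#include <new>

void alignedNewExamples() {
  void *P = ::operator new(32, std::align_val_t(32));   // _ZnwmSt11align_val_t
  void *Q = ::operator new(32, std::align_val_t(32),
                           std::nothrow);               // _ZnwmSt11align_val_tRKSt9nothrow_t
  void *R = ::operator new[](64, std::align_val_t(16)); // _ZnamSt11align_val_t
  ::operator delete(P, std::align_val_t(32));
  ::operator delete(Q, std::align_val_t(32), std::nothrow);
  ::operator delete[](R, std::align_val_t(16));
}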
bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, @@ -214,7 +225,7 @@ bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue(); } -/// \brief Tests if a value is a call or invoke to a function that returns a +/// Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { @@ -224,29 +235,29 @@ bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, hasNoAliasAttr(V, LookThroughBitCast); } -/// \brief Tests if a value is a call or invoke to a library function that +/// Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory (such as malloc). bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue(); } -/// \brief Tests if a value is a call or invoke to a library function that +/// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, CallocLike, TLI, LookThroughBitCast).hasValue(); } -/// \brief Tests if a value is a call or invoke to a library function that -/// allocates memory similiar to malloc or calloc. +/// Tests if a value is a call or invoke to a library function that +/// allocates memory similar to malloc or calloc. bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, MallocOrCallocLike, TLI, LookThroughBitCast).hasValue(); } -/// \brief Tests if a value is a call or invoke to a library function that +/// Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). 
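// A small usage sketch (assumed, not from the patch) for the predicates whose
// comments are updated above; the strings quote those comments, and the
// two-argument calls rely on the default LookThroughBitCast = false parameter
// declared in MemoryBuiltins.h.
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;

static const char *classifyAllocCall(const Value *V,
                                     const TargetLibraryInfo *TLI) {
  if (isMallocLikeFn(V, TLI))
    return "allocates uninitialized memory";
  if (isCallocLikeFn(V, TLI))
    return "allocates zero-filled memory";
  if (isAllocLikeFn(V, TLI))
    return "allocates memory (malloc, calloc, or strdup like)";
  if (isAllocationFn(V, TLI))
    return "allocates or reallocates memory";
  return "not a recognized allocation call";
}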
bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { @@ -350,11 +361,10 @@ const CallInst *llvm::extractCallocCall(const Value *I, /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { - const CallInst *CI = dyn_cast<CallInst>(I); - if (!CI || isa<IntrinsicInst>(CI)) - return nullptr; - Function *Callee = CI->getCalledFunction(); - if (Callee == nullptr) + bool IsNoBuiltinCall; + const Function *Callee = + getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall); + if (Callee == nullptr || IsNoBuiltinCall) return nullptr; StringRef FnName = Callee->getName(); @@ -374,9 +384,11 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { else if (TLIFn == LibFunc_ZdlPvj || // delete(void*, uint) TLIFn == LibFunc_ZdlPvm || // delete(void*, ulong) TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) + TLIFn == LibFunc_ZdlPvSt11align_val_t || // delete(void*, align_val_t) TLIFn == LibFunc_ZdaPvj || // delete[](void*, uint) TLIFn == LibFunc_ZdaPvm || // delete[](void*, ulong) TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) + TLIFn == LibFunc_ZdaPvSt11align_val_t || // delete[](void*, align_val_t) TLIFn == LibFunc_msvc_delete_ptr32_int || // delete(void*, uint) TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong) TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow) @@ -386,6 +398,9 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) ExpectedNumParams = 2; + else if (TLIFn == LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t || // delete(void*, align_val_t, nothrow) + TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t) // delete[](void*, align_val_t, nothrow) + ExpectedNumParams = 3; else return nullptr; @@ -400,7 +415,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) return nullptr; - return CI; + return dyn_cast<CallInst>(I); } //===----------------------------------------------------------------------===// @@ -412,7 +427,7 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) { return Data.first - Data.second; } -/// \brief Compute the size of the object pointed by Ptr. Returns true and the +/// Compute the size of the object pointed by Ptr. Returns true and the /// object size in Size if successful, and false otherwise. /// If RoundToAlign is true, then Size is rounded up to the alignment of /// allocas, byval arguments, and global variables. @@ -513,8 +528,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return visitGEPOperator(cast<GEPOperator>(*CE)); } - DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V - << '\n'); + LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " + << *V << '\n'); return unknown(); } @@ -627,7 +642,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { SizeOffsetType ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull& CPN) { - if (Options.NullIsUnknownSize && CPN.getType()->getAddressSpace() == 0) + // If null is unknown, there's nothing we can do. 
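// Source-level forms behind the extra LibFunc names isFreeCall() accepts after
// this change (the mapping is taken from the comments in the hunk): the plain
// and nothrow aligned operator deletes are now treated as frees.
#include <new>

void alignedDeleteExamples(void *P, void *Q) {
  ::operator delete(P, std::align_val_t(32));                 // _ZdlPvSt11align_val_t
  ::operator delete[](Q, std::align_val_t(16), std::nothrow); // _ZdaPvSt11align_val_tRKSt9nothrow_t
}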
Additionally, non-zero + // address spaces can make use of null, so we don't presume to know anything + // about that. + // + // TODO: How should this work with address space casts? We currently just drop + // them on the floor, but it's unclear what we should do when a NULL from + // addrspace(1) gets casted to addrspace(0) (or vice-versa). + if (Options.NullIsUnknownSize || CPN.getType()->getAddressSpace()) return unknown(); return std::make_pair(Zero, Zero); } @@ -714,7 +736,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) { } SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { - DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I << '\n'); + LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I + << '\n'); return unknown(); } @@ -793,8 +816,9 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { // Ignore values where we cannot do more than ObjectSizeVisitor. Result = unknown(); } else { - DEBUG(dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: " - << *V << '\n'); + LLVM_DEBUG( + dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: " << *V + << '\n'); Result = unknown(); } @@ -931,6 +955,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) { - DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I <<'\n'); + LLVM_DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I + << '\n'); return unknown(); } diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index bf83f52ccf2e..7eeefd54f007 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -154,24 +154,16 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - AAMDNodes AAInfo; - switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: - II->getAAMetadata(AAInfo); - Loc = MemoryLocation( - II->getArgOperand(1), - cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo); + Loc = MemoryLocation::getForArgument(II, 1, TLI); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. return ModRefInfo::Mod; case Intrinsic::invariant_end: - II->getAAMetadata(AAInfo); - Loc = MemoryLocation( - II->getArgOperand(2), - cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo); + Loc = MemoryLocation::getForArgument(II, 2, TLI); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. return ModRefInfo::Mod; @@ -363,8 +355,8 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom( MemDepResult MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB) { - auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); - if (!InvariantGroupMD) + + if (!LI->getMetadata(LLVMContext::MD_invariant_group)) return MemDepResult::getUnknown(); // Take the ptr operand after all casts and geps 0. This way we can search @@ -425,7 +417,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // same pointer operand) we can assume that value pointed by pointer // operand didn't change. 
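// Assumed typical entry point for the visitor whose null handling changes
// above (signature as declared in MemoryBuiltins.h): with NullIsUnknownSize
// set, or for a null pointer in a non-zero address space, the size query now
// answers "unknown" instead of zero.
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

static bool objectSizeOf(const Value *Ptr, const DataLayout &DL,
                         const TargetLibraryInfo *TLI, uint64_t &Size) {
  ObjectSizeOpts Opts;
  Opts.NullIsUnknownSize = true; // treat null as an object of unknown size
  return getObjectSize(Ptr, Size, DL, TLI, Opts);
}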
if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && - U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) + U->getMetadata(LLVMContext::MD_invariant_group) != nullptr) ClosestDependency = GetClosestDependency(ClosestDependency, U); } } @@ -441,6 +433,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, NonLocalDefsCache.try_emplace( LI, NonLocalDepResult(ClosestDependency->getParent(), MemDepResult::getDef(ClosestDependency), nullptr)); + ReverseNonLocalDefsCache[ClosestDependency].insert(LI); return MemDepResult::getNonLocal(); } @@ -813,7 +806,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) { DirtyBlocks.push_back(Entry.getBB()); // Sort the cache so that we can do fast binary search lookups below. - std::sort(Cache.begin(), Cache.end()); + llvm::sort(Cache.begin(), Cache.end()); ++NumCacheDirtyNonLocal; // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " @@ -832,7 +825,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) { SmallPtrSet<BasicBlock *, 32> Visited; unsigned NumSortedEntries = Cache.size(); - DEBUG(AssertSorted(Cache)); + LLVM_DEBUG(AssertSorted(Cache)); // Iterate while we still have blocks to update. while (!DirtyBlocks.empty()) { @@ -845,7 +838,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) { // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. - DEBUG(AssertSorted(Cache, NumSortedEntries)); + LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries)); NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries, NonLocalDepEntry(DirtyBB)); @@ -927,12 +920,12 @@ void MemoryDependenceResults::getNonLocalPointerDependency( "Can't get pointer deps of a non-pointer!"); Result.clear(); { - // Check if there is cached Def with invariant.group. FIXME: cache might be - // invalid if cached instruction would be removed between call to - // getPointerDependencyFrom and this function. + // Check if there is cached Def with invariant.group. auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst); if (NonLocalDefIt != NonLocalDefsCache.end()) { - Result.push_back(std::move(NonLocalDefIt->second)); + Result.push_back(NonLocalDefIt->second); + ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()] + .erase(QueryInst); NonLocalDefsCache.erase(NonLocalDefIt); return; } @@ -1076,7 +1069,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, break; default: // Added many values, do a full scale sort. - std::sort(Cache.begin(), Cache.end()); + llvm::sort(Cache.begin(), Cache.end()); break; } } @@ -1218,7 +1211,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( unsigned NumSortedEntries = Cache->size(); unsigned WorklistEntries = BlockNumberLimit; bool GotWorklistLimit = false; - DEBUG(AssertSorted(*Cache)); + LLVM_DEBUG(AssertSorted(*Cache)); while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); @@ -1249,7 +1242,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. - DEBUG(AssertSorted(*Cache, NumSortedEntries)); + LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries)); MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries); @@ -1463,13 +1456,33 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( // Okay, we're done now. 
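// An aside on the std::sort -> llvm::sort swaps in this file (the rationale is
// not stated in the patch): llvm::sort from llvm/ADT/STLExtras.h shuffles the
// range first in EXPENSIVE_CHECKS builds, so hidden reliance on the relative
// order of equal cache entries shows up as test nondeterminism rather than as
// latent bugs. The call pattern matches the hunks above:
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static void sortCache(llvm::SmallVectorImpl<int> &Cache) {
  llvm::sort(Cache.begin(), Cache.end());
}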
If we added new values to the cache, re-sort it. SortNonLocalDepInfoCache(*Cache, NumSortedEntries); - DEBUG(AssertSorted(*Cache)); + LLVM_DEBUG(AssertSorted(*Cache)); return true; } -/// If P exists in CachedNonLocalPointerInfo, remove it. +/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it. void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies( ValueIsLoadPair P) { + + // Most of the time this cache is empty. + if (!NonLocalDefsCache.empty()) { + auto it = NonLocalDefsCache.find(P.getPointer()); + if (it != NonLocalDefsCache.end()) { + RemoveFromReverseMap(ReverseNonLocalDefsCache, + it->second.getResult().getInst(), P.getPointer()); + NonLocalDefsCache.erase(it); + } + + if (auto *I = dyn_cast<Instruction>(P.getPointer())) { + auto toRemoveIt = ReverseNonLocalDefsCache.find(I); + if (toRemoveIt != ReverseNonLocalDefsCache.end()) { + for (const auto &entry : toRemoveIt->second) + NonLocalDefsCache.erase(entry); + ReverseNonLocalDefsCache.erase(toRemoveIt); + } + } + } + CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P); if (It == NonLocalPointerDeps.end()) return; @@ -1646,7 +1659,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) { // Re-sort the NonLocalDepInfo. Changing the dirty entry to its // subsequent value may invalidate the sortedness. - std::sort(NLPDI.begin(), NLPDI.end()); + llvm::sort(NLPDI.begin(), NLPDI.end()); } ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); @@ -1659,7 +1672,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) { } assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); - DEBUG(verifyRemoved(RemInst)); + LLVM_DEBUG(verifyRemoved(RemInst)); } /// Verify that the specified instruction does not occur in our internal data diff --git a/contrib/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm/lib/Analysis/MemoryLocation.cpp index 9db6c499129a..55924db284ec 100644 --- a/contrib/llvm/lib/Analysis/MemoryLocation.cpp +++ b/contrib/llvm/lib/Analysis/MemoryLocation.cpp @@ -65,6 +65,14 @@ MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) { } MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) { + return getForSource(cast<AnyMemTransferInst>(MTI)); +} + +MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) { + return getForSource(cast<AnyMemTransferInst>(MTI)); +} + +MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) { uint64_t Size = UnknownSize; if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) Size = C->getValue().getZExtValue(); @@ -77,17 +85,25 @@ MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) { return MemoryLocation(MTI->getRawSource(), Size, AATags); } -MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) { +MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) { + return getForDest(cast<AnyMemIntrinsic>(MI)); +} + +MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) { + return getForDest(cast<AnyMemIntrinsic>(MI)); +} + +MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) { uint64_t Size = UnknownSize; - if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength())) Size = C->getValue().getZExtValue(); // memcpy/memmove can have AA tags. For memcpy, they apply // to both the source and the destination. 
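// What the new overloads above provide (a sketch using only the API visible in
// this hunk): callers can form source and destination locations for both the
// plain and the element-atomic memcpy/memmove intrinsics through the common
// AnyMemTransferInst class instead of special-casing each kind.
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static void locationsFor(const AnyMemTransferInst *MTI) {
  MemoryLocation Src = MemoryLocation::getForSource(MTI); // bytes read
  MemoryLocation Dst = MemoryLocation::getForDest(MTI);   // bytes written
  (void)Src;
  (void)Dst;
}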
AAMDNodes AATags; - MTI->getAAMetadata(AATags); + MI->getAAMetadata(AATags); - return MemoryLocation(MTI->getRawDest(), Size, AATags); + return MemoryLocation(MI->getRawDest(), Size, AATags); } MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS, diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp index 09605f61fa93..f57d490ce96e 100644 --- a/contrib/llvm/lib/Analysis/MemorySSA.cpp +++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -82,7 +83,7 @@ static cl::opt<bool> namespace llvm { -/// \brief An assembly annotator class to print Memory SSA information in +/// An assembly annotator class to print Memory SSA information in /// comments. class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { friend class MemorySSA; @@ -235,13 +236,25 @@ static bool areLoadsReorderable(const LoadInst *Use, return !(SeqCstUse || MayClobberIsAcquire); } -static bool instructionClobbersQuery(MemoryDef *MD, - const MemoryLocation &UseLoc, - const Instruction *UseInst, - AliasAnalysis &AA) { +namespace { + +struct ClobberAlias { + bool IsClobber; + Optional<AliasResult> AR; +}; + +} // end anonymous namespace + +// Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being +// ignored if IsClobber = false. +static ClobberAlias instructionClobbersQuery(MemoryDef *MD, + const MemoryLocation &UseLoc, + const Instruction *UseInst, + AliasAnalysis &AA) { Instruction *DefInst = MD->getMemoryInst(); assert(DefInst && "Defining instruction not actually an instruction"); ImmutableCallSite UseCS(UseInst); + Optional<AliasResult> AR; if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { // These intrinsics will show up as affecting memory, but they are just @@ -249,13 +262,14 @@ static bool instructionClobbersQuery(MemoryDef *MD, switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: if (UseCS) - return false; - return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), UseLoc); + return {false, NoAlias}; + AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc); + return {AR == MustAlias, AR}; case Intrinsic::lifetime_end: case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::assume: - return false; + return {false, NoAlias}; default: break; } @@ -263,19 +277,23 @@ static bool instructionClobbersQuery(MemoryDef *MD, if (UseCS) { ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); - return isModOrRefSet(I); + AR = isMustSet(I) ? MustAlias : MayAlias; + return {isModOrRefSet(I), AR}; } if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) - return !areLoadsReorderable(UseLoad, DefLoad); + return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias}; - return isModSet(AA.getModRefInfo(DefInst, UseLoc)); + ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc); + AR = isMustSet(I) ? 
MustAlias : MayAlias; + return {isModSet(I), AR}; } -static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, - const MemoryLocOrCall &UseMLOC, - AliasAnalysis &AA) { +static ClobberAlias instructionClobbersQuery(MemoryDef *MD, + const MemoryUseOrDef *MU, + const MemoryLocOrCall &UseMLOC, + AliasAnalysis &AA) { // FIXME: This is a temporary hack to allow a single instructionClobbersQuery // to exist while MemoryLocOrCall is pushed through places. if (UseMLOC.IsCall) @@ -288,7 +306,7 @@ static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, // Return true when MD may alias MU, return false otherwise. bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, AliasAnalysis &AA) { - return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA); + return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA).IsClobber; } namespace { @@ -303,6 +321,7 @@ struct UpwardsMemoryQuery { const Instruction *Inst = nullptr; // The MemoryAccess we actually got called with, used to test local domination const MemoryAccess *OriginalAccess = nullptr; + Optional<AliasResult> AR = MayAlias; UpwardsMemoryQuery() = default; @@ -333,9 +352,6 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA, const Instruction *I) { // If the memory can't be changed, then loads of the memory can't be // clobbered. - // - // FIXME: We should handle invariant groups, as well. It's a bit harder, - // because we need to pay close attention to invariant group barriers. return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || AA.pointsToConstantMemory(cast<LoadInst>(I)-> getPointerOperand())); @@ -386,9 +402,15 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt, // // Also, note that this can't be hoisted out of the `Worklist` loop, // since MD may only act as a clobber for 1 of N MemoryLocations. - FoundClobber = - FoundClobber || MSSA.isLiveOnEntryDef(MD) || - instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); + FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD); + if (!FoundClobber) { + ClobberAlias CA = + instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); + if (CA.IsClobber) { + FoundClobber = true; + // Not used: CA.AR; + } + } } break; } @@ -398,7 +420,8 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt, if (auto *MD = dyn_cast<MemoryDef>(MA)) { (void)MD; - assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) && + assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) + .IsClobber && "Found clobber before reaching ClobberAt!"); continue; } @@ -468,9 +491,10 @@ class ClobberWalker { /// Result of calling walkToPhiOrClobber. struct UpwardsWalkResult { /// The "Result" of the walk. Either a clobber, the last thing we walked, or - /// both. + /// both. Include alias info when clobber found. MemoryAccess *Result; bool IsKnownClobber; + Optional<AliasResult> AR; }; /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. 
@@ -486,17 +510,21 @@ class ClobberWalker { for (MemoryAccess *Current : def_chain(Desc.Last)) { Desc.Last = Current; if (Current == StopAt) - return {Current, false}; - - if (auto *MD = dyn_cast<MemoryDef>(Current)) - if (MSSA.isLiveOnEntryDef(MD) || - instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA)) - return {MD, true}; + return {Current, false, MayAlias}; + + if (auto *MD = dyn_cast<MemoryDef>(Current)) { + if (MSSA.isLiveOnEntryDef(MD)) + return {MD, true, MustAlias}; + ClobberAlias CA = + instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA); + if (CA.IsClobber) + return {MD, true, CA.AR}; + } } assert(isa<MemoryPhi>(Desc.Last) && "Ended at a non-clobber that's not a phi?"); - return {Desc.Last, false}; + return {Desc.Last, false, MayAlias}; } void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, @@ -819,8 +847,6 @@ public: ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) : MSSA(MSSA), AA(AA), DT(DT) {} - void reset() {} - /// Finds the nearest clobber for the given query, optimizing phis if /// possible. MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { @@ -839,6 +865,7 @@ public: MemoryAccess *Result; if (WalkResult.IsKnownClobber) { Result = WalkResult.Result; + Q.AR = WalkResult.AR; } else { OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), Current, Q.StartingLoc); @@ -876,12 +903,11 @@ struct RenamePassData { namespace llvm { -/// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no -/// longer does caching on its own, -/// but the name has been retained for the moment. +/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no +/// longer does caching on its own, but the name has been retained for the +/// moment. class MemorySSA::CachingWalker final : public MemorySSAWalker { ClobberWalker Walker; - bool AutoResetWalker = true; MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); @@ -896,13 +922,6 @@ public: const MemoryLocation &) override; void invalidateInfo(MemoryAccess *) override; - /// Whether we call resetClobberWalker() after each time we *actually* walk to - /// answer a clobber query. - void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; } - - /// Drop the walker's persistent data structures. - void resetClobberWalker() { Walker.reset(); } - void verify(const MemorySSA *MSSA) override { MemorySSAWalker::verify(MSSA); Walker.verify(MSSA); @@ -930,7 +949,7 @@ void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, } } -/// \brief Rename a single basic block into MemorySSA form. +/// Rename a single basic block into MemorySSA form. /// Uses the standard SSA renaming algorithm. /// \returns The new incoming value. MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, @@ -953,7 +972,7 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, return IncomingVal; } -/// \brief This is the standard SSA renaming algorithm. +/// This is the standard SSA renaming algorithm. /// /// We walk the dominator tree in preorder, renaming accesses, and then filling /// in phi nodes in our successors. 
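// Assumed client-side pattern for the caching walker described above: resolve
// an instruction to its MemoryAccess and ask for the nearest clobber. The
// walker API names used here are the ones appearing in this file; the helper
// itself is illustrative.
#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

static MemoryAccess *nearestClobber(MemorySSA &MSSA, Instruction *I) {
  MemoryUseOrDef *MA = MSSA.getMemoryAccess(I);
  if (!MA)
    return nullptr; // I does not read or write memory
  return MSSA.getWalker()->getClobberingMemoryAccess(MA);
}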
@@ -1002,7 +1021,7 @@ void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, } } -/// \brief This handles unreachable block accesses by deleting phi nodes in +/// This handles unreachable block accesses by deleting phi nodes in /// unreachable blocks, and marking all other unreachable MemoryAccess's as /// being uses of the live on entry definition. void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { @@ -1044,7 +1063,7 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), - NextID(INVALID_MEMORYACCESS_ID) { + NextID(0) { buildMemorySSA(); } @@ -1106,6 +1125,7 @@ private: // This is where the last walk for this memory location ended. unsigned long LastKill; bool LastKillValid; + Optional<AliasResult> AR; }; void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &, @@ -1165,7 +1185,7 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( } if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) { - MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true); + MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None); continue; } @@ -1207,6 +1227,7 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( if (!LocInfo.LastKillValid) { LocInfo.LastKill = VersionStack.size() - 1; LocInfo.LastKillValid = true; + LocInfo.AR = MayAlias; } // At this point, we should have corrected last kill and LowerBound to be @@ -1219,10 +1240,11 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( unsigned long UpperBound = VersionStack.size() - 1; if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { - DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" - << *(MU->getMemoryInst()) << ")" - << " because there are " << UpperBound - LocInfo.LowerBound - << " stores to disambiguate\n"); + LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" + << *(MU->getMemoryInst()) << ")" + << " because there are " + << UpperBound - LocInfo.LowerBound + << " stores to disambiguate\n"); // Because we did not walk, LastKill is no longer valid, as this may // have been a kill. LocInfo.LastKillValid = false; @@ -1250,24 +1272,32 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( // Reset UpperBound to liveOnEntryDef's place in the stack UpperBound = 0; FoundClobberResult = true; + LocInfo.AR = MustAlias; break; } - if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) { + ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); + if (CA.IsClobber) { FoundClobberResult = true; + LocInfo.AR = CA.AR; break; } --UpperBound; } + + // Note: Phis always have AliasResult AR set to MayAlias ATM. + // At the end of this loop, UpperBound is either a clobber, or lower bound // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. if (FoundClobberResult || UpperBound < LocInfo.LastKill) { - MU->setDefiningAccess(VersionStack[UpperBound], true); // We were last killed now by where we got to + if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound])) + LocInfo.AR = None; + MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR); LocInfo.LastKill = UpperBound; } else { // Otherwise, we checked all the new ones, and now we know we can get to // LastKill. 
- MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true); + MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR); } LocInfo.LowerBound = VersionStack.size() - 1; LocInfo.LowerBoundBlock = BB; @@ -1289,19 +1319,13 @@ void MemorySSA::OptimizeUses::optimizeUses() { } void MemorySSA::placePHINodes( - const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks, - const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) { + const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) { // Determine where our MemoryPhi's should go ForwardIDFCalculator IDFs(*DT); IDFs.setDefiningBlocks(DefiningBlocks); SmallVector<BasicBlock *, 32> IDFBlocks; IDFs.calculate(IDFBlocks); - std::sort(IDFBlocks.begin(), IDFBlocks.end(), - [&BBNumbers](const BasicBlock *A, const BasicBlock *B) { - return BBNumbers.lookup(A) < BBNumbers.lookup(B); - }); - // Now place MemoryPhi nodes. for (auto &BB : IDFBlocks) createMemoryPhi(BB); @@ -1315,11 +1339,8 @@ void MemorySSA::buildMemorySSA() { // semantics do *not* imply that something with no immediate uses can simply // be removed. BasicBlock &StartingPoint = F.getEntryBlock(); - LiveOnEntryDef = - llvm::make_unique<MemoryDef>(F.getContext(), nullptr, nullptr, - &StartingPoint, NextID++); - DenseMap<const BasicBlock *, unsigned int> BBNumbers; - unsigned NextBBNum = 0; + LiveOnEntryDef.reset(new MemoryDef(F.getContext(), nullptr, nullptr, + &StartingPoint, NextID++)); // We maintain lists of memory accesses per-block, trading memory for time. We // could just look up the memory access for every possible instruction in the @@ -1328,7 +1349,6 @@ void MemorySSA::buildMemorySSA() { // Go through each block, figure out where defs occur, and chain together all // the accesses. for (BasicBlock &B : F) { - BBNumbers[&B] = NextBBNum++; bool InsertIntoDef = false; AccessList *Accesses = nullptr; DefsList *Defs = nullptr; @@ -1350,7 +1370,7 @@ void MemorySSA::buildMemorySSA() { if (InsertIntoDef) DefiningBlocks.insert(&B); } - placePHINodes(DefiningBlocks, BBNumbers); + placePHINodes(DefiningBlocks); // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get // filled in with all blocks. @@ -1359,11 +1379,7 @@ void MemorySSA::buildMemorySSA() { CachingWalker *Walker = getWalkerImpl(); - // We're doing a batch of updates; don't drop useful caches between them. - Walker->setAutoResetWalker(false); OptimizeUses(this, Walker, AA, DT).optimizeUses(); - Walker->setAutoResetWalker(true); - Walker->resetClobberWalker(); // Mark the uses in unreachable blocks as live on entry, so that they go // somewhere. @@ -1426,7 +1442,7 @@ void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB, auto *Defs = getOrCreateDefsList(BB); // If we got asked to insert at the end, we have an easy job, just shove it // at the end. If we got asked to insert before an existing def, we also get - // an terator. If we got asked to insert before a use, we have to hunt for + // an iterator. If we got asked to insert before a use, we have to hunt for // the next def. if (WasEnd) { Defs->push_back(*What); @@ -1445,7 +1461,7 @@ void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB, BlockNumberingValid.erase(BB); } -// Move What before Where in the IR. The end result is taht What will belong to +// Move What before Where in the IR. The end result is that What will belong to // the right lists and have the right Block set, but will not otherwise be // correct. 
It will not have the right defining access, and if it is a def, // things below it will not properly be updated. @@ -1457,8 +1473,18 @@ void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, insertIntoListsBefore(What, BB, Where); } -void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, +void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB, InsertionPlace Point) { + if (isa<MemoryPhi>(What)) { + assert(Point == Beginning && + "Can only move a Phi at the beginning of the block"); + // Update lookup table entry + ValueToMemoryAccess.erase(What->getBlock()); + bool Inserted = ValueToMemoryAccess.insert({BB, What}).second; + (void)Inserted; + assert(Inserted && "Cannot move a Phi to a block that already has one"); + } + removeFromLists(What, false); What->setBlock(BB); insertIntoListsForBlock(What, BB, Point); @@ -1498,7 +1524,7 @@ static inline bool isOrdered(const Instruction *I) { return false; } -/// \brief Helper function to create new memory accesses +/// Helper function to create new memory accesses MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { // The assume intrinsic has a control dependency which we model by claiming // that it writes arbitrarily. Ignore that fake memory dependency here. @@ -1526,9 +1552,6 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { if (!Def && !Use) return nullptr; - assert((Def || Use) && - "Trying to create a memory access with a non-memory instruction"); - MemoryUseOrDef *MUD; if (Def) MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++); @@ -1538,7 +1561,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { return MUD; } -/// \brief Returns true if \p Replacer dominates \p Replacee . +/// Returns true if \p Replacer dominates \p Replacee . bool MemorySSA::dominatesUse(const MemoryAccess *Replacer, const MemoryAccess *Replacee) const { if (isa<MemoryUseOrDef>(Replacee)) @@ -1555,40 +1578,40 @@ bool MemorySSA::dominatesUse(const MemoryAccess *Replacer, return true; } -/// \brief Properly remove \p MA from all of MemorySSA's lookup tables. +/// Properly remove \p MA from all of MemorySSA's lookup tables. void MemorySSA::removeFromLookups(MemoryAccess *MA) { assert(MA->use_empty() && "Trying to remove memory access that still has uses"); BlockNumbering.erase(MA); - if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) + if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) MUD->setDefiningAccess(nullptr); // Invalidate our walker's cache if necessary if (!isa<MemoryUse>(MA)) Walker->invalidateInfo(MA); - // The call below to erase will destroy MA, so we can't change the order we - // are doing things here + Value *MemoryInst; - if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) { + if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) MemoryInst = MUD->getMemoryInst(); - } else { + else MemoryInst = MA->getBlock(); - } + auto VMA = ValueToMemoryAccess.find(MemoryInst); if (VMA->second == MA) ValueToMemoryAccess.erase(VMA); } -/// \brief Properly remove \p MA from all of MemorySSA's lists. +/// Properly remove \p MA from all of MemorySSA's lists. /// /// Because of the way the intrusive list and use lists work, it is important to /// do removal in the right order. /// ShouldDelete defaults to true, and will cause the memory access to also be /// deleted, not just removed. void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) { + BasicBlock *BB = MA->getBlock(); // The access list owns the reference, so we erase it from the non-owning list // first. 
if (!isa<MemoryUse>(MA)) { - auto DefsIt = PerBlockDefs.find(MA->getBlock()); + auto DefsIt = PerBlockDefs.find(BB); std::unique_ptr<DefsList> &Defs = DefsIt->second; Defs->remove(*MA); if (Defs->empty()) @@ -1597,15 +1620,17 @@ void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) { // The erase call here will delete it. If we don't want it deleted, we call // remove instead. - auto AccessIt = PerBlockAccesses.find(MA->getBlock()); + auto AccessIt = PerBlockAccesses.find(BB); std::unique_ptr<AccessList> &Accesses = AccessIt->second; if (ShouldDelete) Accesses->erase(MA); else Accesses->remove(MA); - if (Accesses->empty()) + if (Accesses->empty()) { PerBlockAccesses.erase(AccessIt); + BlockNumberingValid.erase(BB); + } } void MemorySSA::print(raw_ostream &OS) const { @@ -1621,10 +1646,49 @@ void MemorySSA::verifyMemorySSA() const { verifyDefUses(F); verifyDomination(F); verifyOrdering(F); + verifyDominationNumbers(F); Walker->verify(this); } -/// \brief Verify that the order and existence of MemoryAccesses matches the +/// Verify that all of the blocks we believe to have valid domination numbers +/// actually have valid domination numbers. +void MemorySSA::verifyDominationNumbers(const Function &F) const { +#ifndef NDEBUG + if (BlockNumberingValid.empty()) + return; + + SmallPtrSet<const BasicBlock *, 16> ValidBlocks = BlockNumberingValid; + for (const BasicBlock &BB : F) { + if (!ValidBlocks.count(&BB)) + continue; + + ValidBlocks.erase(&BB); + + const AccessList *Accesses = getBlockAccesses(&BB); + // It's correct to say an empty block has valid numbering. + if (!Accesses) + continue; + + // Block numbering starts at 1. + unsigned long LastNumber = 0; + for (const MemoryAccess &MA : *Accesses) { + auto ThisNumberIter = BlockNumbering.find(&MA); + assert(ThisNumberIter != BlockNumbering.end() && + "MemoryAccess has no domination number in a valid block!"); + + unsigned long ThisNumber = ThisNumberIter->second; + assert(ThisNumber > LastNumber && + "Domination numbers should be strictly increasing!"); + LastNumber = ThisNumber; + } + } + + assert(ValidBlocks.empty() && + "All valid BasicBlocks should exist in F -- dangling pointers?"); +#endif +} + +/// Verify that the order and existence of MemoryAccesses matches the /// order and existence of memory affecting instructions. void MemorySSA::verifyOrdering(Function &F) const { // Walk all the blocks, comparing what the lookups think and what the access @@ -1687,7 +1751,7 @@ void MemorySSA::verifyOrdering(Function &F) const { } } -/// \brief Verify the domination properties of MemorySSA by checking that each +/// Verify the domination properties of MemorySSA by checking that each /// definition dominates all of its uses. void MemorySSA::verifyDomination(Function &F) const { #ifndef NDEBUG @@ -1709,7 +1773,7 @@ void MemorySSA::verifyDomination(Function &F) const { #endif } -/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use +/// Verify the def-use lists in MemorySSA, by verifying that \p Use /// appears in the use list of \p Def. 
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { #ifndef NDEBUG @@ -1723,7 +1787,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { #endif } -/// \brief Verify the immediate use information, by walking all the memory +/// Verify the immediate use information, by walking all the memory /// accesses and verifying that, for each use, it appears in the /// appropriate def's use list void MemorySSA::verifyDefUses(Function &F) const { @@ -1733,8 +1797,12 @@ void MemorySSA::verifyDefUses(Function &F) const { assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance( pred_begin(&B), pred_end(&B))) && "Incomplete MemoryPhi Node"); - for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { verifyUseInDefs(Phi->getIncomingValue(I), Phi); + assert(find(predecessors(&B), Phi->getIncomingBlock(I)) != + pred_end(&B) && + "Incoming phi block not a block predecessor"); + } } for (Instruction &I : B) { @@ -1769,7 +1837,7 @@ void MemorySSA::renumberBlock(const BasicBlock *B) const { BlockNumberingValid.insert(B); } -/// \brief Determine, for two memory accesses in the same block, +/// Determine, for two memory accesses in the same block, /// whether \p Dominator dominates \p Dominatee. /// \returns True if \p Dominator dominates \p Dominatee. bool MemorySSA::locallyDominates(const MemoryAccess *Dominator, @@ -1844,12 +1912,24 @@ void MemoryAccess::print(raw_ostream &OS) const { void MemoryDef::print(raw_ostream &OS) const { MemoryAccess *UO = getDefiningAccess(); + auto printID = [&OS](MemoryAccess *A) { + if (A && A->getID()) + OS << A->getID(); + else + OS << LiveOnEntryStr; + }; + OS << getID() << " = MemoryDef("; - if (UO && UO->getID()) - OS << UO->getID(); - else - OS << LiveOnEntryStr; - OS << ')'; + printID(UO); + OS << ")"; + + if (isOptimized()) { + OS << "->"; + printID(getOptimized()); + + if (Optional<AliasResult> AR = getOptimizedAccessType()) + OS << " " << *AR; + } } void MemoryPhi::print(raw_ostream &OS) const { @@ -1886,6 +1966,9 @@ void MemoryUse::print(raw_ostream &OS) const { else OS << LiveOnEntryStr; OS << ')'; + + if (Optional<AliasResult> AR = getOptimizedAccessType()) + OS << " " << *AR; } void MemoryAccess::dump() const { @@ -1977,21 +2060,13 @@ void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { MUD->resetOptimized(); } -/// \brief Walk the use-def chains starting at \p MA and find +/// Walk the use-def chains starting at \p MA and find /// the MemoryAccess that actually clobbers Loc. 
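// The printer changes above expose the new per-access alias annotation; a
// hypothetical client reading it back, using only names that appear in this
// patch (isOptimized, getOptimizedAccessType):
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

static bool definitelyClobbered(const MemoryUseOrDef *MUD) {
  if (!MUD->isOptimized())
    return false; // the walker has not resolved this access yet
  Optional<AliasResult> AR = MUD->getOptimizedAccessType();
  return AR && *AR == MustAlias; // clobber is known to must-alias this access
}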
/// /// \returns our clobbering memory access MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) { - MemoryAccess *New = Walker.findClobber(StartingAccess, Q); -#ifdef EXPENSIVE_CHECKS - MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q); - assert(NewNoCache == New && "Cache made us hand back a different result?"); - (void)NewNoCache; -#endif - if (AutoResetWalker) - resetClobberWalker(); - return New; + return Walker.findClobber(StartingAccess, Q); } MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( @@ -2023,10 +2098,10 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( : StartingUseOrDef; MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q); - DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); - DEBUG(dbgs() << *StartingUseOrDef << "\n"); - DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); - DEBUG(dbgs() << *Clobber << "\n"); + LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n"); + LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + LLVM_DEBUG(dbgs() << *Clobber << "\n"); return Clobber; } @@ -2038,24 +2113,23 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { return MA; // If this is an already optimized use or def, return the optimized result. - // Note: Currently, we do not store the optimized def result because we'd need - // a separate field, since we can't use it as the defining access. - if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) - if (MUD->isOptimized()) - return MUD->getOptimized(); + // Note: Currently, we store the optimized def result in a separate field, + // since we can't use the defining access. + if (StartingAccess->isOptimized()) + return StartingAccess->getOptimized(); const Instruction *I = StartingAccess->getMemoryInst(); UpwardsMemoryQuery Q(I, StartingAccess); - // We can't sanely do anything with a fences, they conservatively - // clobber all memory, and have no locations to get pointers from to - // try to disambiguate. + // We can't sanely do anything with a fence, since they conservatively clobber + // all memory, and have no locations to get pointers from to try to + // disambiguate. if (!Q.IsCall && I->isFenceLike()) return StartingAccess; if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) { MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); - if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) - MUD->setOptimized(LiveOnEntry); + StartingAccess->setOptimized(LiveOnEntry); + StartingAccess->setOptimizedAccessType(None); return LiveOnEntry; } @@ -2064,16 +2138,23 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { // At this point, DefiningAccess may be the live on entry def. // If it is, we will not get a better result. 
- if (MSSA->isLiveOnEntryDef(DefiningAccess)) + if (MSSA->isLiveOnEntryDef(DefiningAccess)) { + StartingAccess->setOptimized(DefiningAccess); + StartingAccess->setOptimizedAccessType(None); return DefiningAccess; + } MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q); - DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); - DEBUG(dbgs() << *DefiningAccess << "\n"); - DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); - DEBUG(dbgs() << *Result << "\n"); - if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) - MUD->setOptimized(Result); + LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + LLVM_DEBUG(dbgs() << *DefiningAccess << "\n"); + LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + LLVM_DEBUG(dbgs() << *Result << "\n"); + + StartingAccess->setOptimized(Result); + if (MSSA->isLiveOnEntryDef(Result)) + StartingAccess->setOptimizedAccessType(None); + else if (Q.AR == MustAlias) + StartingAccess->setOptimizedAccessType(MustAlias); return Result; } diff --git a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp index f5d89f699a5a..abe2b3c25a58 100644 --- a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -37,36 +37,45 @@ using namespace llvm; // that there are two or more definitions needing to be merged. // This still will leave non-minimal form in the case of irreducible control // flow, where phi nodes may be in cycles with themselves, but unnecessary. -MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { - // Single predecessor case, just recurse, we can only have one definition. +MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( + BasicBlock *BB, + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) { + // First, do a cache lookup. Without this cache, certain CFG structures + // (like a series of if statements) take exponential time to visit. + auto Cached = CachedPreviousDef.find(BB); + if (Cached != CachedPreviousDef.end()) { + return Cached->second; + } + if (BasicBlock *Pred = BB->getSinglePredecessor()) { - return getPreviousDefFromEnd(Pred); - } else if (VisitedBlocks.count(BB)) { + // Single predecessor case, just recurse, we can only have one definition. + MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); + CachedPreviousDef.insert({BB, Result}); + return Result; + } + + if (VisitedBlocks.count(BB)) { // We hit our node again, meaning we had a cycle, we must insert a phi // node to break it so we have an operand. The only case this will // insert useless phis is if we have irreducible control flow. - return MSSA->createMemoryPhi(BB); - } else if (VisitedBlocks.insert(BB).second) { + MemoryAccess *Result = MSSA->createMemoryPhi(BB); + CachedPreviousDef.insert({BB, Result}); + return Result; + } + + if (VisitedBlocks.insert(BB).second) { // Mark us visited so we can detect a cycle - SmallVector<MemoryAccess *, 8> PhiOps; + SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps; // Recurse to get the values in our predecessors for placement of a // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. for (auto *Pred : predecessors(BB)) - PhiOps.push_back(getPreviousDefFromEnd(Pred)); + PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); // Now try to simplify the ops to avoid placing a phi. 
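// The CachedPreviousDef map introduced above targets CFG shapes like this one
// (illustrative source, not from the patch): each if/else join is a block with
// two predecessors, and without the cache the recursive walk revisits every
// earlier join once per path, which is exponential in the number of joins.
void ifLadder(int *P, bool C0, bool C1, bool C2) {
  if (C0) *P = 0; else *P = 1; // join 1
  if (C1) *P = 2; else *P = 3; // join 2
  if (C2) *P = 4; else *P = 5; // join 3
  *P = 6; // inserting a MemoryDef here previously re-walked all earlier joins
}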
// This may return null if we never created a phi yet, that's okay MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB)); - bool PHIExistsButNeedsUpdate = false; - // See if the existing phi operands match what we need. - // Unlike normal SSA, we only allow one phi node per block, so we can't just - // create a new one. - if (Phi && Phi->getNumOperands() != 0) - if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) { - PHIExistsButNeedsUpdate = true; - } // See if we can avoid the phi by simplifying it. auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); @@ -75,14 +84,20 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { if (!Phi) Phi = MSSA->createMemoryPhi(BB); - // These will have been filled in by the recursive read we did above. - if (PHIExistsButNeedsUpdate) { - std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin()); - std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin()); + // See if the existing phi operands match what we need. + // Unlike normal SSA, we only allow one phi node per block, so we can't just + // create a new one. + if (Phi->getNumOperands() != 0) { + // FIXME: Figure out whether this is dead code and if so remove it. + if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) { + // These will have been filled in by the recursive read we did above. + std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin()); + std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin()); + } } else { unsigned i = 0; for (auto *Pred : predecessors(BB)) - Phi->addIncoming(PhiOps[i++], Pred); + Phi->addIncoming(&*PhiOps[i++], Pred); InsertedPHIs.push_back(Phi); } Result = Phi; @@ -90,6 +105,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { // Set ourselves up for the next variable by resetting visited state. VisitedBlocks.erase(BB); + CachedPreviousDef.insert({BB, Result}); return Result; } llvm_unreachable("Should have hit one of the three cases above"); @@ -100,9 +116,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { // it continues globally, creating phi nodes to ensure we have a single // definition. MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) { - auto *LocalResult = getPreviousDefInBlock(MA); - - return LocalResult ? LocalResult : getPreviousDefRecursive(MA->getBlock()); + if (auto *LocalResult = getPreviousDefInBlock(MA)) + return LocalResult; + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef; + return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef); } // This starts at the memory access, and goes backwards in the block to the find @@ -133,13 +150,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) { } // This starts at the end of block -MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(BasicBlock *BB) { +MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd( + BasicBlock *BB, + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) { auto *Defs = MSSA->getWritableBlockDefs(BB); if (Defs) return &*Defs->rbegin(); - return getPreviousDefRecursive(BB); + return getPreviousDefRecursive(BB, CachedPreviousDef); } // Recurse over a set of phi uses to eliminate the trivial ones MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { @@ -165,6 +184,10 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { template <class RangeType> MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands) { + // Bail out on non-opt Phis. 
+ if (NonOptPhis.count(Phi)) + return Phi; + // Detect equal or self arguments MemoryAccess *Same = nullptr; for (auto &Op : Operands) { @@ -174,7 +197,7 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, // not the same, return the phi since it's not eliminatable by us if (Same) return Phi; - Same = cast<MemoryAccess>(Op); + Same = cast<MemoryAccess>(&*Op); } // Never found a non-self reference, the phi is undef if (Same == nullptr) @@ -230,10 +253,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { InsertedPHIs.clear(); // See if we had a local def, and if not, go hunting. - MemoryAccess *DefBefore = getPreviousDefInBlock(MD); - bool DefBeforeSameBlock = DefBefore != nullptr; - if (!DefBefore) - DefBefore = getPreviousDefRecursive(MD->getBlock()); + MemoryAccess *DefBefore = getPreviousDef(MD); + bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock(); // There is a def before us, which means we can replace any store/phi uses // of that thing with us, since we are in the way of whatever was there @@ -255,8 +276,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // above and reset ourselves. MD->setDefiningAccess(DefBefore); - SmallVector<MemoryAccess *, 8> FixupList(InsertedPHIs.begin(), - InsertedPHIs.end()); + SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); if (!DefBeforeSameBlock) { // If there was a local def before us, we must have the same effect it // did. Because every may-def is the same, any phis/etc we would create, it @@ -277,7 +297,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { fixupDefs(FixupList); FixupList.clear(); // Put any new phis on the fixup list, and process them - FixupList.append(InsertedPHIs.end() - StartingPHISize, InsertedPHIs.end()); + FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end()); } // Now that all fixups are done, rename all uses if we are asked. if (RenameUses) { @@ -294,19 +314,29 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { MSSA->renamePass(MD->getBlock(), FirstDef, Visited); // We just inserted a phi into this block, so the incoming value will become // the phi anyway, so it does not matter what we pass. - for (auto *MP : InsertedPHIs) - MSSA->renamePass(MP->getBlock(), nullptr, Visited); + for (auto &MP : InsertedPHIs) { + MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP); + if (Phi) + MSSA->renamePass(Phi->getBlock(), nullptr, Visited); + } } } -void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<MemoryAccess *> &Vars) { +void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) { SmallPtrSet<const BasicBlock *, 8> Seen; SmallVector<const BasicBlock *, 16> Worklist; - for (auto *NewDef : Vars) { + for (auto &Var : Vars) { + MemoryAccess *NewDef = dyn_cast_or_null<MemoryAccess>(Var); + if (!NewDef) + continue; // First, see if there is a local def after the operand. auto *Defs = MSSA->getWritableBlockDefs(NewDef->getBlock()); auto DefIter = NewDef->getDefsIterator(); + // The temporary Phi is being fixed, unmark it for not to optimize. + if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(NewDef)) + NonOptPhis.erase(Phi); + // If there is a local def after us, we only have to rename that. 
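The loop above is the whole trick behind trivial-phi removal: a phi whose operands are all either the phi itself or one single other value contributes nothing. A standalone sketch of the same test over plain pointers (toy code, not the MemorySSA types):

#include <vector>

// Returns the unique non-self operand if the phi is trivial, the phi itself if
// two distinct values feed it, or nullptr (standing in for "undef") if every
// operand was a self-reference.
static const void *simplifyTrivialPhi(const void *Phi,
                                      const std::vector<const void *> &Ops) {
  const void *Same = nullptr;
  for (const void *Op : Ops) {
    if (Op == Phi || Op == Same)
      continue;                        // Self-reference or a repeat of Same.
    if (Same)
      return Phi;                      // Second distinct value: keep the phi.
    Same = Op;
  }
  return Same;
}
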
if (++DefIter != Defs->end()) { cast<MemoryDef>(DefIter)->setDefiningAccess(NewDef); @@ -366,6 +396,11 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<MemoryAccess *> &Vars) { template <class WhereType> void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB, WhereType Where) { + // Mark MemoryPhi users of What not to be optimized. + for (auto *U : What->users()) + if (MemoryPhi *PhiUser = dyn_cast<MemoryPhi>(U)) + NonOptPhis.insert(PhiUser); + // Replace all our users with our defining access. What->replaceAllUsesWith(What->getDefiningAccess()); @@ -377,6 +412,10 @@ void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB, insertDef(MD); else insertUse(cast<MemoryUse>(What)); + + // Clear dangling pointers. We added all MemoryPhi users, but not all + // of them are removed by fixupDefs(). + NonOptPhis.clear(); } // Move What before Where in the MemorySSA IR. @@ -394,7 +433,57 @@ void MemorySSAUpdater::moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, return moveTo(What, BB, Where); } -/// \brief If all arguments of a MemoryPHI are defined by the same incoming +// All accesses in To used to be in From. Move to end and update access lists. +void MemorySSAUpdater::moveAllAccesses(BasicBlock *From, BasicBlock *To, + Instruction *Start) { + + MemorySSA::AccessList *Accs = MSSA->getWritableBlockAccesses(From); + if (!Accs) + return; + + MemoryAccess *FirstInNew = nullptr; + for (Instruction &I : make_range(Start->getIterator(), To->end())) + if ((FirstInNew = MSSA->getMemoryAccess(&I))) + break; + if (!FirstInNew) + return; + + auto *MUD = cast<MemoryUseOrDef>(FirstInNew); + do { + auto NextIt = ++MUD->getIterator(); + MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) + ? nullptr + : cast<MemoryUseOrDef>(&*NextIt); + MSSA->moveTo(MUD, To, MemorySSA::End); + // Moving MUD from Accs in the moveTo above, may delete Accs, so we need to + // retrieve it again. + Accs = MSSA->getWritableBlockAccesses(From); + MUD = NextMUD; + } while (MUD); +} + +void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, + BasicBlock *To, + Instruction *Start) { + assert(MSSA->getBlockAccesses(To) == nullptr && + "To block is expected to be free of MemoryAccesses."); + moveAllAccesses(From, To, Start); + for (BasicBlock *Succ : successors(To)) + if (MemoryPhi *MPhi = MSSA->getMemoryAccess(Succ)) + MPhi->setIncomingBlock(MPhi->getBasicBlockIndex(From), To); +} + +void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To, + Instruction *Start) { + assert(From->getSinglePredecessor() == To && + "From block is expected to have a single predecessor (To)."); + moveAllAccesses(From, To, Start); + for (BasicBlock *Succ : successors(From)) + if (MemoryPhi *MPhi = MSSA->getMemoryAccess(Succ)) + MPhi->setIncomingBlock(MPhi->getBasicBlockIndex(From), To); +} + +/// If all arguments of a MemoryPHI are defined by the same incoming /// argument, return that argument. 
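A usage sketch for the splice helper added above (hypothetical transform, not part of the commit), assuming the IR instructions from Start to the end of From have already been moved into the freshly created, still-empty block To:

#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void updateAfterTailSplit(MemorySSAUpdater &MSSAU, BasicBlock *From,
                                 BasicBlock *To, Instruction *Start) {
  // Precondition from the assert above: To has no MemorySSA accesses yet.
  // The call moves every access at or after Start into To and retargets the
  // incoming block of MemoryPhis in To's successors from From to To.
  MSSAU.moveAllAfterSpliceBlocks(From, To, Start);
}
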
static MemoryAccess *onlySingleValue(MemoryPhi *MP) { MemoryAccess *MA = nullptr; @@ -408,6 +497,35 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) { return MA; } +void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor( + BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds) { + assert(!MSSA->getWritableBlockAccesses(New) && + "Access list should be null for a new block."); + MemoryPhi *Phi = MSSA->getMemoryAccess(Old); + if (!Phi) + return; + if (pred_size(Old) == 1) { + assert(pred_size(New) == Preds.size() && + "Should have moved all predecessors."); + MSSA->moveTo(Phi, New, MemorySSA::Beginning); + } else { + assert(!Preds.empty() && "Must be moving at least one predecessor to the " + "new immediate predecessor."); + MemoryPhi *NewPhi = MSSA->createMemoryPhi(New); + SmallPtrSet<BasicBlock *, 16> PredsSet(Preds.begin(), Preds.end()); + Phi->unorderedDeleteIncomingIf([&](MemoryAccess *MA, BasicBlock *B) { + if (PredsSet.count(B)) { + NewPhi->addIncoming(MA, B); + return true; + } + return false; + }); + Phi->addIncoming(NewPhi, New); + if (onlySingleValue(NewPhi)) + removeMemoryAccess(NewPhi); + } +} + void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { assert(!MSSA->isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); @@ -456,6 +574,39 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { MSSA->removeFromLists(MA); } +void MemorySSAUpdater::removeBlocks( + const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) { + // First delete all uses of BB in MemoryPhis. + for (BasicBlock *BB : DeadBlocks) { + TerminatorInst *TI = BB->getTerminator(); + assert(TI && "Basic block expected to have a terminator instruction"); + for (BasicBlock *Succ : TI->successors()) + if (!DeadBlocks.count(Succ)) + if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) { + MP->unorderedDeleteIncomingBlock(BB); + if (MP->getNumIncomingValues() == 1) + removeMemoryAccess(MP); + } + // Drop all references of all accesses in BB + if (MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB)) + for (MemoryAccess &MA : *Acc) + MA.dropAllReferences(); + } + + // Next, delete all memory accesses in each block + for (BasicBlock *BB : DeadBlocks) { + MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB); + if (!Acc) + continue; + for (auto AB = Acc->begin(), AE = Acc->end(); AB != AE;) { + MemoryAccess *MA = &*AB; + ++AB; + MSSA->removeFromLookups(MA); + MSSA->removeFromLists(MA); + } + } +} + MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point) { diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index efa5bd564ad0..17dae20ce3a1 100644 --- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -49,6 +49,7 @@ #include "llvm/Object/SymbolicFile.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -58,6 +59,18 @@ using namespace llvm; #define DEBUG_TYPE "module-summary-analysis" +// Option to force edges cold which will block importing when the +// -import-cold-multiplier is set to 0. Useful for debugging. 
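A usage sketch for the removeBlocks() helper added above (hypothetical cleanup routine, not part of the commit), assuming the blocks in Dead are genuinely unreachable from the function entry:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

static void deleteUnreachableBlocks(MemorySSAUpdater &MSSAU,
                                    const SmallPtrSetImpl<BasicBlock *> &Dead) {
  // Fix up MemorySSA first so MemoryPhis in live successors drop the incoming
  // values that came from the dead region.
  MSSAU.removeBlocks(Dead);
  for (BasicBlock *BB : Dead)
    BB->dropAllReferences();           // Break operand cycles inside the region.
  for (BasicBlock *BB : Dead)
    BB->eraseFromParent();             // Then erase the blocks themselves.
}
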
+FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold = + FunctionSummary::FSHT_None; +cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC( + "force-summary-edges-cold", cl::Hidden, cl::location(ForceSummaryEdgesCold), + cl::desc("Force all edges in the function summary to cold"), + cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."), + clEnumValN(FunctionSummary::FSHT_AllNonCritical, + "all-non-critical", "All non-critical edges."), + clEnumValN(FunctionSummary::FSHT_All, "all", "All edges."))); + // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). @@ -268,14 +281,23 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, auto ScaledCount = PSI->getProfileCount(&I, BFI); auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI) : CalleeInfo::HotnessType::Unknown; + if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None) + Hotness = CalleeInfo::HotnessType::Cold; // Use the original CalledValue, in case it was an alias. We want // to record the call edge to the alias in that case. Eventually // an alias summary will be created to associate the alias and // aliasee. - CallGraphEdges[Index.getOrInsertValueInfo( - cast<GlobalValue>(CalledValue))] - .updateHotness(Hotness); + auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo( + cast<GlobalValue>(CalledValue))]; + ValueInfo.updateHotness(Hotness); + // Add the relative block frequency to CalleeInfo if there is no profile + // information. + if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) { + uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency(); + uint64_t EntryFreq = BFI->getEntryFreq(); + ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq); + } } else { // Skip inline assembly calls. if (CI && CI->isInlineAsm()) @@ -284,6 +306,18 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, if (!CalledValue || isa<Constant>(CalledValue)) continue; + // Check if the instruction has a callees metadata. If so, add callees + // to CallGraphEdges to reflect the references from the metadata, and + // to enable importing for subsequent indirect call promotion and + // inlining. + if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) { + for (auto &Op : MD->operands()) { + Function *Callee = mdconst::extract_or_null<Function>(Op); + if (Callee) + CallGraphEdges[Index.getOrInsertValueInfo(Callee)]; + } + } + uint32_t NumVals, NumCandidates; uint64_t TotalCount; auto CandidateProfileData = @@ -299,7 +333,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // sample PGO, to enable the same inlines as the profiled optimized binary. for (auto &I : F.getImportGUIDs()) CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness( - CalleeInfo::HotnessType::Critical); + ForceSummaryEdgesCold == FunctionSummary::FSHT_All + ? 
CalleeInfo::HotnessType::Cold + : CalleeInfo::HotnessType::Critical); bool NonRenamableLocal = isNonRenamableLocal(F); bool NotEligibleForImport = @@ -325,7 +361,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, TypeCheckedLoadConstVCalls.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(F.getGUID()); - Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary)); + Index.addGlobalValueSummary(F, std::move(FuncSummary)); } static void @@ -341,7 +377,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(V.getGUID()); - Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary)); + Index.addGlobalValueSummary(V, std::move(GVarSummary)); } static void @@ -357,7 +393,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, AS->setAliasee(AliaseeSummary); if (NonRenamableLocal) CantBePromoted.insert(A.getGUID()); - Index.addGlobalValueSummary(A.getName(), std::move(AS)); + Index.addGlobalValueSummary(A, std::move(AS)); } // Set LiveRoot flag on entries matching the given value name. @@ -372,7 +408,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback, ProfileSummaryInfo *PSI) { assert(PSI); - ModuleSummaryIndex Index; + ModuleSummaryIndex Index(/*HaveGVs=*/true); // Identify the local values in the llvm.used and llvm.compiler.used sets, // which should not be exported as they would then require renaming and @@ -419,7 +455,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( /* NotEligibleToImport = */ true, /* Live = */ true, /* Local */ GV->isDSOLocal()); - CantBePromoted.insert(GlobalValue::getGUID(Name)); + CantBePromoted.insert(GV->getGUID()); // Create the appropriate summary type. if (Function *F = dyn_cast<Function>(GV)) { std::unique_ptr<FunctionSummary> Summary = @@ -436,12 +472,12 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( ArrayRef<FunctionSummary::VFuncId>{}, ArrayRef<FunctionSummary::ConstVCall>{}, ArrayRef<FunctionSummary::ConstVCall>{}); - Index.addGlobalValueSummary(Name, std::move(Summary)); + Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr<GlobalVarSummary> Summary = llvm::make_unique<GlobalVarSummary>(GVFlags, ArrayRef<ValueInfo>{}); - Index.addGlobalValueSummary(Name, std::move(Summary)); + Index.addGlobalValueSummary(*GV, std::move(Summary)); } }); } @@ -571,14 +607,14 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass() bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) { auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - Index = buildModuleSummaryIndex( + Index.emplace(buildModuleSummaryIndex( M, [this](const Function &F) { return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>( *const_cast<Function *>(&F)) .getBFI()); }, - &PSI); + &PSI)); return false; } diff --git a/contrib/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm/lib/Analysis/MustExecute.cpp new file mode 100644 index 000000000000..fc4049874622 --- /dev/null +++ b/contrib/llvm/lib/Analysis/MustExecute.cpp @@ -0,0 +1,269 @@ +//===- MustExecute.cpp - Printer for isGuaranteedToExecute ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// Computes loop safety information, checks loop body & header +/// for the possibility of may throw exception. +/// +void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { + assert(CurLoop != nullptr && "CurLoop can't be null"); + BasicBlock *Header = CurLoop->getHeader(); + // Setting default safety values. + SafetyInfo->MayThrow = false; + SafetyInfo->HeaderMayThrow = false; + // Iterate over header and compute safety info. + SafetyInfo->HeaderMayThrow = + !isGuaranteedToTransferExecutionToSuccessor(Header); + + SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; + // Iterate over loop instructions and compute safety info. + // Skip header as it has been computed and stored in HeaderMayThrow. + // The first block in loopinfo.Blocks is guaranteed to be the header. + assert(Header == *CurLoop->getBlocks().begin() && + "First block must be header"); + for (Loop::block_iterator BB = std::next(CurLoop->block_begin()), + BBE = CurLoop->block_end(); + (BB != BBE) && !SafetyInfo->MayThrow; ++BB) + SafetyInfo->MayThrow |= + !isGuaranteedToTransferExecutionToSuccessor(*BB); + + // Compute funclet colors if we might sink/hoist in a function with a funclet + // personality routine. + Function *Fn = CurLoop->getHeader()->getParent(); + if (Fn->hasPersonalityFn()) + if (Constant *PersonalityFn = Fn->getPersonalityFn()) + if (isScopedEHPersonality(classifyEHPersonality(PersonalityFn))) + SafetyInfo->BlockColors = colorEHFunclets(*Fn); +} + +/// Return true if we can prove that the given ExitBlock is not reached on the +/// first iteration of the given loop. That is, the backedge of the loop must +/// be executed before the ExitBlock is executed in any dynamic execution trace. +static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock, + const DominatorTree *DT, + const Loop *CurLoop) { + auto *CondExitBlock = ExitBlock->getSinglePredecessor(); + if (!CondExitBlock) + // expect unique exits + return false; + assert(CurLoop->contains(CondExitBlock) && "meaning of exit block"); + auto *BI = dyn_cast<BranchInst>(CondExitBlock->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + // If condition is constant and false leads to ExitBlock then we always + // execute the true branch. + if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) + return BI->getSuccessor(Cond->getZExtValue() ? 
1 : 0) == ExitBlock; + auto *Cond = dyn_cast<CmpInst>(BI->getCondition()); + if (!Cond) + return false; + // todo: this would be a lot more powerful if we used scev, but all the + // plumbing is currently missing to pass a pointer in from the pass + // Check for cmp (phi [x, preheader] ...), y where (pred x, y is known + auto *LHS = dyn_cast<PHINode>(Cond->getOperand(0)); + auto *RHS = Cond->getOperand(1); + if (!LHS || LHS->getParent() != CurLoop->getHeader()) + return false; + auto DL = ExitBlock->getModule()->getDataLayout(); + auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader()); + auto *SimpleValOrNull = SimplifyCmpInst(Cond->getPredicate(), + IVStart, RHS, + {DL, /*TLI*/ nullptr, + DT, /*AC*/ nullptr, BI}); + auto *SimpleCst = dyn_cast_or_null<Constant>(SimpleValOrNull); + if (!SimpleCst) + return false; + if (ExitBlock == BI->getSuccessor(0)) + return SimpleCst->isZeroValue(); + assert(ExitBlock == BI->getSuccessor(1) && "implied by above"); + return SimpleCst->isAllOnesValue(); +} + +/// Returns true if the instruction in a loop is guaranteed to execute at least +/// once. +bool llvm::isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo) { + // We have to check to make sure that the instruction dominates all + // of the exit blocks. If it doesn't, then there is a path out of the loop + // which does not execute this instruction, so we can't hoist it. + + // If the instruction is in the header block for the loop (which is very + // common), it is always guaranteed to dominate the exit blocks. Since this + // is a common case, and can save some work, check it now. + if (Inst.getParent() == CurLoop->getHeader()) + // If there's a throw in the header block, we can't guarantee we'll reach + // Inst unless we can prove that Inst comes before the potential implicit + // exit. At the moment, we use a (cheap) hack for the common case where + // the instruction of interest is the first one in the block. + return !SafetyInfo->HeaderMayThrow || + Inst.getParent()->getFirstNonPHIOrDbg() == &Inst; + + // Somewhere in this loop there is an instruction which may throw and make us + // exit the loop. + if (SafetyInfo->MayThrow) + return false; + + // Note: There are two styles of reasoning intermixed below for + // implementation efficiency reasons. They are: + // 1) If we can prove that the instruction dominates all exit blocks, then we + // know the instruction must have executed on *some* iteration before we + // exit. We do not prove *which* iteration the instruction must execute on. + // 2) If we can prove that the instruction dominates the latch and all exits + // which might be taken on the first iteration, we know the instruction must + // execute on the first iteration. This second style allows a conditional + // exit before the instruction of interest which is provably not taken on the + // first iteration. This is a quite common case for range check like + // patterns. TODO: support loops with multiple latches. + + const bool InstDominatesLatch = + CurLoop->getLoopLatch() != nullptr && + DT->dominates(Inst.getParent(), CurLoop->getLoopLatch()); + + // Get the exit blocks for the current loop. + SmallVector<BasicBlock *, 8> ExitBlocks; + CurLoop->getExitBlocks(ExitBlocks); + + // Verify that the block dominates each of the exit blocks of the loop. 
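A worked instance of reasoning style (2) above (illustrative source, not from the commit): the early exit compares the induction variable against a constant, and since i starts at 0 the comparison 0 >= 64 folds to false, so that exit cannot be taken on the first iteration. The access a[i] below it is therefore guaranteed to execute whenever the loop body is entered, even though it does not dominate that exit block.

int firstChunkSum(const int *a, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i) {
    if (i >= 64)
      break;                 // Provably not taken when i == 0 (a style-2 exit).
    s += a[i];               // Must execute on the first iteration.
  }
  return s;
}
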
+ for (BasicBlock *ExitBlock : ExitBlocks) + if (!DT->dominates(Inst.getParent(), ExitBlock)) + if (!InstDominatesLatch || + !CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop)) + return false; + + // As a degenerate case, if the loop is statically infinite then we haven't + // proven anything since there are no exit blocks. + if (ExitBlocks.empty()) + return false; + + // FIXME: In general, we have to prove that the loop isn't an infinite loop. + // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is + // just a special case of this.) + return true; +} + + +namespace { + struct MustExecutePrinter : public FunctionPass { + + static char ID; // Pass identification, replacement for typeid + MustExecutePrinter() : FunctionPass(ID) { + initializeMustExecutePrinterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + } + bool runOnFunction(Function &F) override; + }; +} + +char MustExecutePrinter::ID = 0; +INITIALIZE_PASS_BEGIN(MustExecutePrinter, "print-mustexecute", + "Instructions which execute on loop entry", false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(MustExecutePrinter, "print-mustexecute", + "Instructions which execute on loop entry", false, true) + +FunctionPass *llvm::createMustExecutePrinter() { + return new MustExecutePrinter(); +} + +static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) { + // TODO: merge these two routines. For the moment, we display the best + // result obtained by *either* implementation. This is a bit unfair since no + // caller actually gets the full power at the moment. + LoopSafetyInfo LSI; + computeLoopSafetyInfo(&LSI, L); + return isGuaranteedToExecute(I, DT, L, &LSI) || + isGuaranteedToExecuteForEveryIteration(&I, L); +} + +namespace { +/// An assembly annotator class to print must execute information in +/// comments. 
+class MustExecuteAnnotatedWriter : public AssemblyAnnotationWriter { + DenseMap<const Value*, SmallVector<Loop*, 4> > MustExec; + +public: + MustExecuteAnnotatedWriter(const Function &F, + DominatorTree &DT, LoopInfo &LI) { + for (auto &I: instructions(F)) { + Loop *L = LI.getLoopFor(I.getParent()); + while (L) { + if (isMustExecuteIn(I, L, &DT)) { + MustExec[&I].push_back(L); + } + L = L->getParentLoop(); + }; + } + } + MustExecuteAnnotatedWriter(const Module &M, + DominatorTree &DT, LoopInfo &LI) { + for (auto &F : M) + for (auto &I: instructions(F)) { + Loop *L = LI.getLoopFor(I.getParent()); + while (L) { + if (isMustExecuteIn(I, L, &DT)) { + MustExec[&I].push_back(L); + } + L = L->getParentLoop(); + }; + } + } + + + void printInfoComment(const Value &V, formatted_raw_ostream &OS) override { + if (!MustExec.count(&V)) + return; + + const auto &Loops = MustExec.lookup(&V); + const auto NumLoops = Loops.size(); + if (NumLoops > 1) + OS << " ; (mustexec in " << NumLoops << " loops: "; + else + OS << " ; (mustexec in: "; + + bool first = true; + for (const Loop *L : Loops) { + if (!first) + OS << ", "; + first = false; + OS << L->getHeader()->getName(); + } + OS << ")"; + } +}; +} // namespace + +bool MustExecutePrinter::runOnFunction(Function &F) { + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + MustExecuteAnnotatedWriter Writer(F, DT, LI); + F.print(dbgs(), &Writer); + + return false; +} diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp index 55335f3a7cb0..d6db6386c38b 100644 --- a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp +++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -19,7 +19,7 @@ using namespace llvm; using namespace llvm::objcarc; -/// \brief A handy option to enable/disable all ARC Optimizations. +/// A handy option to enable/disable all ARC Optimizations. bool llvm::objcarc::EnableARCOpts; static cl::opt<bool, true> EnableARCOptimizations( "enable-objc-arc-opts", cl::desc("enable/disable all ARC Optimizations"), diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp index f374dd33f86f..f268e2a9abdd 100644 --- a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp +++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp @@ -209,6 +209,7 @@ static bool isInertIntrinsic(unsigned ID) { // Don't let dbg info affect our results. case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + case Intrinsic::dbg_label: // Short cut: Some intrinsics obviously don't use ObjC pointers. return true; default: @@ -233,7 +234,7 @@ static bool isUseOnlyIntrinsic(unsigned ID) { } } -/// \brief Determine what kind of construct V is. +/// Determine what kind of construct V is. ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { if (const Instruction *I = dyn_cast<Instruction>(V)) { // Any instruction other than bitcast and gep with a pointer operand have a @@ -331,7 +332,7 @@ ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { return ARCInstKind::None; } -/// \brief Test if the given class is a kind of user. +/// Test if the given class is a kind of user. bool llvm::objcarc::IsUser(ARCInstKind Class) { switch (Class) { case ARCInstKind::User: @@ -365,7 +366,7 @@ bool llvm::objcarc::IsUser(ARCInstKind Class) { llvm_unreachable("covered switch isn't covered?"); } -/// \brief Test if the given class is objc_retain or equivalent. 
+/// Test if the given class is objc_retain or equivalent. bool llvm::objcarc::IsRetain(ARCInstKind Class) { switch (Class) { case ARCInstKind::Retain: @@ -401,7 +402,7 @@ bool llvm::objcarc::IsRetain(ARCInstKind Class) { llvm_unreachable("covered switch isn't covered?"); } -/// \brief Test if the given class is objc_autorelease or equivalent. +/// Test if the given class is objc_autorelease or equivalent. bool llvm::objcarc::IsAutorelease(ARCInstKind Class) { switch (Class) { case ARCInstKind::Autorelease: @@ -435,7 +436,7 @@ bool llvm::objcarc::IsAutorelease(ARCInstKind Class) { llvm_unreachable("covered switch isn't covered?"); } -/// \brief Test if the given class represents instructions which return their +/// Test if the given class represents instructions which return their /// argument verbatim. bool llvm::objcarc::IsForwarding(ARCInstKind Class) { switch (Class) { @@ -470,7 +471,7 @@ bool llvm::objcarc::IsForwarding(ARCInstKind Class) { llvm_unreachable("covered switch isn't covered?"); } -/// \brief Test if the given class represents instructions which do nothing if +/// Test if the given class represents instructions which do nothing if /// passed a null pointer. bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) { switch (Class) { @@ -505,7 +506,7 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) { llvm_unreachable("covered switch isn't covered?"); } -/// \brief Test if the given class represents instructions which are always safe +/// Test if the given class represents instructions which are always safe /// to mark with the "tail" keyword. bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) { // ARCInstKind::RetainBlock may be given a stack argument. @@ -541,7 +542,7 @@ bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) { llvm_unreachable("covered switch isn't covered?"); } -/// \brief Test if the given class represents instructions which are never safe +/// Test if the given class represents instructions which are never safe /// to mark with the "tail" keyword. bool llvm::objcarc::IsNeverTail(ARCInstKind Class) { /// It is never safe to tail call objc_autorelease since by tail calling @@ -580,7 +581,7 @@ bool llvm::objcarc::IsNeverTail(ARCInstKind Class) { llvm_unreachable("covered switch isn't covered?"); } -/// \brief Test if the given class represents instructions which are always safe +/// Test if the given class represents instructions which are always safe /// to mark with the nounwind attribute. bool llvm::objcarc::IsNoThrow(ARCInstKind Class) { // objc_retainBlock is not nounwind because it calls user copy constructors diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp index a04c0aef04be..6c47651eae9e 100644 --- a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp +++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp @@ -30,7 +30,7 @@ OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB) LastInstFound = BB->end(); } -/// \brief Given no cached results, find if \p A comes before \p B in \p BB. +/// Given no cached results, find if \p A comes before \p B in \p BB. /// Cache and number out instruction while walking \p BB. bool OrderedBasicBlock::comesBefore(const Instruction *A, const Instruction *B) { @@ -58,7 +58,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A, return Inst != B; } -/// \brief Find out whether \p A dominates \p B, meaning whether \p A +/// Find out whether \p A dominates \p B, meaning whether \p A /// comes before \p B in \p BB. 
This is a simplification that considers /// cached instruction positions and ignores other basic blocks, being /// only relevant to compare relative instructions positions inside \p BB. diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp index 682af4dc708e..858f08f6537a 100644 --- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" diff --git a/contrib/llvm/lib/Analysis/PhiValues.cpp b/contrib/llvm/lib/Analysis/PhiValues.cpp new file mode 100644 index 000000000000..ef121815d2cf --- /dev/null +++ b/contrib/llvm/lib/Analysis/PhiValues.cpp @@ -0,0 +1,196 @@ +//===- PhiValues.cpp - Phi Value Analysis ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PhiValues.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +bool PhiValues::invalidate(Function &, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // PhiValues is invalidated if it isn't preserved. + auto PAC = PA.getChecker<PhiValuesAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()); +} + +// The goal here is to find all of the non-phi values reachable from this phi, +// and to do the same for all of the phis reachable from this phi, as doing so +// is necessary anyway in order to get the values for this phi. We do this using +// Tarjan's algorithm with Nuutila's improvements to find the strongly connected +// components of the phi graph rooted in this phi: +// * All phis in a strongly connected component will have the same reachable +// non-phi values. The SCC may not be the maximal subgraph for that set of +// reachable values, but finding out that isn't really necessary (it would +// only reduce the amount of memory needed to store the values). +// * Tarjan's algorithm completes components in a bottom-up manner, i.e. it +// never completes a component before the components reachable from it have +// been completed. This means that when we complete a component we have +// everything we need to collect the values reachable from that component. +// * We collect both the non-phi values reachable from each SCC, as that's what +// we're ultimately interested in, and all of the reachable values, i.e. +// including phis, as that makes invalidateValue easier. +void PhiValues::processPhi(const PHINode *Phi, + SmallVector<const PHINode *, 8> &Stack) { + // Initialize the phi with the next depth number. + assert(DepthMap.lookup(Phi) == 0); + assert(NextDepthNumber != UINT_MAX); + unsigned int DepthNumber = ++NextDepthNumber; + DepthMap[Phi] = DepthNumber; + + // Recursively process the incoming phis of this phi. + for (Value *PhiOp : Phi->incoming_values()) { + if (PHINode *PhiPhiOp = dyn_cast<PHINode>(PhiOp)) { + // Recurse if the phi has not yet been visited. 
+ if (DepthMap.lookup(PhiPhiOp) == 0) + processPhi(PhiPhiOp, Stack); + assert(DepthMap.lookup(PhiPhiOp) != 0); + // If the phi did not become part of a component then this phi and that + // phi are part of the same component, so adjust the depth number. + if (!ReachableMap.count(DepthMap[PhiPhiOp])) + DepthMap[Phi] = std::min(DepthMap[Phi], DepthMap[PhiPhiOp]); + } + } + + // Now that incoming phis have been handled, push this phi to the stack. + Stack.push_back(Phi); + + // If the depth number has not changed then we've finished collecting the phis + // of a strongly connected component. + if (DepthMap[Phi] == DepthNumber) { + // Collect the reachable values for this component. The phis of this + // component will be those on top of the depth stach with the same or + // greater depth number. + ConstValueSet Reachable; + while (!Stack.empty() && DepthMap[Stack.back()] >= DepthNumber) { + const PHINode *ComponentPhi = Stack.pop_back_val(); + Reachable.insert(ComponentPhi); + DepthMap[ComponentPhi] = DepthNumber; + for (Value *Op : ComponentPhi->incoming_values()) { + if (PHINode *PhiOp = dyn_cast<PHINode>(Op)) { + // If this phi is not part of the same component then that component + // is guaranteed to have been completed before this one. Therefore we + // can just add its reachable values to the reachable values of this + // component. + auto It = ReachableMap.find(DepthMap[PhiOp]); + if (It != ReachableMap.end()) + Reachable.insert(It->second.begin(), It->second.end()); + } else { + Reachable.insert(Op); + } + } + } + ReachableMap.insert({DepthNumber,Reachable}); + + // Filter out phis to get the non-phi reachable values. + ValueSet NonPhi; + for (const Value *V : Reachable) + if (!isa<PHINode>(V)) + NonPhi.insert(const_cast<Value*>(V)); + NonPhiReachableMap.insert({DepthNumber,NonPhi}); + } +} + +const PhiValues::ValueSet &PhiValues::getValuesForPhi(const PHINode *PN) { + if (DepthMap.count(PN) == 0) { + SmallVector<const PHINode *, 8> Stack; + processPhi(PN, Stack); + assert(Stack.empty()); + } + assert(DepthMap.lookup(PN) != 0); + return NonPhiReachableMap[DepthMap[PN]]; +} + +void PhiValues::invalidateValue(const Value *V) { + // Components that can reach V are invalid. + SmallVector<unsigned int, 8> InvalidComponents; + for (auto &Pair : ReachableMap) + if (Pair.second.count(V)) + InvalidComponents.push_back(Pair.first); + + for (unsigned int N : InvalidComponents) { + for (const Value *V : ReachableMap[N]) + if (const PHINode *PN = dyn_cast<PHINode>(V)) + DepthMap.erase(PN); + NonPhiReachableMap.erase(N); + ReachableMap.erase(N); + } +} + +void PhiValues::releaseMemory() { + DepthMap.clear(); + NonPhiReachableMap.clear(); + ReachableMap.clear(); +} + +void PhiValues::print(raw_ostream &OS) const { + // Iterate through the phi nodes of the function rather than iterating through + // DepthMap in order to get predictable ordering. + for (const BasicBlock &BB : F) { + for (const PHINode &PN : BB.phis()) { + OS << "PHI "; + PN.printAsOperand(OS, false); + OS << " has values:\n"; + unsigned int N = DepthMap.lookup(&PN); + auto It = NonPhiReachableMap.find(N); + if (It == NonPhiReachableMap.end()) + OS << " UNKNOWN\n"; + else if (It->second.empty()) + OS << " NONE\n"; + else + for (Value *V : It->second) + // Printing of an instruction prints two spaces at the start, so + // handle instructions and everything else slightly differently in + // order to get consistent indenting. 
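A self-contained toy sketch of what the analysis computes (hypothetical Node type and a plain worklist instead of the Tarjan/Nuutila SCC scheme above, which exists so that mutually reachable phis share one result set): collect every non-phi value reachable by walking through phi operands.

#include <set>
#include <vector>

struct Node {
  bool IsPhi = false;
  std::vector<Node *> Incoming;        // Only used when IsPhi is true.
};

static std::set<Node *> valuesForPhi(Node *Phi) {
  std::set<Node *> Values, Visited;
  std::vector<Node *> Worklist{Phi};
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue;                        // Already handled; also breaks phi cycles.
    if (!N->IsPhi) {
      Values.insert(N);                // Non-phi leaves form the answer set.
      continue;
    }
    for (Node *Op : N->Incoming)
      Worklist.push_back(Op);
  }
  return Values;
}
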
+ if (Instruction *I = dyn_cast<Instruction>(V)) + OS << *I << "\n"; + else + OS << " " << *V << "\n"; + } + } +} + +AnalysisKey PhiValuesAnalysis::Key; +PhiValues PhiValuesAnalysis::run(Function &F, FunctionAnalysisManager &) { + return PhiValues(F); +} + +PreservedAnalyses PhiValuesPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + OS << "PHI Values for function: " << F.getName() << "\n"; + PhiValues &PI = AM.getResult<PhiValuesAnalysis>(F); + for (const BasicBlock &BB : F) + for (const PHINode &PN : BB.phis()) + PI.getValuesForPhi(&PN); + PI.print(OS); + return PreservedAnalyses::all(); +} + +PhiValuesWrapperPass::PhiValuesWrapperPass() : FunctionPass(ID) { + initializePhiValuesWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool PhiValuesWrapperPass::runOnFunction(Function &F) { + Result.reset(new PhiValues(F)); + return false; +} + +void PhiValuesWrapperPass::releaseMemory() { + Result->releaseMemory(); +} + +void PhiValuesWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +char PhiValuesWrapperPass::ID = 0; + +INITIALIZE_PASS(PhiValuesWrapperPass, "phi-values", "Phi Values Analysis", false, + true) diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp index 2282401085d4..e6b660fe26d7 100644 --- a/contrib/llvm/lib/Analysis/PostDominators.cpp +++ b/contrib/llvm/lib/Analysis/PostDominators.cpp @@ -21,6 +21,12 @@ using namespace llvm; #define DEBUG_TYPE "postdomtree" +#ifdef EXPENSIVE_CHECKS +static constexpr bool ExpensiveChecksEnabled = true; +#else +static constexpr bool ExpensiveChecksEnabled = false; +#endif + //===----------------------------------------------------------------------===// // PostDominatorTree Implementation //===----------------------------------------------------------------------===// @@ -44,6 +50,13 @@ bool PostDominatorTreeWrapperPass::runOnFunction(Function &F) { return false; } +void PostDominatorTreeWrapperPass::verifyAnalysis() const { + if (VerifyDomInfo) + assert(DT.verify(PostDominatorTree::VerificationLevel::Full)); + else if (ExpensiveChecksEnabled) + assert(DT.verify(PostDominatorTree::VerificationLevel::Basic)); +} + void PostDominatorTreeWrapperPass::print(raw_ostream &OS, const Module *) const { DT.print(OS); } @@ -56,8 +69,7 @@ AnalysisKey PostDominatorTreeAnalysis::Key; PostDominatorTree PostDominatorTreeAnalysis::run(Function &F, FunctionAnalysisManager &) { - PostDominatorTree PDT; - PDT.recalculate(F); + PostDominatorTree PDT(F); return PDT; } diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 347d093b0f61..fb591f5d6a69 100644 --- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -112,7 +112,7 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { // FIXME: The heuristic used below for determining hotness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return FunctionCount && isHotCount(FunctionCount.getValue()); + return FunctionCount && isHotCount(FunctionCount.getCount()); } /// Returns true if the function contains hot code. 
This can include a hot @@ -125,7 +125,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) - if (isHotCount(FunctionCount.getValue())) + if (isHotCount(FunctionCount.getCount())) return true; if (hasSampleProfile()) { @@ -154,7 +154,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) - if (!isColdCount(FunctionCount.getValue())) + if (!isColdCount(FunctionCount.getCount())) return false; if (hasSampleProfile()) { @@ -187,7 +187,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { // FIXME: The heuristic used below for determining coldness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return FunctionCount && isColdCount(FunctionCount.getValue()); + return FunctionCount && isColdCount(FunctionCount.getCount()); } /// Compute the hot and cold thresholds. @@ -223,6 +223,18 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } +uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() { + if (!HotCountThreshold) + computeThresholds(); + return HotCountThreshold && HotCountThreshold.getValue(); +} + +uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() { + if (!ColdCountThreshold) + computeThresholds(); + return ColdCountThreshold && ColdCountThreshold.getValue(); +} + bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) { auto Count = BFI->getBlockProfileCount(B); return Count && isHotCount(*Count); @@ -247,7 +259,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS, return isColdCount(*C); // In SamplePGO, if the caller has been sampled, and there is no profile - // annotatedon the callsite, we consider the callsite as cold. + // annotated on the callsite, we consider the callsite as cold. // If there is no profile for the caller, and we know the profile is // accurate, we consider the callsite as cold. return (hasSampleProfile() && diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index 900487323005..2bd611350f46 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/RegionPrinter.h" #endif #include "llvm/Analysis/RegionInfoImpl.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -80,7 +81,7 @@ RegionInfo::~RegionInfo() = default; bool RegionInfo::invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &) { // Check whether the analysis, all analyses on functions, or the function's - // CFG have been preserved. + // CFG has been preserved. auto PAC = PA.getChecker<RegionInfoAnalysis>(); return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || PAC.preservedSet<CFGAnalyses>()); diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp index c5d71b25e022..ed17df2e7e93 100644 --- a/contrib/llvm/lib/Analysis/RegionPass.cpp +++ b/contrib/llvm/lib/Analysis/RegionPass.cpp @@ -158,12 +158,9 @@ bool RGPassManager::runOnFunction(Function &F) { } // Print the region tree after all pass. 
- DEBUG( - dbgs() << "\nRegion tree of function " << F.getName() - << " after all region Pass:\n"; - RI->dump(); - dbgs() << "\n"; - ); + LLVM_DEBUG(dbgs() << "\nRegion tree of function " << F.getName() + << " after all region Pass:\n"; + RI->dump(); dbgs() << "\n";); return Changed; } @@ -283,14 +280,14 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O, bool RegionPass::skipRegion(Region &R) const { Function &F = *R.getEntry()->getParent(); - if (!F.getContext().getOptBisect().shouldRunPass(this, R)) + if (!F.getContext().getOptPassGate().shouldRunPass(this, R)) return true; if (F.hasFnAttribute(Attribute::OptimizeNone)) { // Report this only once per function. if (R.getEntry() == &F.getEntryBlock()) - DEBUG(dbgs() << "Skipping pass '" << getPassName() - << "' on function " << F.getName() << "\n"); + LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() + << "' on function " << F.getName() << "\n"); return true; } return false; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index bfff7afb5b4e..aa95ace93014 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -83,6 +83,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -205,11 +206,6 @@ static cl::opt<unsigned> cl::desc("Max coefficients in AddRec during evolving"), cl::init(16)); -static cl::opt<bool> VersionUnknown( - "scev-version-unknown", cl::Hidden, - cl::desc("Use predicated scalar evolution to version SCEVUnknowns"), - cl::init(false)); - //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -425,24 +421,21 @@ SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { - assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate non-integer value!"); } SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { - assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot zero extend non-integer value!"); } SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { - assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot sign extend non-integer value!"); } @@ -1260,42 +1253,32 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) return getTruncateOrZeroExtend(SZ->getOperand(), Ty); - // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can - // eliminate all the truncates, or we replace other casts with truncates. 
- if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) { + // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and + // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN), + // if after transforming we have at most one truncate, not counting truncates + // that replace other casts. + if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)) { + auto *CommOp = cast<SCEVCommutativeExpr>(Op); SmallVector<const SCEV *, 4> Operands; - bool hasTrunc = false; - for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) { - const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty); - if (!isa<SCEVCastExpr>(SA->getOperand(i))) - hasTrunc = isa<SCEVTruncateExpr>(S); + unsigned numTruncs = 0; + for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2; + ++i) { + const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty); + if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S)) + numTruncs++; Operands.push_back(S); } - if (!hasTrunc) - return getAddExpr(Operands); - // In spite we checked in the beginning that ID is not in the cache, - // it is possible that during recursion and different modification - // ID came to cache, so if we found it, just return it. - if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) - return S; - } - - // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can - // eliminate all the truncates, or we replace other casts with truncates. - if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) { - SmallVector<const SCEV *, 4> Operands; - bool hasTrunc = false; - for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) { - const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty); - if (!isa<SCEVCastExpr>(SM->getOperand(i))) - hasTrunc = isa<SCEVTruncateExpr>(S); - Operands.push_back(S); + if (numTruncs < 2) { + if (isa<SCEVAddExpr>(Op)) + return getAddExpr(Operands); + else if (isa<SCEVMulExpr>(Op)) + return getMulExpr(Operands); + else + llvm_unreachable("Unexpected SCEV type for Op."); } - if (!hasTrunc) - return getMulExpr(Operands); - // In spite we checked in the beginning that ID is not in the cache, - // it is possible that during recursion and different modification - // ID came to cache, so if we found it, just return it. + // Although we checked in the beginning that ID is not in the cache, it is + // possible that during recursion and different modification ID was inserted + // into the cache. So if we find it, just return it. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; } @@ -1576,6 +1559,43 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, return false; } +// Finds an integer D for an expression (C + x + y + ...) such that the top +// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or +// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is +// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and +// the (C + x + y + ...) expression is \p WholeAddExpr. +static APInt extractConstantWithoutWrapping(ScalarEvolution &SE, + const SCEVConstant *ConstantTerm, + const SCEVAddExpr *WholeAddExpr) { + const APInt C = ConstantTerm->getAPInt(); + const unsigned BitWidth = C.getBitWidth(); + // Find number of trailing zeros of (x + y + ...) 
w/o the C first: + uint32_t TZ = BitWidth; + for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I) + TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I))); + if (TZ) { + // Set D to be as many least significant bits of C as possible while still + // guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap: + return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C; + } + return APInt(BitWidth, 0); +} + +// Finds an integer D for an affine AddRec expression {C,+,x} such that the top +// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the +// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p +// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count. +static APInt extractConstantWithoutWrapping(ScalarEvolution &SE, + const APInt &ConstantStart, + const SCEV *Step) { + const unsigned BitWidth = ConstantStart.getBitWidth(); + const uint32_t TZ = SE.GetMinTrailingZeros(Step); + if (TZ) + return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth) + : ConstantStart; + return APInt(BitWidth, 0); +} + const SCEV * ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -1732,9 +1752,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - getUnsignedRangeMax(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || - (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && - isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, - AR->getPostIncExpr(*this), N))) { + isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) { // Cache knowledge of AR NUW, which is propagated to this // AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); @@ -1749,9 +1767,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRangeMin(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || - (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && - isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, - AR->getPostIncExpr(*this), N))) { + isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) { // Cache knowledge of AR NW, which is propagated to this // AddRec. Negative step causes unsigned wrap, but it // still can't self-wrap. 
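A worked instance of extractConstantWithoutWrapping above (illustrative check, not part of the commit): for C = 11 added to a term that is always a multiple of 4 (so TZ = 2), D keeps the two low bits of C, i.e. D = 3, and 11 + 4*a splits into 3 + (8 + 4*a), where the residual still has two trailing zero bits and so the outer addition of D cannot carry.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t C = 11, TZ = 2;
  const uint64_t D = C & ((uint64_t(1) << TZ) - 1);   // Low TZ bits of C.
  assert(D == 3);
  for (uint64_t a = 0; a < 1000; ++a) {
    const uint64_t Residual = (C - D) + 4 * a;        // 8 + 4*a.
    assert(Residual % 4 == 0);                        // Trailing zeros preserved.
    assert(D + Residual == C + 4 * a);                // Same overall value.
  }
  return 0;
}
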
@@ -1766,6 +1782,23 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { } } + // zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw> + // if D + (C - D + Step * n) could be proven to not unsigned wrap + // where D maximizes the number of trailing zeros of (C - D + Step * n) + if (const auto *SC = dyn_cast<SCEVConstant>(Start)) { + const APInt &C = SC->getAPInt(); + const APInt &D = extractConstantWithoutWrapping(*this, C, Step); + if (D != 0) { + const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth); + const SCEV *SResidual = + getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags()); + const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1); + return getAddExpr(SZExtD, SZExtR, + (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), + Depth + 1); + } + } + if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) { const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); return getAddRecExpr( @@ -1774,6 +1807,20 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { } } + // zext(A % B) --> zext(A) % zext(B) + { + const SCEV *LHS; + const SCEV *RHS; + if (matchURem(Op, LHS, RHS)) + return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1), + getZeroExtendExpr(RHS, Ty, Depth + 1)); + } + + // zext(A / B) --> zext(A) / zext(B). + if (auto *Div = dyn_cast<SCEVUDivExpr>(Op)) + return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1), + getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1)); + if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw> if (SA->hasNoUnsignedWrap()) { @@ -1784,6 +1831,65 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1); } + + // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...)) + // if D + (C - D + x + y + ...) could be proven to not unsigned wrap + // where D maximizes the number of trailing zeros of (C - D + x + y + ...) + // + // Often address arithmetics contain expressions like + // (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))). + // This transformation is useful while proving that such expressions are + // equal or differ by a small constant amount, see LoadStoreVectorizer pass. + if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) { + const APInt &D = extractConstantWithoutWrapping(*this, SC, SA); + if (D != 0) { + const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth); + const SCEV *SResidual = + getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth); + const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1); + return getAddExpr(SZExtD, SZExtR, + (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), + Depth + 1); + } + } + } + + if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) { + // zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw> + if (SM->hasNoUnsignedWrap()) { + // If the multiply does not unsign overflow then we can, by definition, + // commute the zero extension with the multiply operation. 
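The commuting step noted just above, zext((A * B)<nuw>) == zext(A) * zext(B), can be spot-checked with plain integers at i8/i16 width. A standalone sketch (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B) {
          if (A * B > 255)
            continue; // only products with no unsigned wrap qualify
          uint8_t Narrow = (uint8_t)(A * B);
          // Widening the product equals the product of the widened factors.
          assert((uint16_t)Narrow == (uint16_t)A * (uint16_t)B);
        }
      return 0;
    }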
+ SmallVector<const SCEV *, 4> Ops; + for (const auto *Op : SM->operands()) + Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); + return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1); + } + + // zext(2^K * (trunc X to iN)) to iM -> + // 2^K * (zext(trunc X to i{N-K}) to iM)<nuw> + // + // Proof: + // + // zext(2^K * (trunc X to iN)) to iM + // = zext((trunc X to iN) << K) to iM + // = zext((trunc X to i{N-K}) << K)<nuw> to iM + // (because shl removes the top K bits) + // = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM + // = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>. + // + if (SM->getNumOperands() == 2) + if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0))) + if (MulLHS->getAPInt().isPowerOf2()) + if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) { + int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) - + MulLHS->getAPInt().logBase2(); + Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits); + return getMulExpr( + getZeroExtendExpr(MulLHS, Ty), + getZeroExtendExpr( + getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty), + SCEV::FlagNUW, Depth + 1); + } } // The cast wasn't folded; create an explicit cast node. @@ -1847,24 +1953,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { return getTruncateOrSignExtend(X, Ty); } - // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { - if (SA->getNumOperands() == 2) { - auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0)); - auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1)); - if (SMul && SC1) { - if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) { - const APInt &C1 = SC1->getAPInt(); - const APInt &C2 = SC2->getAPInt(); - if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && - C2.ugt(C1) && C2.isPowerOf2()) - return getAddExpr(getSignExtendExpr(SC1, Ty, Depth + 1), - getSignExtendExpr(SMul, Ty, Depth + 1), - SCEV::FlagAnyWrap, Depth + 1); - } - } - } - // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> if (SA->hasNoSignedWrap()) { // If the addition does not sign overflow then we can, by definition, @@ -1874,6 +1963,28 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1)); return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); } + + // sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...)) + // if D + (C - D + x + y + ...) could be proven to not signed wrap + // where D maximizes the number of trailing zeros of (C - D + x + y + ...) 
+ // + // For instance, this will bring two seemingly different expressions: + // 1 + sext(5 + 20 * %x + 24 * %y) and + // sext(6 + 20 * %x + 24 * %y) + // to the same form: + // 2 + sext(4 + 20 * %x + 24 * %y) + if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) { + const APInt &D = extractConstantWithoutWrapping(*this, SC, SA); + if (D != 0) { + const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth); + const SCEV *SResidual = + getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth); + const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1); + return getAddExpr(SSExtD, SSExtR, + (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), + Depth + 1); + } + } } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the @@ -1994,9 +2105,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { getSignedOverflowLimitForStep(Step, &Pred, this); if (OverflowLimit && (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || - (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) && - isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this), - OverflowLimit)))) { + isKnownOnEveryIteration(Pred, AR, OverflowLimit))) { // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( @@ -2005,21 +2114,20 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { } } - // If Start and Step are constants, check if we can apply this - // transformation: - // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 - auto *SC1 = dyn_cast<SCEVConstant>(Start); - auto *SC2 = dyn_cast<SCEVConstant>(Step); - if (SC1 && SC2) { - const APInt &C1 = SC1->getAPInt(); - const APInt &C2 = SC2->getAPInt(); - if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && - C2.isPowerOf2()) { - Start = getSignExtendExpr(Start, Ty, Depth + 1); - const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, - AR->getNoWrapFlags()); - return getAddExpr(Start, getSignExtendExpr(NewAR, Ty, Depth + 1), - SCEV::FlagAnyWrap, Depth + 1); + // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw> + // if D + (C - D + Step * n) could be proven to not signed wrap + // where D maximizes the number of trailing zeros of (C - D + Step * n) + if (const auto *SC = dyn_cast<SCEVConstant>(Start)) { + const APInt &C = SC->getAPInt(); + const APInt &D = extractConstantWithoutWrapping(*this, C, Step); + if (D != 0) { + const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth); + const SCEV *SResidual = + getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags()); + const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1); + return getAddExpr(SSExtD, SSExtR, + (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), + Depth + 1); } } @@ -2215,22 +2323,35 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); - if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr && - Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) { + if (SignOrUnsignWrap != SignOrUnsignMask && + (Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 && + isa<SCEVConstant>(Ops[0])) { - // (A + C) --> (A + C)<nsw> if the addition does not sign overflow - // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow + auto Opcode = [&] { + switch (Type) { + case scAddExpr: + return 
Instruction::Add; + case scMulExpr: + return Instruction::Mul; + default: + llvm_unreachable("Unexpected SCEV op."); + } + }(); const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt(); + + // (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow. if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Add, C, OBO::NoSignedWrap); + Opcode, C, OBO::NoSignedWrap); if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); } + + // (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow. if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Add, C, OBO::NoUnsignedWrap); + Opcode, C, OBO::NoUnsignedWrap); if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); } @@ -2240,59 +2361,7 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, } bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { - if (!isLoopInvariant(S, L)) - return false; - // If a value depends on a SCEVUnknown which is defined after the loop, we - // conservatively assume that we cannot calculate it at the loop's entry. - struct FindDominatedSCEVUnknown { - bool Found = false; - const Loop *L; - DominatorTree &DT; - LoopInfo &LI; - - FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI) - : L(L), DT(DT), LI(LI) {} - - bool checkSCEVUnknown(const SCEVUnknown *SU) { - if (auto *I = dyn_cast<Instruction>(SU->getValue())) { - if (DT.dominates(L->getHeader(), I->getParent())) - Found = true; - else - assert(DT.dominates(I->getParent(), L->getHeader()) && - "No dominance relationship between SCEV and loop?"); - } - return false; - } - - bool follow(const SCEV *S) { - switch (static_cast<SCEVTypes>(S->getSCEVType())) { - case scConstant: - return false; - case scAddRecExpr: - case scTruncate: - case scZeroExtend: - case scSignExtend: - case scAddExpr: - case scMulExpr: - case scUMaxExpr: - case scSMaxExpr: - case scUDivExpr: - return true; - case scUnknown: - return checkSCEVUnknown(cast<SCEVUnknown>(S)); - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - } - return false; - } - - bool isDone() { return Found; } - }; - - FindDominatedSCEVUnknown FSU(L, DT, LI); - SCEVTraversal<FindDominatedSCEVUnknown> ST(FSU); - ST.visitAll(S); - return !FSU.Found; + return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader()); } /// Get a canonical add expression, or something simpler if possible. @@ -2423,7 +2492,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } if (Ok) { // Evaluate the expression in the larger type. - const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1); + const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1); // If it folds to something simple, use it. Otherwise, don't. if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) return getTruncateExpr(Fold, Ty); @@ -2801,22 +2870,21 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { - // C1*(C2+V) -> C1*C2 + C1*V if (Ops.size() == 2) - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) - // If any of Add's ops are Adds or Muls with a constant, - // apply this transformation as well. 
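Returning to the sext(C + x + y + ...) splitting a few hunks above: the comment there claims that 1 + sext(5 + 20*%x + 24*%y) and sext(6 + 20*%x + 24*%y) both normalize to 2 + sext(4 + 20*%x + 24*%y), with D = C mod 4 since 20 and 24 are both multiples of 4. That claim can be checked exhaustively at i8 width; a standalone sketch, illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Models "sext i8 to i32" on a two's-complement machine.
    static int32_t sext8(uint8_t V) { return (int32_t)(int8_t)V; }

    int main() {
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned Y = 0; Y < 256; ++Y) {
          uint8_t E1 = (uint8_t)(5 + 20 * X + 24 * Y); // 5 + 20*%x + 24*%y
          uint8_t E2 = (uint8_t)(6 + 20 * X + 24 * Y); // 6 + 20*%x + 24*%y
          uint8_t R  = (uint8_t)(4 + 20 * X + 24 * Y); // common residual
          assert(1 + sext8(E1) == 2 + sext8(R)); // 1 + sext(5 + ...) and
          assert(    sext8(E2) == 2 + sext8(R)); // sext(6 + ...) agree
        }
      return 0;
    }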
- if (Add->getNumOperands() == 2) - // TODO: There are some cases where this transformation is not - // profitable, for example: - // Add = (C0 + X) * Y + Z. - // Maybe the scope of this transformation should be narrowed down. - if (containsConstantInAddMulChain(Add)) - return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), - SCEV::FlagAnyWrap, Depth + 1), - getMulExpr(LHSC, Add->getOperand(1), - SCEV::FlagAnyWrap, Depth + 1), - SCEV::FlagAnyWrap, Depth + 1); + // C1*(C2+V) -> C1*C2 + C1*V + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) + // If any of Add's ops are Adds or Muls with a constant, apply this + // transformation as well. + // + // TODO: There are some cases where this transformation is not + // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of + // this transformation should be narrowed down. + if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) + return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), + SCEV::FlagAnyWrap, Depth + 1), + getMulExpr(LHSC, Add->getOperand(1), + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); ++Idx; while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { @@ -3128,6 +3196,21 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } } } + + // (A/B)/C --> A/(B*C) if safe and B*C can be folded. + if (const SCEVUDivExpr *OtherDiv = dyn_cast<SCEVUDivExpr>(LHS)) { + if (auto *DivisorConstant = + dyn_cast<SCEVConstant>(OtherDiv->getRHS())) { + bool Overflow = false; + APInt NewRHS = + DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow); + if (Overflow) { + return getConstant(RHSC->getType(), 0, false); + } + return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS)); + } + } + // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) { SmallVector<const SCEV *, 4> Operands; @@ -3579,12 +3662,13 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) // X umax Y umax Y --> X umax Y // X umax Y --> X, if X is always greater than Y - if (Ops[i] == Ops[i+1] || - isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { - Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning( + ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) { + Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); --i; --e; - } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { - Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i], + Ops[i + 1])) { + Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); --i; --e; } @@ -3611,14 +3695,35 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, const SCEV *RHS) { - // ~smax(~x, ~y) == smin(x, y). - return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); + SmallVector<const SCEV *, 2> Ops = { LHS, RHS }; + return getSMinExpr(Ops); +} + +const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) { + // ~smax(~x, ~y, ~z) == smin(x, y, z). 
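The identity noted just above, ~smax(~x, ~y, ~z) == smin(x, y, z), which the new n-ary getSMinExpr/getUMinExpr overloads rely on, follows from ~x == -x - 1 in two's complement (bitwise-not reverses the ordering). A quick standalone check of the signed and unsigned two-operand forms (illustrative only, not part of the patch):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X = -5; X <= 5; ++X)
        for (int32_t Y = -5; Y <= 5; ++Y)
          assert(~std::max(~X, ~Y) == std::min(X, Y)); // smin via smax
      for (uint32_t X = 0; X <= 10; ++X)
        for (uint32_t Y = 0; Y <= 10; ++Y)
          assert(~std::max(~X, ~Y) == std::min(X, Y)); // umin via umax
      return 0;
    }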
+ SmallVector<const SCEV *, 2> NotOps; + for (auto *S : Ops) + NotOps.push_back(getNotSCEV(S)); + return getNotSCEV(getSMaxExpr(NotOps)); } const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, const SCEV *RHS) { - // ~umax(~x, ~y) == umin(x, y) - return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); + SmallVector<const SCEV *, 2> Ops = { LHS, RHS }; + return getUMinExpr(Ops); +} + +const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "At least one operand must be!"); + // Trivial case. + if (Ops.size() == 1) + return Ops[0]; + + // ~umax(~x, ~y, ~z) == umin(x, y, z). + SmallVector<const SCEV *, 2> NotOps; + for (auto *S : Ops) + NotOps.push_back(getNotSCEV(S)); + return getNotSCEV(getUMaxExpr(NotOps)); } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { @@ -3670,13 +3775,15 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { /// target-specific information. bool ScalarEvolution::isSCEVable(Type *Ty) const { // Integers and pointers are always SCEVable. - return Ty->isIntegerTy() || Ty->isPointerTy(); + return Ty->isIntOrPtrTy(); } /// Return the size in bits of the specified type, for which isSCEVable must /// return true. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); + if (Ty->isPointerTy()) + return getDataLayout().getIndexTypeSizeInBits(Ty); return getDataLayout().getTypeSizeInBits(Ty); } @@ -3779,6 +3886,24 @@ void ScalarEvolution::eraseValueFromMap(Value *V) { } } +/// Check whether value has nuw/nsw/exact set but SCEV does not. +/// TODO: In reality it is better to check the poison recursevely +/// but this is better than nothing. +static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) { + if (auto *I = dyn_cast<Instruction>(V)) { + if (isa<OverflowingBinaryOperator>(I)) { + if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) { + if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) + return true; + if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) + return true; + } + } else if (isa<PossiblyExactOperator>(I) && I->isExact()) + return true; + } + return false; +} + /// Return an existing SCEV if it exists, otherwise analyze the expression and /// create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { @@ -3792,7 +3917,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) { // ValueExprMap before insert S->{V, 0} into ExprValueMap. 
std::pair<ValueExprMapType::iterator, bool> Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second) { + if (Pair.second && !SCEVLostPoisonFlags(S, V)) { ExprValueMap[S].insert({V, nullptr}); // If S == Stripped + Offset, add Stripped -> {V, Offset} into @@ -3895,8 +4020,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, const SCEV * ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); - assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion @@ -3909,8 +4033,7 @@ const SCEV * ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); - assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion @@ -3922,8 +4045,7 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, const SCEV * ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); - assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot noop or zero extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrZeroExtend cannot truncate!"); @@ -3935,8 +4057,7 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) { const SCEV * ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); - assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot noop or sign extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrSignExtend cannot truncate!"); @@ -3948,8 +4069,7 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) { const SCEV * ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); - assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot noop or any extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrAnyExtend cannot truncate!"); @@ -3961,8 +4081,7 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) { const SCEV * ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); - assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && - (Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate or noop with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && "getTruncateOrNoop cannot extend!"); @@ -3986,15 +4105,32 @@ const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS) { - const SCEV *PromotedLHS = LHS; - const SCEV *PromotedRHS = RHS; + SmallVector<const SCEV *, 2> Ops = { LHS, RHS 
}; + return getUMinFromMismatchedTypes(Ops); +} + +const SCEV *ScalarEvolution::getUMinFromMismatchedTypes( + SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "At least one operand must be!"); + // Trivial case. + if (Ops.size() == 1) + return Ops[0]; + + // Find the max type first. + Type *MaxType = nullptr; + for (auto *S : Ops) + if (MaxType) + MaxType = getWiderType(MaxType, S->getType()); + else + MaxType = S->getType(); - if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) - PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); - else - PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + // Extend all ops to max type. + SmallVector<const SCEV *, 2> PromotedOps; + for (auto *S : Ops) + PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType)); - return getUMinExpr(PromotedLHS, PromotedRHS); + // Generate umin. + return getUMinExpr(PromotedOps); } const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { @@ -4071,37 +4207,90 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) { namespace { +/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start +/// expression in case its Loop is L. If it is not L then +/// if IgnoreOtherLoops is true then use AddRec itself +/// otherwise rewrite cannot be done. +/// If SCEV contains non-invariant unknown SCEV rewrite cannot be done. class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> { public: - static const SCEV *rewrite(const SCEV *S, const Loop *L, - ScalarEvolution &SE) { + static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE, + bool IgnoreOtherLoops = true) { SCEVInitRewriter Rewriter(L, SE); const SCEV *Result = Rewriter.visit(S); - return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); + if (Rewriter.hasSeenLoopVariantSCEVUnknown()) + return SE.getCouldNotCompute(); + return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops + ? SE.getCouldNotCompute() + : Result; } const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (!SE.isLoopInvariant(Expr, L)) - Valid = false; + SeenLoopVariantSCEVUnknown = true; return Expr; } const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - // Only allow AddRecExprs for this loop. + // Only re-write AddRecExprs for this loop. if (Expr->getLoop() == L) return Expr->getStart(); - Valid = false; + SeenOtherLoops = true; return Expr; } - bool isValid() { return Valid; } + bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; } + + bool hasSeenOtherLoops() { return SeenOtherLoops; } private: explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) : SCEVRewriteVisitor(SE), L(L) {} const Loop *L; - bool Valid = true; + bool SeenLoopVariantSCEVUnknown = false; + bool SeenOtherLoops = false; +}; + +/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its post +/// increment expression in case its Loop is L. If it is not L then +/// use AddRec itself. +/// If SCEV contains non-invariant unknown SCEV rewrite cannot be done. +class SCEVPostIncRewriter : public SCEVRewriteVisitor<SCEVPostIncRewriter> { +public: + static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) { + SCEVPostIncRewriter Rewriter(L, SE); + const SCEV *Result = Rewriter.visit(S); + return Rewriter.hasSeenLoopVariantSCEVUnknown() + ? 
SE.getCouldNotCompute() + : Result; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (!SE.isLoopInvariant(Expr, L)) + SeenLoopVariantSCEVUnknown = true; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + // Only re-write AddRecExprs for this loop. + if (Expr->getLoop() == L) + return Expr->getPostIncExpr(SE); + SeenOtherLoops = true; + return Expr; + } + + bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; } + + bool hasSeenOtherLoops() { return SeenOtherLoops; } + +private: + explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L) {} + + const Loop *L; + bool SeenLoopVariantSCEVUnknown = false; + bool SeenOtherLoops = false; }; /// This class evaluates the compare condition by matching it against the @@ -4673,7 +4862,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI const SCEV *StartExtended = getExtendedExpr(StartVal, Signed); if (PredIsKnownFalse(StartVal, StartExtended)) { - DEBUG(dbgs() << "P2 is compile-time false\n";); + LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";); return None; } @@ -4681,7 +4870,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI // NSSW or NUSW) const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true); if (PredIsKnownFalse(Accum, AccumExtended)) { - DEBUG(dbgs() << "P3 is compile-time false\n";); + LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";); return None; } @@ -4690,7 +4879,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI if (Expr != ExtendedExpr && !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) { const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr); - DEBUG (dbgs() << "Added Predicate: " << *Pred); + LLVM_DEBUG(dbgs() << "Added Predicate: " << *Pred); Predicates.push_back(Pred); } }; @@ -4953,7 +5142,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { // by one iteration: // PHI(f(0), f({1,+,1})) --> f({0,+,1}) const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); - const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this); + const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false); if (Shifted != getCouldNotCompute() && Start != getCouldNotCompute()) { const SCEV *StartVal = getSCEV(StartValueV); @@ -5515,6 +5704,25 @@ ScalarEvolution::getRangeRef(const SCEV *S, APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1)); } + // A range of Phi is a subset of union of all ranges of its input. + if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) { + // Make sure that we do not run over cycled Phis. + if (PendingPhiRanges.insert(Phi).second) { + ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); + for (auto &Op : Phi->operands()) { + auto OpRange = getRangeRef(getSCEV(Op), SignHint); + RangeFromOps = RangeFromOps.unionWith(OpRange); + // No point to continue if we already have a full set. + if (RangeFromOps.isFullSet()) + break; + } + ConservativeResult = ConservativeResult.intersectWith(RangeFromOps); + bool Erased = PendingPhiRanges.erase(Phi); + assert(Erased && "Failed to erase Phi properly?"); + (void) Erased; + } + } + return setRange(U, SignHint, std::move(ConservativeResult)); } @@ -6134,33 +6342,33 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { } break; - case Instruction::Shl: - // Turn shift left of a constant amount into a multiply. 
- if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) { - uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth(); - - // If the shift count is not less than the bitwidth, the result of - // the shift is undefined. Don't try to analyze it, because the - // resolution chosen here may differ from the resolution chosen in - // other parts of the compiler. - if (SA->getValue().uge(BitWidth)) - break; + case Instruction::Shl: + // Turn shift left of a constant amount into a multiply. + if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) { + uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth(); - // It is currently not resolved how to interpret NSW for left - // shift by BitWidth - 1, so we avoid applying flags in that - // case. Remove this check (or this comment) once the situation - // is resolved. See - // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html - // and http://reviews.llvm.org/D8890 . - auto Flags = SCEV::FlagAnyWrap; - if (BO->Op && SA->getValue().ult(BitWidth - 1)) - Flags = getNoWrapFlagsFromUB(BO->Op); + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (SA->getValue().uge(BitWidth)) + break; - Constant *X = ConstantInt::get(getContext(), - APInt::getOneBitSet(BitWidth, SA->getZExtValue())); - return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags); - } - break; + // It is currently not resolved how to interpret NSW for left + // shift by BitWidth - 1, so we avoid applying flags in that + // case. Remove this check (or this comment) once the situation + // is resolved. See + // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html + // and http://reviews.llvm.org/D8890 . + auto Flags = SCEV::FlagAnyWrap; + if (BO->Op && SA->getValue().ult(BitWidth - 1)) + Flags = getNoWrapFlagsFromUB(BO->Op); + + Constant *X = ConstantInt::get( + getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); + return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags); + } + break; case Instruction::AShr: { // AShr X, C, where C is a constant. @@ -6384,11 +6592,11 @@ const SCEV *ScalarEvolution::getExitCount(const Loop *L, const SCEV * ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, SCEVUnionPredicate &Preds) { - return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds); + return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds); } const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).getExact(this); + return getBackedgeTakenInfo(L).getExact(L, this); } /// Similar to getBackedgeTakenCount, except return the least SCEV value that is @@ -6445,8 +6653,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // must be cleared in this scope. BackedgeTakenInfo Result = computeBackedgeTakenCount(L); - if (Result.getExact(this) != getCouldNotCompute()) { - assert(isLoopInvariant(Result.getExact(this), L) && + // In product build, there are no usage of statistic. 
+ (void)NumTripCountsComputed; + (void)NumTripCountsNotComputed; +#if LLVM_ENABLE_STATS || !defined(NDEBUG) + const SCEV *BEExact = Result.getExact(L, this); + if (BEExact != getCouldNotCompute()) { + assert(isLoopInvariant(BEExact, L) && isLoopInvariant(Result.getMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; @@ -6456,6 +6669,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; } +#endif // LLVM_ENABLE_STATS || !defined(NDEBUG) // Now that we know more about the trip count for this loop, forget any // existing SCEV values for PHI nodes in this loop since they are only @@ -6591,6 +6805,12 @@ void ScalarEvolution::forgetLoop(const Loop *L) { } } +void ScalarEvolution::forgetTopmostLoop(const Loop *L) { + while (Loop *Parent = L->getParentLoop()) + L = Parent; + forgetLoop(L); +} + void ScalarEvolution::forgetValue(Value *V) { Instruction *I = dyn_cast<Instruction>(V); if (!I) return; @@ -6619,28 +6839,35 @@ void ScalarEvolution::forgetValue(Value *V) { } /// Get the exact loop backedge taken count considering all loop exits. A -/// computable result can only be returned for loops with a single exit. -/// Returning the minimum taken count among all exits is incorrect because one -/// of the loop's exit limit's may have been skipped. howFarToZero assumes that -/// the limit of each loop test is never skipped. This is a valid assumption as -/// long as the loop exits via that test. For precise results, it is the -/// caller's responsibility to specify the relevant loop exit using -/// getExact(ExitingBlock, SE). +/// computable result can only be returned for loops with all exiting blocks +/// dominating the latch. howFarToZero assumes that the limit of each loop test +/// is never skipped. This is a valid assumption as long as the loop exits via +/// that test. For precise results, it is the caller's responsibility to specify +/// the relevant loop exiting block using getExact(ExitingBlock, SE). const SCEV * -ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE, +ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, SCEVUnionPredicate *Preds) const { // If any exits were not computable, the loop is not computable. if (!isComplete() || ExitNotTaken.empty()) return SE->getCouldNotCompute(); - const SCEV *BECount = nullptr; + const BasicBlock *Latch = L->getLoopLatch(); + // All exiting blocks we have collected must dominate the only backedge. + if (!Latch) + return SE->getCouldNotCompute(); + + // All exiting blocks we have gathered dominate loop's latch, so exact trip + // count is simply a minimum out of all these calculated exit counts. 
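The reworked getExact above returns the unsigned minimum of the per-exit counts once every collected exiting block is known to dominate the latch; the loop simply leaves through whichever exit fires first. A tiny standalone simulation of that reasoning (illustrative only, not part of the patch; the counts 7 and 10 are arbitrary):

    #include <algorithm>
    #include <cassert>

    int main() {
      const unsigned ExitCountA = 7;  // exit A fires when i == 7
      const unsigned ExitCountB = 10; // exit B fires when i == 10
      unsigned BackedgesTaken = 0;
      for (unsigned i = 0;; ++i) {
        if (i == ExitCountA) break; // both exits are tested on every iteration,
        if (i == ExitCountB) break; // i.e. both exiting blocks dominate the latch
        ++BackedgesTaken;
      }
      assert(BackedgesTaken == std::min(ExitCountA, ExitCountB));
      return 0;
    }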
+ SmallVector<const SCEV *, 2> Ops; for (auto &ENT : ExitNotTaken) { - assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); + const SCEV *BECount = ENT.ExactNotTaken; + assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!"); + assert(SE->DT.dominates(ENT.ExitingBlock, Latch) && + "We should only have known counts for exiting blocks that dominate " + "latch!"); + + Ops.push_back(BECount); - if (!BECount) - BECount = ENT.ExactNotTaken; - else if (BECount != ENT.ExactNotTaken) - return SE->getCouldNotCompute(); if (Preds && !ENT.hasAlwaysTruePredicate()) Preds->add(ENT.Predicate.get()); @@ -6648,8 +6875,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE, "Predicate should be always true!"); } - assert(BECount && "Invalid not taken count for loop exit"); - return BECount; + return SE->getUMinFromMismatchedTypes(Ops); } /// Get the exact not taken count for this loop exit. @@ -6846,99 +7072,60 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates) { - // Okay, we've chosen an exiting block. See what condition causes us to exit - // at this block and remember the exit block and whether all other targets - // lead to the loop header. - bool MustExecuteLoopHeader = true; - BasicBlock *Exit = nullptr; - for (auto *SBB : successors(ExitingBlock)) - if (!L->contains(SBB)) { - if (Exit) // Multiple exit successors. - return getCouldNotCompute(); - Exit = SBB; - } else if (SBB != L->getHeader()) { - MustExecuteLoopHeader = false; - } - - // At this point, we know we have a conditional branch that determines whether - // the loop is exited. However, we don't know if the branch is executed each - // time through the loop. If not, then the execution count of the branch will - // not be equal to the trip count of the loop. - // - // Currently we check for this by checking to see if the Exit branch goes to - // the loop header. If so, we know it will always execute the same number of - // times as the loop. We also handle the case where the exit block *is* the - // loop header. This is common for un-rotated loops. - // - // If both of those tests fail, walk up the unique predecessor chain to the - // header, stopping if there is an edge that doesn't exit the loop. If the - // header is reached, the execution count of the branch will be equal to the - // trip count of the loop. - // - // More extensive analysis could be done to handle more cases here. - // - if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) { - // The simple checks failed, try climbing the unique predecessor chain - // up to the header. - bool Ok = false; - for (BasicBlock *BB = ExitingBlock; BB; ) { - BasicBlock *Pred = BB->getUniquePredecessor(); - if (!Pred) - return getCouldNotCompute(); - TerminatorInst *PredTerm = Pred->getTerminator(); - for (const BasicBlock *PredSucc : PredTerm->successors()) { - if (PredSucc == BB) - continue; - // If the predecessor has a successor that isn't BB and isn't - // outside the loop, assume the worst. - if (L->contains(PredSucc)) - return getCouldNotCompute(); - } - if (Pred == L->getHeader()) { - Ok = true; - break; - } - BB = Pred; - } - if (!Ok) - return getCouldNotCompute(); - } + assert(L->contains(ExitingBlock) && "Exit count for non-loop block?"); + // If our exiting block does not dominate the latch, then its connection with + // loop's exit limit may be far from trivial. 
+ const BasicBlock *Latch = L->getLoopLatch(); + if (!Latch || !DT.dominates(ExitingBlock, Latch)) + return getCouldNotCompute(); bool IsOnlyExit = (L->getExitingBlock() != nullptr); TerminatorInst *Term = ExitingBlock->getTerminator(); if (BranchInst *BI = dyn_cast<BranchInst>(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); + bool ExitIfTrue = !L->contains(BI->getSuccessor(0)); + assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) && + "It should have one successor in loop and one exit block!"); // Proceed to the next level to examine the exit condition expression. return computeExitLimitFromCond( - L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), + L, BI->getCondition(), ExitIfTrue, /*ControlsExit=*/IsOnlyExit, AllowPredicates); } - if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) + if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) { + // For switch, make sure that there is a single exit from the loop. + BasicBlock *Exit = nullptr; + for (auto *SBB : successors(ExitingBlock)) + if (!L->contains(SBB)) { + if (Exit) // Multiple exit successors. + return getCouldNotCompute(); + Exit = SBB; + } + assert(Exit && "Exiting block must have at least one exit"); return computeExitLimitFromSingleExitSwitch(L, SI, Exit, /*ControlsExit=*/IsOnlyExit); + } return getCouldNotCompute(); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond( - const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, + const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsExit, bool AllowPredicates) { - ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates); - return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB, + ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates); + return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates); } Optional<ScalarEvolution::ExitLimit> ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, - BasicBlock *TBB, BasicBlock *FBB, - bool ControlsExit, bool AllowPredicates) { + bool ExitIfTrue, bool ControlsExit, + bool AllowPredicates) { (void)this->L; - (void)this->TBB; - (void)this->FBB; + (void)this->ExitIfTrue; (void)this->AllowPredicates; - assert(this->L == L && this->TBB == TBB && this->FBB == FBB && + assert(this->L == L && this->ExitIfTrue == ExitIfTrue && this->AllowPredicates == AllowPredicates && "Variance in assumed invariant key components!"); auto Itr = TripCountMap.find({ExitCond, ControlsExit}); @@ -6948,47 +7135,48 @@ ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, } void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond, - BasicBlock *TBB, BasicBlock *FBB, + bool ExitIfTrue, bool ControlsExit, bool AllowPredicates, const ExitLimit &EL) { - assert(this->L == L && this->TBB == TBB && this->FBB == FBB && + assert(this->L == L && this->ExitIfTrue == ExitIfTrue && this->AllowPredicates == AllowPredicates && "Variance in assumed invariant key components!"); auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL}); assert(InsertResult.second && "Expected successful insertion!"); (void)InsertResult; + (void)ExitIfTrue; } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( - ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, - BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, + bool ControlsExit, bool 
AllowPredicates) { if (auto MaybeEL = - Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates)) + Cache.find(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates)) return *MaybeEL; - ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB, + ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates); - Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL); + Cache.insert(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates, EL); return EL; } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( - ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, - BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, + bool ControlsExit, bool AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. - bool EitherMayExit = L->contains(TBB); + bool EitherMayExit = !ExitIfTrue; ExitLimit EL0 = computeExitLimitFromCondCached( - Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, - AllowPredicates); + Cache, L, BO->getOperand(0), ExitIfTrue, + ControlsExit && !EitherMayExit, AllowPredicates); ExitLimit EL1 = computeExitLimitFromCondCached( - Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, - AllowPredicates); + Cache, L, BO->getOperand(1), ExitIfTrue, + ControlsExit && !EitherMayExit, AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -7010,7 +7198,6 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. - assert(L->contains(FBB) && "Loop block has no successor in loop!"); if (EL0.MaxNotTaken == EL1.MaxNotTaken) MaxBECount = EL0.MaxNotTaken; if (EL0.ExactNotTaken == EL1.ExactNotTaken) @@ -7031,13 +7218,13 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. - bool EitherMayExit = L->contains(FBB); + bool EitherMayExit = ExitIfTrue; ExitLimit EL0 = computeExitLimitFromCondCached( - Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, - AllowPredicates); + Cache, L, BO->getOperand(0), ExitIfTrue, + ControlsExit && !EitherMayExit, AllowPredicates); ExitLimit EL1 = computeExitLimitFromCondCached( - Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, - AllowPredicates); + Cache, L, BO->getOperand(1), ExitIfTrue, + ControlsExit && !EitherMayExit, AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -7059,7 +7246,6 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. - assert(L->contains(TBB) && "Loop block has no successor in loop!"); if (EL0.MaxNotTaken == EL1.MaxNotTaken) MaxBECount = EL0.MaxNotTaken; if (EL0.ExactNotTaken == EL1.ExactNotTaken) @@ -7075,12 +7261,12 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( // Proceed to the next level to examine the icmp. 
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) { ExitLimit EL = - computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit); if (EL.hasFullInfo() || !AllowPredicates) return EL; // Try again, but use SCEV predicates this time. - return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit, + return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit, /*AllowPredicates=*/true); } @@ -7089,7 +7275,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( // preserve the CFG and is temporarily leaving constant conditions // in place. if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { - if (L->contains(FBB) == !CI->getZExtValue()) + if (ExitIfTrue == !CI->getZExtValue()) // The backedge is always taken. return getCouldNotCompute(); else @@ -7098,19 +7284,18 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( } // If it's not an integer or pointer comparison then compute it the hard way. - return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + return computeExitCountExhaustively(L, ExitCond, ExitIfTrue); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, - BasicBlock *TBB, - BasicBlock *FBB, + bool ExitIfTrue, bool ControlsExit, bool AllowPredicates) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Pred; - if (!L->contains(FBB)) + if (!ExitIfTrue) Pred = ExitCond->getPredicate(); else Pred = ExitCond->getInversePredicate(); @@ -7192,7 +7377,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, } auto *ExhaustiveCount = - computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + computeExitCountExhaustively(L, ExitCond, ExitIfTrue); if (!isa<SCEVCouldNotCompute>(ExhaustiveCount)) return ExhaustiveCount; @@ -8104,6 +8289,14 @@ const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { return getSCEVAtScope(getSCEV(V), L); } +const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const { + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) + return stripInjectiveFunctions(ZExt->getOperand()); + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) + return stripInjectiveFunctions(SExt->getOperand()); + return S; +} + /// Finds the minimum unsigned root of the following equation: /// /// A * X = B (mod N) @@ -8233,7 +8426,9 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, return getCouldNotCompute(); // Otherwise it will loop infinitely. } - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V); + const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(stripInjectiveFunctions(V)); + if (!AddRec && AllowPredicates) // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the @@ -8641,43 +8836,88 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) { return isKnownNegative(S) || isKnownPositive(S); } +std::pair<const SCEV *, const SCEV *> +ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) { + // Compute SCEV on entry of loop L. + const SCEV *Start = SCEVInitRewriter::rewrite(S, L, *this); + if (Start == getCouldNotCompute()) + return { Start, Start }; + // Compute post increment SCEV for loop L. 
+ const SCEV *PostInc = SCEVPostIncRewriter::rewrite(S, L, *this); + assert(PostInc != getCouldNotCompute() && "Unexpected could not compute"); + return { Start, PostInc }; +} + +bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // First collect all loops. + SmallPtrSet<const Loop *, 8> LoopsUsed; + getUsedLoops(LHS, LoopsUsed); + getUsedLoops(RHS, LoopsUsed); + + if (LoopsUsed.empty()) + return false; + + // Domination relationship must be a linear order on collected loops. +#ifndef NDEBUG + for (auto *L1 : LoopsUsed) + for (auto *L2 : LoopsUsed) + assert((DT.dominates(L1->getHeader(), L2->getHeader()) || + DT.dominates(L2->getHeader(), L1->getHeader())) && + "Domination relationship is not a linear order"); +#endif + + const Loop *MDL = + *std::max_element(LoopsUsed.begin(), LoopsUsed.end(), + [&](const Loop *L1, const Loop *L2) { + return DT.properlyDominates(L1->getHeader(), L2->getHeader()); + }); + + // Get init and post increment value for LHS. + auto SplitLHS = SplitIntoInitAndPostInc(MDL, LHS); + // if LHS contains unknown non-invariant SCEV then bail out. + if (SplitLHS.first == getCouldNotCompute()) + return false; + assert (SplitLHS.second != getCouldNotCompute() && "Unexpected CNC"); + // Get init and post increment value for RHS. + auto SplitRHS = SplitIntoInitAndPostInc(MDL, RHS); + // if RHS contains unknown non-invariant SCEV then bail out. + if (SplitRHS.first == getCouldNotCompute()) + return false; + assert (SplitRHS.second != getCouldNotCompute() && "Unexpected CNC"); + // It is possible that init SCEV contains an invariant load but it does + // not dominate MDL and is not available at MDL loop entry, so we should + // check it here. + if (!isAvailableAtLoopEntry(SplitLHS.first, MDL) || + !isAvailableAtLoopEntry(SplitRHS.first, MDL)) + return false; + + return isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first) && + isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second, + SplitRHS.second); +} + bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // Canonicalize the inputs first. (void)SimplifyICmpOperands(Pred, LHS, RHS); - // If LHS or RHS is an addrec, check to see if the condition is true in - // every iteration of the loop. - // If LHS and RHS are both addrec, both conditions must be true in - // every iteration of the loop. - const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); - const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); - bool LeftGuarded = false; - bool RightGuarded = false; - if (LAR) { - const Loop *L = LAR->getLoop(); - if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) && - isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) { - if (!RAR) return true; - LeftGuarded = true; - } - } - if (RAR) { - const Loop *L = RAR->getLoop(); - if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) && - isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) { - if (!LAR) return true; - RightGuarded = true; - } - } - if (LeftGuarded && RightGuarded) + if (isKnownViaInduction(Pred, LHS, RHS)) return true; if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) return true; - // Otherwise see what can be done with known constant ranges. - return isKnownPredicateViaConstantRanges(Pred, LHS, RHS); + // Otherwise see what can be done with some simple reasoning. 
+ return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS); +} + +bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, + const SCEVAddRecExpr *LHS, + const SCEV *RHS) { + const Loop *L = LHS->getLoop(); + return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) && + isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS); } bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, @@ -8944,7 +9184,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, // (interprocedural conditions notwithstanding). if (!L) return true; - if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS)) + if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS)) return true; BasicBlock *Latch = L->getLoopLatch(); @@ -9049,9 +9289,68 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, // (interprocedural conditions notwithstanding). if (!L) return false; - if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS)) + // Both LHS and RHS must be available at loop entry. + assert(isAvailableAtLoopEntry(LHS, L) && + "LHS is not available at Loop Entry"); + assert(isAvailableAtLoopEntry(RHS, L) && + "RHS is not available at Loop Entry"); + + if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS)) return true; + // If we cannot prove strict comparison (e.g. a > b), maybe we can prove + // the facts (a >= b && a != b) separately. A typical situation is when the + // non-strict comparison is known from ranges and non-equality is known from + // dominating predicates. If we are proving strict comparison, we always try + // to prove non-equality and non-strict comparison separately. + auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred); + const bool ProvingStrictComparison = (Pred != NonStrictPredicate); + bool ProvedNonStrictComparison = false; + bool ProvedNonEquality = false; + + if (ProvingStrictComparison) { + ProvedNonStrictComparison = + isKnownViaNonRecursiveReasoning(NonStrictPredicate, LHS, RHS); + ProvedNonEquality = + isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_NE, LHS, RHS); + if (ProvedNonStrictComparison && ProvedNonEquality) + return true; + } + + // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard. + auto ProveViaGuard = [&](BasicBlock *Block) { + if (isImpliedViaGuard(Block, Pred, LHS, RHS)) + return true; + if (ProvingStrictComparison) { + if (!ProvedNonStrictComparison) + ProvedNonStrictComparison = + isImpliedViaGuard(Block, NonStrictPredicate, LHS, RHS); + if (!ProvedNonEquality) + ProvedNonEquality = + isImpliedViaGuard(Block, ICmpInst::ICMP_NE, LHS, RHS); + if (ProvedNonStrictComparison && ProvedNonEquality) + return true; + } + return false; + }; + + // Try to prove (Pred, LHS, RHS) using isImpliedCond. + auto ProveViaCond = [&](Value *Condition, bool Inverse) { + if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) + return true; + if (ProvingStrictComparison) { + if (!ProvedNonStrictComparison) + ProvedNonStrictComparison = + isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse); + if (!ProvedNonEquality) + ProvedNonEquality = + isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse); + if (ProvedNonStrictComparison && ProvedNonEquality) + return true; + } + return false; + }; + // Starting at the loop predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors // leading to the original header. 
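The strict-comparison handling added above leans on a simple decomposition: a strict predicate is exactly the matching non-strict predicate plus non-equality, so the two weaker facts can be proven by different means (say, one from ranges and one from a dominating guard) and then combined. A standalone check of the decomposition itself (illustrative only, not part of the patch):

    #include <cassert>

    int main() {
      for (int A = -8; A <= 8; ++A)
        for (int B = -8; B <= 8; ++B) {
          assert((A > B) == ((A >= B) && (A != B))); // SGT == SGE && NE
          assert((A < B) == ((A <= B) && (A != B))); // SLT == SLE && NE
        }
      return 0;
    }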
@@ -9060,7 +9359,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { - if (isImpliedViaGuard(Pair.first, Pred, LHS, RHS)) + if (ProveViaGuard(Pair.first)) return true; BranchInst *LoopEntryPredicate = @@ -9069,9 +9368,8 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, LoopEntryPredicate->isUnconditional()) continue; - if (isImpliedCond(Pred, LHS, RHS, - LoopEntryPredicate->getCondition(), - LoopEntryPredicate->getSuccessor(0) != Pair.second)) + if (ProveViaCond(LoopEntryPredicate->getCondition(), + LoopEntryPredicate->getSuccessor(0) != Pair.second)) return true; } @@ -9083,7 +9381,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, if (!DT.dominates(CI, L->getHeader())) continue; - if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) + if (ProveViaCond(CI->getArgOperand(0), false)) return true; } @@ -9318,17 +9616,25 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More, return M - L; } - const SCEV *L, *R; SCEV::NoWrapFlags Flags; - if (splitBinaryAdd(Less, L, R, Flags)) - if (const auto *LC = dyn_cast<SCEVConstant>(L)) - if (R == More) - return -(LC->getAPInt()); - - if (splitBinaryAdd(More, L, R, Flags)) - if (const auto *LC = dyn_cast<SCEVConstant>(L)) - if (R == Less) - return LC->getAPInt(); + const SCEV *LLess = nullptr, *RLess = nullptr; + const SCEV *LMore = nullptr, *RMore = nullptr; + const SCEVConstant *C1 = nullptr, *C2 = nullptr; + // Compare (X + C1) vs X. + if (splitBinaryAdd(Less, LLess, RLess, Flags)) + if ((C1 = dyn_cast<SCEVConstant>(LLess))) + if (RLess == More) + return -(C1->getAPInt()); + + // Compare X vs (X + C2). + if (splitBinaryAdd(More, LMore, RMore, Flags)) + if ((C2 = dyn_cast<SCEVConstant>(LMore))) + if (RMore == Less) + return C2->getAPInt(); + + // Compare (X + C1) vs (X + C2). + if (C1 && C2 && RLess == RMore) + return C2->getAPInt() - C1->getAPInt(); return None; } @@ -9405,10 +9711,121 @@ bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( } // Try to prove (1) or (2), as needed. - return isLoopEntryGuardedByCond(L, Pred, FoundRHS, + return isAvailableAtLoopEntry(FoundRHS, L) && + isLoopEntryGuardedByCond(L, Pred, FoundRHS, getConstant(FoundRHSLimit)); } +bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS, unsigned Depth) { + const PHINode *LPhi = nullptr, *RPhi = nullptr; + + auto ClearOnExit = make_scope_exit([&]() { + if (LPhi) { + bool Erased = PendingMerges.erase(LPhi); + assert(Erased && "Failed to erase LPhi!"); + (void)Erased; + } + if (RPhi) { + bool Erased = PendingMerges.erase(RPhi); + assert(Erased && "Failed to erase RPhi!"); + (void)Erased; + } + }); + + // Find respective Phis and check that they are not being pending. + if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS)) + if (auto *Phi = dyn_cast<PHINode>(LU->getValue())) { + if (!PendingMerges.insert(Phi).second) + return false; + LPhi = Phi; + } + if (const SCEVUnknown *RU = dyn_cast<SCEVUnknown>(RHS)) + if (auto *Phi = dyn_cast<PHINode>(RU->getValue())) { + // If we detect a loop of Phi nodes being processed by this method, for + // example: + // + // %a = phi i32 [ %some1, %preheader ], [ %b, %latch ] + // %b = phi i32 [ %some2, %preheader ], [ %a, %latch ] + // + // we don't want to deal with a case that complex, so return conservative + // answer false. 
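The isImpliedViaMerge routine being introduced here reduces a fact about a phi to the same fact about each of its incoming values: if the predicate holds for every input, it holds for the merged value no matter which edge was taken. A minimal standalone sketch of that reduction (illustrative only, not part of the patch; mergedValue and the constants are made up):

    #include <cassert>

    // Stands in for "%phi = phi i32 [ %a, %left ], [ %b, %right ]".
    static int mergedValue(bool CameFromLeft, int A, int B) {
      return CameFromLeft ? A : B;
    }

    int main() {
      const int RHS = 10, A = 3, B = 7;
      assert(A < RHS && B < RHS);            // predicate proven for each input
      const bool Paths[] = {true, false};
      for (bool CameFromLeft : Paths)
        assert(mergedValue(CameFromLeft, A, B) < RHS); // so it holds for the phi
      return 0;
    }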
+ if (!PendingMerges.insert(Phi).second) + return false; + RPhi = Phi; + } + + // If none of LHS, RHS is a Phi, nothing to do here. + if (!LPhi && !RPhi) + return false; + + // If there is a SCEVUnknown Phi we are interested in, make it left. + if (!LPhi) { + std::swap(LHS, RHS); + std::swap(FoundLHS, FoundRHS); + std::swap(LPhi, RPhi); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!"); + const BasicBlock *LBB = LPhi->getParent(); + const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); + + auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) { + return isKnownViaNonRecursiveReasoning(Pred, S1, S2) || + isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) || + isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth); + }; + + if (RPhi && RPhi->getParent() == LBB) { + // Case one: RHS is also a SCEVUnknown Phi from the same basic block. + // If we compare two Phis from the same block, and for each entry block + // the predicate is true for incoming values from this block, then the + // predicate is also true for the Phis. + for (const BasicBlock *IncBB : predecessors(LBB)) { + const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB)); + if (!ProvedEasily(L, R)) + return false; + } + } else if (RAR && RAR->getLoop()->getHeader() == LBB) { + // Case two: RHS is also a Phi from the same basic block, and it is an + // AddRec. It means that there is a loop which has both AddRec and Unknown + // PHIs, for it we can compare incoming values of AddRec from above the loop + // and latch with their respective incoming values of LPhi. + // TODO: Generalize to handle loops with many inputs in a header. + if (LPhi->getNumIncomingValues() != 2) return false; + + auto *RLoop = RAR->getLoop(); + auto *Predecessor = RLoop->getLoopPredecessor(); + assert(Predecessor && "Loop with AddRec with no predecessor?"); + const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Predecessor)); + if (!ProvedEasily(L1, RAR->getStart())) + return false; + auto *Latch = RLoop->getLoopLatch(); + assert(Latch && "Loop with AddRec with no latch?"); + const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch)); + if (!ProvedEasily(L2, RAR->getPostIncExpr(*this))) + return false; + } else { + // In all other cases go over inputs of LHS and compare each of them to RHS, + // the predicate is true for (LHS, RHS) if it is true for all such pairs. + // At this point RHS is either a non-Phi, or it is a Phi from some block + // different from LBB. + for (const BasicBlock *IncBB : predecessors(LBB)) { + // Check that RHS is available in this block. + if (!dominates(RHS, IncBB)) + return false; + const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + if (!ProvedEasily(L, RHS)) + return false; + } + } + return true; +} + bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, @@ -9562,13 +9979,14 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, }; // Acquire values from extensions. + auto *OrigLHS = LHS; auto *OrigFoundLHS = FoundLHS; LHS = GetOpFromSExt(LHS); FoundLHS = GetOpFromSExt(FoundLHS); // Is the SGT predicate can be proved trivially or using the found context. 
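// [Editor's note: illustrative sketch, not part of the diff. isImpliedViaMerge
//  above reduces a claim about a SCEVUnknown Phi to the same claim about every
//  incoming value; this stand-in shows that reduction with plain ints and a
//  hypothetical ProvedEasily callback.]
#include <functional>
#include <vector>

static bool provedForAllIncoming(const std::vector<int> &IncomingValues,
                                 const std::function<bool(int)> &ProvedEasily) {
  for (int V : IncomingValues)
    if (!ProvedEasily(V))
      return false; // one unproven input defeats the merge
  return true;      // the predicate holds for the Phi if it holds for all inputs
}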
auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) { - return isKnownViaSimpleReasoning(ICmpInst::ICMP_SGT, S1, S2) || + return isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGT, S1, S2) || isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS, FoundRHS, Depth + 1); }; @@ -9669,11 +10087,17 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, } } + // If our expression contained SCEVUnknown Phis, and we split it down and now + // need to prove something for them, try to prove the predicate for every + // possible incoming values of those Phis. + if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1)) + return true; + return false; } bool -ScalarEvolution::isKnownViaSimpleReasoning(ICmpInst::Predicate Pred, +ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || @@ -9695,26 +10119,26 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, break; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) && - isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) && - isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - if (isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) && - isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - if (isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) && - isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS)) return true; break; } @@ -10192,6 +10616,31 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, return SE.getCouldNotCompute(); } +const SCEVAddRecExpr * +SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const { + assert(getNumOperands() > 1 && "AddRec with zero step?"); + // There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)), + // but in this case we cannot guarantee that the value returned will be an + // AddRec because SCEV does not have a fixed point where it stops + // simplification: it is legal to return ({rec1} + {rec2}). For example, it + // may happen if we reach arithmetic depth limit while simplifying. So we + // construct the returned value explicitly. + SmallVector<const SCEV *, 3> Ops; + // If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and + // (this + Step) is {A+B,+,B+C,+...,+,N}. 
+ for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i) + Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1))); + // We know that the last operand is not a constant zero (otherwise it would + // have been popped out earlier). This guarantees us that if the result has + // the same last operand, then it will also not be popped out, meaning that + // the returned value will be an AddRec. + const SCEV *Last = getOperand(getNumOperands() - 1); + assert(!Last->isZero() && "Recurrency with zero step?"); + Ops.push_back(Last); + return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, getLoop(), + SCEV::FlagAnyWrap)); +} + // Return true when S contains at least an undef value. static inline bool containsUndefs(const SCEV *S) { return SCEVExprContains(S, [](const SCEV *S) { @@ -10334,22 +10783,22 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr, SCEVCollectStrides StrideCollector(*this, Strides); visitAll(Expr, StrideCollector); - DEBUG({ - dbgs() << "Strides:\n"; - for (const SCEV *S : Strides) - dbgs() << *S << "\n"; - }); + LLVM_DEBUG({ + dbgs() << "Strides:\n"; + for (const SCEV *S : Strides) + dbgs() << *S << "\n"; + }); for (const SCEV *S : Strides) { SCEVCollectTerms TermCollector(Terms); visitAll(S, TermCollector); } - DEBUG({ - dbgs() << "Terms:\n"; - for (const SCEV *T : Terms) - dbgs() << *T << "\n"; - }); + LLVM_DEBUG({ + dbgs() << "Terms:\n"; + for (const SCEV *T : Terms) + dbgs() << *T << "\n"; + }); SCEVCollectAddRecMultiplies MulCollector(Terms, *this); visitAll(Expr, MulCollector); @@ -10460,18 +10909,18 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, if (!containsParameters(Terms)) return; - DEBUG({ - dbgs() << "Terms:\n"; - for (const SCEV *T : Terms) - dbgs() << *T << "\n"; - }); + LLVM_DEBUG({ + dbgs() << "Terms:\n"; + for (const SCEV *T : Terms) + dbgs() << *T << "\n"; + }); // Remove duplicates. array_pod_sort(Terms.begin(), Terms.end()); Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); // Put larger terms first. - std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) { + llvm::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) { return numberOfTerms(LHS) > numberOfTerms(RHS); }); @@ -10491,11 +10940,11 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, if (const SCEV *NewT = removeConstantFactors(*this, T)) NewTerms.push_back(NewT); - DEBUG({ - dbgs() << "Terms after sorting:\n"; - for (const SCEV *T : NewTerms) - dbgs() << *T << "\n"; - }); + LLVM_DEBUG({ + dbgs() << "Terms after sorting:\n"; + for (const SCEV *T : NewTerms) + dbgs() << *T << "\n"; + }); if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) { Sizes.clear(); @@ -10505,11 +10954,11 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, // The last element to be pushed into Sizes is the size of an element. 
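// [Editor's note: illustrative sketch, not part of the diff. getPostIncExpr
//  above forms {A,+,B,+,C} + {B,+,C} as {A+B,+,B+C,+,C} by summing adjacent
//  operands and reusing the last one; the same construction over plain
//  integers, with no SCEV folding involved.]
#include <cassert>
#include <vector>

static std::vector<long> postIncOperands(const std::vector<long> &Ops) {
  assert(Ops.size() > 1 && "add-rec needs at least a start and a step");
  std::vector<long> Result;
  for (size_t I = 0, E = Ops.size() - 1; I < E; ++I)
    Result.push_back(Ops[I] + Ops[I + 1]); // Op[i] + Op[i+1]
  Result.push_back(Ops.back());            // trailing step operand kept as-is
  return Result;
}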
Sizes.push_back(ElementSize); - DEBUG({ - dbgs() << "Sizes:\n"; - for (const SCEV *S : Sizes) - dbgs() << *S << "\n"; - }); + LLVM_DEBUG({ + dbgs() << "Sizes:\n"; + for (const SCEV *S : Sizes) + dbgs() << *S << "\n"; + }); } void ScalarEvolution::computeAccessFunctions( @@ -10529,13 +10978,13 @@ void ScalarEvolution::computeAccessFunctions( const SCEV *Q, *R; SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R); - DEBUG({ - dbgs() << "Res: " << *Res << "\n"; - dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; - dbgs() << "Res divided by Sizes[i]:\n"; - dbgs() << "Quotient: " << *Q << "\n"; - dbgs() << "Remainder: " << *R << "\n"; - }); + LLVM_DEBUG({ + dbgs() << "Res: " << *Res << "\n"; + dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; + dbgs() << "Res divided by Sizes[i]:\n"; + dbgs() << "Quotient: " << *Q << "\n"; + dbgs() << "Remainder: " << *R << "\n"; + }); Res = Q; @@ -10563,11 +11012,11 @@ void ScalarEvolution::computeAccessFunctions( std::reverse(Subscripts.begin(), Subscripts.end()); - DEBUG({ - dbgs() << "Subscripts:\n"; - for (const SCEV *S : Subscripts) - dbgs() << *S << "\n"; - }); + LLVM_DEBUG({ + dbgs() << "Subscripts:\n"; + for (const SCEV *S : Subscripts) + dbgs() << *S << "\n"; + }); } /// Splits the SCEV into two vectors of SCEVs representing the subscripts and @@ -10641,17 +11090,17 @@ void ScalarEvolution::delinearize(const SCEV *Expr, if (Subscripts.empty()) return; - DEBUG({ - dbgs() << "succeeded to delinearize " << *Expr << "\n"; - dbgs() << "ArrayDecl[UnknownSize]"; - for (const SCEV *S : Sizes) - dbgs() << "[" << *S << "]"; + LLVM_DEBUG({ + dbgs() << "succeeded to delinearize " << *Expr << "\n"; + dbgs() << "ArrayDecl[UnknownSize]"; + for (const SCEV *S : Sizes) + dbgs() << "[" << *S << "]"; - dbgs() << "\nArrayRef"; - for (const SCEV *S : Subscripts) - dbgs() << "[" << *S << "]"; - dbgs() << "\n"; - }); + dbgs() << "\nArrayRef"; + for (const SCEV *S : Subscripts) + dbgs() << "[" << *S << "]"; + dbgs() << "\n"; + }); } //===----------------------------------------------------------------------===// @@ -10728,6 +11177,8 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), + PendingPhiRanges(std::move(Arg.PendingPhiRanges)), + PendingMerges(std::move(Arg.PendingMerges)), MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( @@ -10771,6 +11222,8 @@ ScalarEvolution::~ScalarEvolution() { BTCI.second.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + assert(PendingPhiRanges.empty() && "getRangeRef garbage"); + assert(PendingMerges.empty() && "isImpliedViaMerge garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); } @@ -11181,9 +11634,13 @@ ScalarEvolution::forgetMemoizedResults(const SCEV *S) { RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } -void ScalarEvolution::addToLoopUseLists(const SCEV *S) { +void +ScalarEvolution::getUsedLoops(const SCEV *S, + SmallPtrSetImpl<const Loop *> &LoopsUsed) { struct FindUsedLoops { - SmallPtrSet<const Loop *, 8> LoopsUsed; + FindUsedLoops(SmallPtrSetImpl<const Loop *> &LoopsUsed) + : LoopsUsed(LoopsUsed) {} + SmallPtrSetImpl<const Loop *> &LoopsUsed; bool follow(const SCEV *S) { if (auto *AR = dyn_cast<SCEVAddRecExpr>(S)) 
LoopsUsed.insert(AR->getLoop()); @@ -11193,10 +11650,14 @@ void ScalarEvolution::addToLoopUseLists(const SCEV *S) { bool isDone() const { return false; } }; - FindUsedLoops F; + FindUsedLoops F(LoopsUsed); SCEVTraversal<FindUsedLoops>(F).visitAll(S); +} - for (auto *L : F.LoopsUsed) +void ScalarEvolution::addToLoopUseLists(const SCEV *S) { + SmallPtrSet<const Loop *, 8> LoopsUsed; + getUsedLoops(S, LoopsUsed); + for (auto *L : LoopsUsed) LoopUsers[L].push_back(S); } @@ -11472,8 +11933,6 @@ private: // couldn't create an AddRec for it, or couldn't add the predicate), we just // return \p Expr. const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) { - if (!VersionUnknown) - return Expr; if (!isa<PHINode>(Expr->getValue())) return Expr; Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> @@ -11481,6 +11940,12 @@ private: if (!PredicatedRewrite) return Expr; for (auto *P : PredicatedRewrite->second){ + // Wrap predicates from outer loops are not supported. + if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) { + auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr()); + if (L != AR->getLoop()) + return Expr; + } if (!addOverflowAssumption(P)) return Expr; } @@ -11786,3 +12251,43 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { OS.indent(Depth + 2) << "--> " << *II->second.second << "\n"; } } + +// Match the mathematical pattern A - (A / B) * B, where A and B can be +// arbitrary expressions. +// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is +// 4, A / B becomes X / 8). +bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, + const SCEV *&RHS) { + const auto *Add = dyn_cast<SCEVAddExpr>(Expr); + if (Add == nullptr || Add->getNumOperands() != 2) + return false; + + const SCEV *A = Add->getOperand(1); + const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0)); + + if (Mul == nullptr) + return false; + + const auto MatchURemWithDivisor = [&](const SCEV *B) { + // (SomeExpr + (-(SomeExpr / B) * B)). + if (Expr == getURemExpr(A, B)) { + LHS = A; + RHS = B; + return true; + } + return false; + }; + + // (SomeExpr + (-1 * (SomeExpr / B) * B)). + if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0))) + return MatchURemWithDivisor(Mul->getOperand(1)) || + MatchURemWithDivisor(Mul->getOperand(2)); + + // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)). + if (Mul->getNumOperands() == 2) + return MatchURemWithDivisor(Mul->getOperand(1)) || + MatchURemWithDivisor(Mul->getOperand(0)) || + MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) || + MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); + return false; +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 53ce33bacbe9..8f89389c4b5d 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -589,6 +589,12 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, return expand(SE.getAddExpr(Ops)); } +Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, + Value *V) { + const SCEV *const Ops[1] = {Op}; + return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V); +} + /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for /// SCEV expansion. If they are nested, this is the most nested. If they are /// neighboring, pick the later. 
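// [Editor's note: illustrative sketch, not part of the diff. matchURem above
//  recognises the expansion A - (A / B) * B of an unsigned remainder; the
//  identity itself, checked numerically with hypothetical helpers.]
#include <cassert>
#include <cstdint>

static uint64_t remainderViaMulDiv(uint64_t A, uint64_t B) {
  return A - (A / B) * B; // the shape the matcher folds back into A urem B
}

static void checkRemainderIdentity() {
  assert(remainderViaMulDiv(23, 5) == 23 % 5);
  assert(remainderViaMulDiv(64, 8) == 0);
}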
@@ -1036,8 +1042,7 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, if (!isa<ConstantInt>(StepV)) GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), GEPPtrTy->getAddressSpace()); - const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; - IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN); if (IncV->getType() != PN->getType()) { IncV = Builder.CreateBitCast(IncV, PN->getType()); rememberInstruction(IncV); @@ -1051,7 +1056,7 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, return IncV; } -/// \brief Hoist the addrec instruction chain rooted in the loop phi above the +/// Hoist the addrec instruction chain rooted in the loop phi above the /// position. This routine assumes that this is possible (has been checked). void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, Instruction *Pos, PHINode *LoopPhi) { @@ -1067,7 +1072,7 @@ void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, } while (InstToHoist != LoopPhi); } -/// \brief Check whether we can cheaply express the requested SCEV in terms of +/// Check whether we can cheaply express the requested SCEV in terms of /// the available PHI SCEV by truncation and/or inversion of the step. static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, @@ -1169,8 +1174,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (!IsMatchingSCEV && !TryNonMatchingSCEV) continue; + // TODO: this possibly can be reworked to avoid this cast at all. Instruction *TempIncV = - cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock)); + dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock)); + if (!TempIncV) + continue; // Check whether we can reuse this PHI node. if (LSRMode) { @@ -1387,7 +1395,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // IVUsers tries to prevent this case, so it is rare. However, it can // happen when an IVUser outside the loop is not dominated by the latch // block. Adjusting IVIncInsertPos before expansion begins cannot handle - // all cases. Consider a phi outide whose operand is replaced during + // all cases. Consider a phi outside whose operand is replaced during // expansion with the value of the postinc user. Without fundamentally // changing the way postinc users are tracked, the only remedy is // inserting an extra IV increment. StepV might fold into PostLoopOffset, @@ -1407,7 +1415,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { } // We have decided to reuse an induction variable of a dominating loop. Apply - // truncation and/or invertion of the step. + // truncation and/or inversion of the step. if (TruncTy) { Type *ResTy = Result->getType(); // Normalize the result type. 
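// [Editor's note: illustrative sketch, not part of the diff. The cast<> ->
//  dyn_cast<> change above swaps an asserting cast for one that yields null,
//  so the loop can simply skip incoming values that are not Instructions; the
//  same defensive pattern with hypothetical types and standard RTTI.]
struct ToyValue { virtual ~ToyValue() = default; };
struct ToyInstruction : ToyValue {};

// Returns nullptr instead of failing hard, letting the caller 'continue'.
static ToyInstruction *asInstructionOrNull(ToyValue *V) {
  return dynamic_cast<ToyInstruction *>(V);
}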
@@ -1440,12 +1448,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { if (Result->getType()->isIntegerTy()) { Value *Base = expandCodeFor(PostLoopOffset, ExpandTy); - const SCEV *const OffsetArray[1] = {SE.getUnknown(Result)}; - Result = expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Base); + Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base); } else { - const SCEV *const OffsetArray[1] = {PostLoopOffset}; - Result = - expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Result); + Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result); } } else { Result = InsertNoopCastOfTo(Result, IntTy); @@ -1497,9 +1502,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. const SCEV *Base = S->getStart(); - const SCEV *RestArray[1] = { Rest }; // Dig into the expression to find the pointer base for a GEP. - ExposePointerBase(Base, RestArray[0], SE); + const SCEV *ExposedRest = Rest; + ExposePointerBase(Base, ExposedRest, SE); // If we found a pointer, expand the AddRec with a GEP. if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { // Make sure the Base isn't something exotic, such as a multiplied @@ -1508,7 +1513,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { Value *StartV = expand(Base); assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); - return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); + return expandAddToGEP(ExposedRest, PTy, Ty, StartV); } } @@ -1862,7 +1867,7 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, Phis.push_back(&PN); if (TTI) - std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) { + llvm::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) { // Put pointers at the back and make sure pointer < pointer = false. if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); @@ -2154,8 +2159,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *Start = AR->getStart(); + Type *ARTy = AR->getType(); unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType()); - unsigned DstBits = SE.getTypeSizeInBits(AR->getType()); + unsigned DstBits = SE.getTypeSizeInBits(ARTy); // The expression {Start,+,Step} has nusw/nssw if // Step < 0, Start - |Step| * Backedge <= Start @@ -2167,11 +2173,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc); IntegerType *Ty = - IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType())); + IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy)); + Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? 
ARTy : Ty; Value *StepValue = expandCodeFor(Step, Ty, Loc); Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc); - Value *StartValue = expandCodeFor(Start, Ty, Loc); + Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc); ConstantInt *Zero = ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits)); @@ -2194,8 +2201,18 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, // Compute: // Start + |Step| * Backedge < Start // Start - |Step| * Backedge > Start - Value *Add = Builder.CreateAdd(StartValue, MulV); - Value *Sub = Builder.CreateSub(StartValue, MulV); + Value *Add = nullptr, *Sub = nullptr; + if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) { + const SCEV *MulS = SE.getSCEV(MulV); + const SCEV *NegMulS = SE.getNegativeSCEV(MulS); + Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue), + ARPtrTy); + Sub = Builder.CreateBitCast( + expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy); + } else { + Add = Builder.CreateAdd(StartValue, MulV); + Sub = Builder.CreateSub(StartValue, MulV); + } Value *EndCompareGT = Builder.CreateICmp( Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); @@ -2209,7 +2226,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, // If the backedge taken count type is larger than the AR type, // check that we don't drop any bits by truncating it. If we are - // droping bits, then we have overflow (unless the step is zero). + // dropping bits, then we have overflow (unless the step is zero). if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) { auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits); auto *BackedgeCheck = diff --git a/contrib/llvm/lib/Analysis/StratifiedSets.h b/contrib/llvm/lib/Analysis/StratifiedSets.h index 772df175b384..2f20cd12506c 100644 --- a/contrib/llvm/lib/Analysis/StratifiedSets.h +++ b/contrib/llvm/lib/Analysis/StratifiedSets.h @@ -29,7 +29,7 @@ typedef unsigned StratifiedIndex; /// NOTE: ^ This can't be a short -- bootstrapping clang has a case where /// ~1M sets exist. -// \brief Container of information related to a value in a StratifiedSet. +// Container of information related to a value in a StratifiedSet. struct StratifiedInfo { StratifiedIndex Index; /// For field sensitivity, etc. we can tack fields on here. @@ -37,7 +37,7 @@ struct StratifiedInfo { /// A "link" between two StratifiedSets. struct StratifiedLink { - /// \brief This is a value used to signify "does not exist" where the + /// This is a value used to signify "does not exist" where the /// StratifiedIndex type is used. /// /// This is used instead of Optional<StratifiedIndex> because @@ -63,7 +63,7 @@ struct StratifiedLink { void clearAbove() { Above = SetSentinel; } }; -/// \brief These are stratified sets, as described in "Fast algorithms for +/// These are stratified sets, as described in "Fast algorithms for /// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M /// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets /// of Value*s. If two Value*s are in the same set, or if both sets have @@ -172,7 +172,7 @@ private: /// remap has occurred, and use this information so we can defer renumbering set /// elements until build time. 
template <typename T> class StratifiedSetsBuilder { - /// \brief Represents a Stratified Set, with information about the Stratified + /// Represents a Stratified Set, with information about the Stratified /// Set above it, the set below it, and whether the current set has been /// remapped to another. struct BuilderLink { @@ -263,7 +263,7 @@ template <typename T> class StratifiedSetsBuilder { StratifiedIndex Remap; }; - /// \brief This function performs all of the set unioning/value renumbering + /// This function performs all of the set unioning/value renumbering /// that we've been putting off, and generates a vector<StratifiedLink> that /// may be placed in a StratifiedSets instance. void finalizeSets(std::vector<StratifiedLink> &StratLinks) { @@ -302,7 +302,7 @@ template <typename T> class StratifiedSetsBuilder { } } - /// \brief There's a guarantee in StratifiedLink where all bits set in a + /// There's a guarantee in StratifiedLink where all bits set in a /// Link.externals will be set in all Link.externals "below" it. static void propagateAttrs(std::vector<StratifiedLink> &Links) { const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) { @@ -351,7 +351,7 @@ public: return addAtMerging(Main, NewIndex); } - /// \brief Restructures the stratified sets as necessary to make "ToAdd" in a + /// Restructures the stratified sets as necessary to make "ToAdd" in a /// set above "Main". There are some cases where this is not possible (see /// above), so we merge them such that ToAdd and Main are in the same set. bool addAbove(const T &Main, const T &ToAdd) { @@ -364,7 +364,7 @@ public: return addAtMerging(ToAdd, Above); } - /// \brief Restructures the stratified sets as necessary to make "ToAdd" in a + /// Restructures the stratified sets as necessary to make "ToAdd" in a /// set below "Main". There are some cases where this is not possible (see /// above), so we merge them such that ToAdd and Main are in the same set. bool addBelow(const T &Main, const T &ToAdd) { @@ -437,7 +437,7 @@ private: return *Current; } - /// \brief Merges two sets into one another. Assumes that these sets are not + /// Merges two sets into one another. Assumes that these sets are not /// already one in the same. void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) { assert(inbounds(Idx1) && inbounds(Idx2)); @@ -458,7 +458,7 @@ private: mergeDirect(Idx1, Idx2); } - /// \brief Merges two sets assuming that the set at `Idx1` is unreachable from + /// Merges two sets assuming that the set at `Idx1` is unreachable from /// traversing above or below the set at `Idx2`. void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) { assert(inbounds(Idx1) && inbounds(Idx2)); diff --git a/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp new file mode 100644 index 000000000000..b085fa274d7f --- /dev/null +++ b/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp @@ -0,0 +1,113 @@ +//===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for propagating synthetic counts. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/SyntheticCountsUtils.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +// Given an SCC, propagate entry counts along the edge of the SCC nodes. +template <typename CallGraphType> +void SyntheticCountsUtils<CallGraphType>::propagateFromSCC( + const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount, + AddCountTy AddCount) { + + SmallPtrSet<NodeRef, 8> SCCNodes; + SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges; + + for (auto &Node : SCC) + SCCNodes.insert(Node); + + // Partition the edges coming out of the SCC into those whose destination is + // in the SCC and the rest. + for (const auto &Node : SCCNodes) { + for (auto &E : children_edges<CallGraphType>(Node)) { + if (SCCNodes.count(CGT::edge_dest(E))) + SCCEdges.emplace_back(Node, E); + else + NonSCCEdges.emplace_back(Node, E); + } + } + + // For nodes in the same SCC, update the counts in two steps: + // 1. Compute the additional count for each node by propagating the counts + // along all incoming edges to the node that originate from within the same + // SCC and summing them up. + // 2. Add the additional counts to the nodes in the SCC. + // This ensures that the order of + // traversal of nodes within the SCC doesn't affect the final result. + + DenseMap<NodeRef, uint64_t> AdditionalCounts; + for (auto &E : SCCEdges) { + auto OptRelFreq = GetRelBBFreq(E.second); + if (!OptRelFreq) + continue; + Scaled64 RelFreq = OptRelFreq.getValue(); + auto Caller = E.first; + auto Callee = CGT::edge_dest(E.second); + RelFreq *= Scaled64(GetCount(Caller), 0); + uint64_t AdditionalCount = RelFreq.toInt<uint64_t>(); + AdditionalCounts[Callee] += AdditionalCount; + } + + // Update the counts for the nodes in the SCC. + for (auto &Entry : AdditionalCounts) + AddCount(Entry.first, Entry.second); + + // Now update the counts for nodes outside the SCC. + for (auto &E : NonSCCEdges) { + auto OptRelFreq = GetRelBBFreq(E.second); + if (!OptRelFreq) + continue; + Scaled64 RelFreq = OptRelFreq.getValue(); + auto Caller = E.first; + auto Callee = CGT::edge_dest(E.second); + RelFreq *= Scaled64(GetCount(Caller), 0); + AddCount(Callee, RelFreq.toInt<uint64_t>()); + } +} + +/// Propgate synthetic entry counts on a callgraph \p CG. +/// +/// This performs a reverse post-order traversal of the callgraph SCC. For each +/// SCC, it first propagates the entry counts to the nodes within the SCC +/// through call edges and updates them in one shot. Then the entry counts are +/// propagated to nodes outside the SCC. This requires \p GraphTraits +/// to have a specialization for \p CallGraphType. + +template <typename CallGraphType> +void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG, + GetRelBBFreqTy GetRelBBFreq, + GetCountTy GetCount, + AddCountTy AddCount) { + std::vector<SccTy> SCCs; + + // Collect all the SCCs. + for (auto I = scc_begin(CG); !I.isAtEnd(); ++I) + SCCs.push_back(*I); + + // The callgraph-scc needs to be visited in top-down order for propagation. + // The scc iterator returns the scc in bottom-up order, so reverse the SCCs + // and call propagateFromSCC. 
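// [Editor's note: illustrative sketch, not part of the diff. It mirrors the
//  two-step update above: additional counts from intra-SCC edges are summed
//  first and only then applied, so the visit order inside the SCC cannot
//  change the result. Node ids, the flat edge list and the single RelFreq
//  scalar are hypothetical simplifications.]
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using ToyEdge = std::pair<int, int>; // caller -> callee, both inside the SCC

static void propagateInsideSCC(std::map<int, uint64_t> &Counts,
                               const std::vector<ToyEdge> &SCCEdges,
                               double RelFreq) {
  // Step 1: accumulate the additional counts without touching Counts.
  std::map<int, uint64_t> Additional;
  for (const ToyEdge &E : SCCEdges)
    Additional[E.second] += static_cast<uint64_t>(RelFreq * Counts[E.first]);
  // Step 2: apply the accumulated counts in one shot.
  for (const auto &Entry : Additional)
    Counts[Entry.first] += Entry.second;
}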
+ for (auto &SCC : reverse(SCCs)) + propagateFromSCC(SCC, GetRelBBFreq, GetCount, AddCount); +} + +template class llvm::SyntheticCountsUtils<const CallGraph *>; diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp index d18246ac5941..102135fbf313 100644 --- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -62,6 +62,18 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, }) && "TargetLibraryInfoImpl function names must be sorted"); + // Set IO unlocked variants as unavailable + // Set them as available per system below + TLI.setUnavailable(LibFunc_getchar_unlocked); + TLI.setUnavailable(LibFunc_putc_unlocked); + TLI.setUnavailable(LibFunc_putchar_unlocked); + TLI.setUnavailable(LibFunc_fputc_unlocked); + TLI.setUnavailable(LibFunc_fgetc_unlocked); + TLI.setUnavailable(LibFunc_fread_unlocked); + TLI.setUnavailable(LibFunc_fwrite_unlocked); + TLI.setUnavailable(LibFunc_fputs_unlocked); + TLI.setUnavailable(LibFunc_fgets_unlocked); + bool ShouldExtI32Param = false, ShouldExtI32Return = false, ShouldSignExtI32Param = false; // PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and @@ -73,8 +85,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } // Mips, on the other hand, needs signext on i32 parameters corresponding // to both signed and unsigned ints. - if (T.getArch() == Triple::mips || T.getArch() == Triple::mipsel || - T.getArch() == Triple::mips64 || T.getArch() == Triple::mips64el) { + if (T.isMIPS()) { ShouldSignExtI32Param = true; } TLI.setShouldExtI32Param(ShouldExtI32Param); @@ -107,6 +118,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later. // All versions of watchOS support it. 
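// [Editor's note: illustrative sketch, not part of the diff. The unlocked-IO
//  handling above defaults every *_unlocked entry to unavailable and then
//  re-enables it per platform; a toy availability table with hypothetical
//  names shows the same flow.]
#include <set>
#include <string>

struct ToyLibInfo {
  std::set<std::string> Unavailable;
  void setUnavailable(const std::string &F) { Unavailable.insert(F); }
  void setAvailable(const std::string &F) { Unavailable.erase(F); }
  bool has(const std::string &F) const { return !Unavailable.count(F); }
};

static void configureUnlockedIO(ToyLibInfo &TLI, bool HasGlibcLikeRuntime) {
  TLI.setUnavailable("getc_unlocked"); // pessimistic default
  TLI.setUnavailable("fputs_unlocked");
  if (HasGlibcLikeRuntime) {           // re-enable where the libc provides them
    TLI.setAvailable("getc_unlocked");
    TLI.setAvailable("fputs_unlocked");
  }
}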
if (T.isMacOSX()) { + // available IO unlocked variants on Mac OS X + TLI.setAvailable(LibFunc_getc_unlocked); + TLI.setAvailable(LibFunc_getchar_unlocked); + TLI.setAvailable(LibFunc_putc_unlocked); + TLI.setAvailable(LibFunc_putchar_unlocked); + if (T.isMacOSXVersionLT(10, 5)) TLI.setUnavailable(LibFunc_memset_pattern16); } else if (T.isiOS()) { @@ -245,51 +262,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_tanhf); } - // These definitions are due to math-finite.h header on Linux - TLI.setUnavailable(LibFunc_acos_finite); - TLI.setUnavailable(LibFunc_acosf_finite); - TLI.setUnavailable(LibFunc_acosl_finite); - TLI.setUnavailable(LibFunc_acosh_finite); - TLI.setUnavailable(LibFunc_acoshf_finite); - TLI.setUnavailable(LibFunc_acoshl_finite); - TLI.setUnavailable(LibFunc_asin_finite); - TLI.setUnavailable(LibFunc_asinf_finite); - TLI.setUnavailable(LibFunc_asinl_finite); - TLI.setUnavailable(LibFunc_atan2_finite); - TLI.setUnavailable(LibFunc_atan2f_finite); - TLI.setUnavailable(LibFunc_atan2l_finite); - TLI.setUnavailable(LibFunc_atanh_finite); - TLI.setUnavailable(LibFunc_atanhf_finite); - TLI.setUnavailable(LibFunc_atanhl_finite); - TLI.setUnavailable(LibFunc_cosh_finite); - TLI.setUnavailable(LibFunc_coshf_finite); - TLI.setUnavailable(LibFunc_coshl_finite); - TLI.setUnavailable(LibFunc_exp10_finite); - TLI.setUnavailable(LibFunc_exp10f_finite); - TLI.setUnavailable(LibFunc_exp10l_finite); - TLI.setUnavailable(LibFunc_exp2_finite); - TLI.setUnavailable(LibFunc_exp2f_finite); - TLI.setUnavailable(LibFunc_exp2l_finite); - TLI.setUnavailable(LibFunc_exp_finite); - TLI.setUnavailable(LibFunc_expf_finite); - TLI.setUnavailable(LibFunc_expl_finite); - TLI.setUnavailable(LibFunc_log10_finite); - TLI.setUnavailable(LibFunc_log10f_finite); - TLI.setUnavailable(LibFunc_log10l_finite); - TLI.setUnavailable(LibFunc_log2_finite); - TLI.setUnavailable(LibFunc_log2f_finite); - TLI.setUnavailable(LibFunc_log2l_finite); - TLI.setUnavailable(LibFunc_log_finite); - TLI.setUnavailable(LibFunc_logf_finite); - TLI.setUnavailable(LibFunc_logl_finite); - TLI.setUnavailable(LibFunc_pow_finite); - TLI.setUnavailable(LibFunc_powf_finite); - TLI.setUnavailable(LibFunc_powl_finite); - TLI.setUnavailable(LibFunc_sinh_finite); - TLI.setUnavailable(LibFunc_sinhf_finite); - TLI.setUnavailable(LibFunc_sinhl_finite); - - // Win32 does *not* provide provide these functions, but they are + // Win32 does *not* provide these functions, but they are // generally available on POSIX-compliant systems: TLI.setUnavailable(LibFunc_access); TLI.setUnavailable(LibFunc_bcmp); @@ -309,7 +282,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_ftello); TLI.setUnavailable(LibFunc_ftrylockfile); TLI.setUnavailable(LibFunc_funlockfile); - TLI.setUnavailable(LibFunc_getc_unlocked); TLI.setUnavailable(LibFunc_getitimer); TLI.setUnavailable(LibFunc_getlogin_r); TLI.setUnavailable(LibFunc_getpwnam); @@ -441,15 +413,18 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_flsll); } - // The following functions are available on at least Linux: - if (!T.isOSLinux()) { + // The following functions are available on Linux, + // but Android uses bionic instead of glibc. 
+ if (!T.isOSLinux() || T.isAndroid()) { TLI.setUnavailable(LibFunc_dunder_strdup); TLI.setUnavailable(LibFunc_dunder_strtok_r); TLI.setUnavailable(LibFunc_dunder_isoc99_scanf); TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf); TLI.setUnavailable(LibFunc_under_IO_getc); TLI.setUnavailable(LibFunc_under_IO_putc); - TLI.setUnavailable(LibFunc_memalign); + // But, Android has memalign. + if (!T.isAndroid()) + TLI.setUnavailable(LibFunc_memalign); TLI.setUnavailable(LibFunc_fopen64); TLI.setUnavailable(LibFunc_fseeko64); TLI.setUnavailable(LibFunc_fstat64); @@ -460,6 +435,65 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_stat64); TLI.setUnavailable(LibFunc_statvfs64); TLI.setUnavailable(LibFunc_tmpfile64); + + // Relaxed math functions are included in math-finite.h on Linux (GLIBC). + TLI.setUnavailable(LibFunc_acos_finite); + TLI.setUnavailable(LibFunc_acosf_finite); + TLI.setUnavailable(LibFunc_acosl_finite); + TLI.setUnavailable(LibFunc_acosh_finite); + TLI.setUnavailable(LibFunc_acoshf_finite); + TLI.setUnavailable(LibFunc_acoshl_finite); + TLI.setUnavailable(LibFunc_asin_finite); + TLI.setUnavailable(LibFunc_asinf_finite); + TLI.setUnavailable(LibFunc_asinl_finite); + TLI.setUnavailable(LibFunc_atan2_finite); + TLI.setUnavailable(LibFunc_atan2f_finite); + TLI.setUnavailable(LibFunc_atan2l_finite); + TLI.setUnavailable(LibFunc_atanh_finite); + TLI.setUnavailable(LibFunc_atanhf_finite); + TLI.setUnavailable(LibFunc_atanhl_finite); + TLI.setUnavailable(LibFunc_cosh_finite); + TLI.setUnavailable(LibFunc_coshf_finite); + TLI.setUnavailable(LibFunc_coshl_finite); + TLI.setUnavailable(LibFunc_exp10_finite); + TLI.setUnavailable(LibFunc_exp10f_finite); + TLI.setUnavailable(LibFunc_exp10l_finite); + TLI.setUnavailable(LibFunc_exp2_finite); + TLI.setUnavailable(LibFunc_exp2f_finite); + TLI.setUnavailable(LibFunc_exp2l_finite); + TLI.setUnavailable(LibFunc_exp_finite); + TLI.setUnavailable(LibFunc_expf_finite); + TLI.setUnavailable(LibFunc_expl_finite); + TLI.setUnavailable(LibFunc_log10_finite); + TLI.setUnavailable(LibFunc_log10f_finite); + TLI.setUnavailable(LibFunc_log10l_finite); + TLI.setUnavailable(LibFunc_log2_finite); + TLI.setUnavailable(LibFunc_log2f_finite); + TLI.setUnavailable(LibFunc_log2l_finite); + TLI.setUnavailable(LibFunc_log_finite); + TLI.setUnavailable(LibFunc_logf_finite); + TLI.setUnavailable(LibFunc_logl_finite); + TLI.setUnavailable(LibFunc_pow_finite); + TLI.setUnavailable(LibFunc_powf_finite); + TLI.setUnavailable(LibFunc_powl_finite); + TLI.setUnavailable(LibFunc_sinh_finite); + TLI.setUnavailable(LibFunc_sinhf_finite); + TLI.setUnavailable(LibFunc_sinhl_finite); + } + + if ((T.isOSLinux() && T.isGNUEnvironment()) || + (T.isAndroid() && !T.isAndroidVersionLT(28))) { + // available IO unlocked variants on GNU/Linux and Android P or later + TLI.setAvailable(LibFunc_getc_unlocked); + TLI.setAvailable(LibFunc_getchar_unlocked); + TLI.setAvailable(LibFunc_putc_unlocked); + TLI.setAvailable(LibFunc_putchar_unlocked); + TLI.setAvailable(LibFunc_fputc_unlocked); + TLI.setAvailable(LibFunc_fgetc_unlocked); + TLI.setAvailable(LibFunc_fread_unlocked); + TLI.setAvailable(LibFunc_fwrite_unlocked); + TLI.setAvailable(LibFunc_fputs_unlocked); + TLI.setAvailable(LibFunc_fgets_unlocked); } // As currently implemented in clang, NVPTX code has no standard library to @@ -689,10 +723,12 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_siprintf: case LibFunc_sprintf: return (NumParams >= 2 && 
FTy.getParamType(0)->isPointerTy() && - FTy.getParamType(1)->isPointerTy()); + FTy.getParamType(1)->isPointerTy() && + FTy.getReturnType()->isIntegerTy(32)); case LibFunc_snprintf: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && - FTy.getParamType(2)->isPointerTy()); + FTy.getParamType(2)->isPointerTy() && + FTy.getReturnType()->isIntegerTy(32)); case LibFunc_setitimer: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); @@ -802,6 +838,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_feof: case LibFunc_fflush: case LibFunc_fgetc: + case LibFunc_fgetc_unlocked: case LibFunc_fileno: case LibFunc_flockfile: case LibFunc_free: @@ -830,6 +867,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(1)->isPointerTy()); case LibFunc_fputc: + case LibFunc_fputc_unlocked: case LibFunc_fstat: case LibFunc_frexp: case LibFunc_frexpf: @@ -837,18 +875,22 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_fstatvfs: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); case LibFunc_fgets: + case LibFunc_fgets_unlocked: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); case LibFunc_fread: + case LibFunc_fread_unlocked: return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(3)->isPointerTy()); case LibFunc_fwrite: + case LibFunc_fwrite_unlocked: return (NumParams == 4 && FTy.getReturnType()->isIntegerTy() && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isIntegerTy() && FTy.getParamType(2)->isIntegerTy() && FTy.getParamType(3)->isPointerTy()); case LibFunc_fputs: + case LibFunc_fputs_unlocked: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); case LibFunc_fscanf: @@ -861,6 +903,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); case LibFunc_getchar: + case LibFunc_getchar_unlocked: return (NumParams == 0 && FTy.getReturnType()->isIntegerTy()); case LibFunc_gets: return (NumParams == 1 && FTy.getParamType(0) == PCharTy); @@ -873,6 +916,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); case LibFunc_putc: + case LibFunc_putc_unlocked: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); case LibFunc_pread: case LibFunc_pwrite: @@ -989,8 +1033,26 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_msvc_new_array_int_nothrow: // new[](unsigned long long, nothrow); case LibFunc_msvc_new_array_longlong_nothrow: + // new(unsigned int, align_val_t) + case LibFunc_ZnwjSt11align_val_t: + // new(unsigned long, align_val_t) + case LibFunc_ZnwmSt11align_val_t: + // new[](unsigned int, align_val_t) + case LibFunc_ZnajSt11align_val_t: + // new[](unsigned long, align_val_t) + case LibFunc_ZnamSt11align_val_t: return (NumParams == 2 && FTy.getReturnType()->isPointerTy()); + // new(unsigned int, align_val_t, nothrow) + case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: + // new(unsigned long, align_val_t, nothrow) + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: + // new[](unsigned int, align_val_t, nothrow) + case 
LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: + // new[](unsigned long, align_val_t, nothrow) + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: + return (NumParams == 3 && FTy.getReturnType()->isPointerTy()); + // void operator delete[](void*); case LibFunc_ZdaPv: // void operator delete(void*); @@ -1017,6 +1079,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_ZdlPvj: // void operator delete(void*, unsigned long); case LibFunc_ZdlPvm: + // void operator delete(void*, align_val_t) + case LibFunc_ZdlPvSt11align_val_t: + // void operator delete[](void*, align_val_t) + case LibFunc_ZdaPvSt11align_val_t: // void operator delete[](void*, unsigned int); case LibFunc_msvc_delete_array_ptr32_int: // void operator delete[](void*, nothrow); @@ -1035,6 +1101,12 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_msvc_delete_ptr64_nothrow: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy()); + // void operator delete(void*, align_val_t, nothrow) + case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: + // void operator delete[](void*, align_val_t, nothrow) + case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: + return (NumParams == 3 && FTy.getParamType(0)->isPointerTy()); + case LibFunc_memset_pattern16: return (!FTy.isVarArg() && NumParams == 3 && FTy.getParamType(0)->isPointerTy() && @@ -1231,6 +1303,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_isascii: case LibFunc_toascii: case LibFunc_putchar: + case LibFunc_putchar_unlocked: return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) && FTy.getReturnType() == FTy.getParamType(0)); @@ -1326,10 +1399,10 @@ static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) { void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) { VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end()); - std::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName); + llvm::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName); ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end()); - std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName); + llvm::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName); } void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( @@ -1387,6 +1460,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( {"sinf", "__svml_sinf8", 8}, {"sinf", "__svml_sinf16", 16}, + {"llvm.sin.f64", "__svml_sin2", 2}, + {"llvm.sin.f64", "__svml_sin4", 4}, + {"llvm.sin.f64", "__svml_sin8", 8}, + + {"llvm.sin.f32", "__svml_sinf4", 4}, + {"llvm.sin.f32", "__svml_sinf8", 8}, + {"llvm.sin.f32", "__svml_sinf16", 16}, + {"cos", "__svml_cos2", 2}, {"cos", "__svml_cos4", 4}, {"cos", "__svml_cos8", 8}, @@ -1395,6 +1476,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( {"cosf", "__svml_cosf8", 8}, {"cosf", "__svml_cosf16", 16}, + {"llvm.cos.f64", "__svml_cos2", 2}, + {"llvm.cos.f64", "__svml_cos4", 4}, + {"llvm.cos.f64", "__svml_cos8", 8}, + + {"llvm.cos.f32", "__svml_cosf4", 4}, + {"llvm.cos.f32", "__svml_cosf8", 8}, + {"llvm.cos.f32", "__svml_cosf16", 16}, + {"pow", "__svml_pow2", 2}, {"pow", "__svml_pow4", 4}, {"pow", "__svml_pow8", 8}, diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index b744cae51ed7..9de2f789c89c 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ 
b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -31,7 +31,7 @@ static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::desc("Recognize reduction patterns.")); namespace { -/// \brief No-op implementation of the TTI interface using the utility base +/// No-op implementation of the TTI interface using the utility base /// classes. /// /// This is used when no target specific information is available. @@ -155,6 +155,14 @@ bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { return TTIImpl->isLSRCostLess(C1, C2); } +bool TargetTransformInfo::canMacroFuseCmp() const { + return TTIImpl->canMacroFuseCmp(); +} + +bool TargetTransformInfo::shouldFavorPostInc() const { + return TTIImpl->shouldFavorPostInc(); +} + bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { return TTIImpl->isLegalMaskedStore(DataType); } @@ -207,6 +215,8 @@ bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const { return TTIImpl->isProfitableToHoist(I); } +bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); } + bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } @@ -226,6 +236,10 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const return TTIImpl->shouldBuildLookupTablesForConstant(C); } +bool TargetTransformInfo::useColdCCForColdCall(Function &F) const { + return TTIImpl->useColdCCForColdCall(F); +} + unsigned TargetTransformInfo:: getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const { return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract); @@ -326,6 +340,14 @@ unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const { return TTIImpl->getMinVectorRegisterBitWidth(); } +bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const { + return TTIImpl->shouldMaximizeVectorBandwidth(OptSize); +} + +unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const { + return TTIImpl->getMinimumVF(ElemWidth); +} + bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( @@ -547,6 +569,16 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller, return TTIImpl->areInlineCompatible(Caller, Callee); } +bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode, + Type *Ty) const { + return TTIImpl->isIndexedLoadLegal(Mode, Ty); +} + +bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode, + Type *Ty) const { + return TTIImpl->isIndexedStoreLegal(Mode, Ty); +} + unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const { return TTIImpl->getLoadStoreVecRegBitWidth(AS); } @@ -598,73 +630,43 @@ int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } -static bool isReverseVectorMask(ArrayRef<int> Mask) { - for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) - if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) - return false; - return true; -} - -static bool isSingleSourceVectorMask(ArrayRef<int> Mask) { - bool Vec0 = false; - bool Vec1 = false; - for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { - if (Mask[i] >= 0) { - if ((unsigned)Mask[i] >= NumVecElts) - Vec1 = true; - else - Vec0 = true; - } - } - return !(Vec0 && Vec1); -} - -static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) { - for (unsigned i = 0; i < Mask.size(); ++i) - if (Mask[i] > 
0) - return false; - return true; -} - -static bool isAlternateVectorMask(ArrayRef<int> Mask) { - bool isAlternate = true; - unsigned MaskSize = Mask.size(); - - // Example: shufflevector A, B, <0,5,2,7> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { - if (Mask[i] < 0) - continue; - isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); - } - - if (isAlternate) - return true; +static TargetTransformInfo::OperandValueKind +getOperandInfo(Value *V, TargetTransformInfo::OperandValueProperties &OpProps) { + TargetTransformInfo::OperandValueKind OpInfo = + TargetTransformInfo::OK_AnyValue; + OpProps = TargetTransformInfo::OP_None; - isAlternate = true; - // Example: shufflevector A, B, <4,1,6,3> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { - if (Mask[i] < 0) - continue; - isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); + if (auto *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getValue().isPowerOf2()) + OpProps = TargetTransformInfo::OP_PowerOf2; + return TargetTransformInfo::OK_UniformConstantValue; } - return isAlternate; -} - -static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { - TargetTransformInfo::OperandValueKind OpInfo = - TargetTransformInfo::OK_AnyValue; + const Value *Splat = getSplatValue(V); - // Check for a splat of a constant or for a non uniform vector of constants. + // Check for a splat of a constant or for a non uniform vector of constants + // and check if the constant(s) are all powers of two. if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; - if (cast<Constant>(V)->getSplatValue() != nullptr) + if (Splat) { OpInfo = TargetTransformInfo::OK_UniformConstantValue; + if (auto *CI = dyn_cast<ConstantInt>(Splat)) + if (CI->getValue().isPowerOf2()) + OpProps = TargetTransformInfo::OP_PowerOf2; + } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) { + OpProps = TargetTransformInfo::OP_PowerOf2; + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { + if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) + if (CI->getValue().isPowerOf2()) + continue; + OpProps = TargetTransformInfo::OP_None; + break; + } + } } // Check for a splat of a uniform value. 
This is not loop aware, so return // true only for the obviously uniform cases (argument, globalvalue) - const Value *Splat = getSplatValue(V); if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat))) OpInfo = TargetTransformInfo::OK_UniformValue; @@ -994,15 +996,13 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - TargetTransformInfo::OperandValueKind Op1VK = - getOperandInfo(I->getOperand(0)); - TargetTransformInfo::OperandValueKind Op2VK = - getOperandInfo(I->getOperand(1)); - SmallVector<const Value*, 2> Operands(I->operand_values()); - return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, - Op2VK, TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None, - Operands); + TargetTransformInfo::OperandValueKind Op1VK, Op2VK; + TargetTransformInfo::OperandValueProperties Op1VP, Op2VP; + Op1VK = getOperandInfo(I->getOperand(0), Op1VP); + Op2VK = getOperandInfo(I->getOperand(1), Op2VP); + SmallVector<const Value *, 2> Operands(I->operand_values()); + return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, + Op1VP, Op2VP, Operands); } case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); @@ -1101,31 +1101,30 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { } case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); - Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); - unsigned NumVecElems = VecTypOp0->getVectorNumElements(); - SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + // TODO: Identify and add costs for insert/extract subvector, etc. + if (Shuffle->changesLength()) + return -1; + + if (Shuffle->isIdentity()) + return 0; - if (NumVecElems == Mask.size()) { - if (isReverseVectorMask(Mask)) - return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, - 0, nullptr); - if (isAlternateVectorMask(Mask)) - return getShuffleCost(TargetTransformInfo::SK_Alternate, - VecTypOp0, 0, nullptr); + Type *Ty = Shuffle->getType(); + if (Shuffle->isReverse()) + return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr); - if (isZeroEltBroadcastVectorMask(Mask)) - return getShuffleCost(TargetTransformInfo::SK_Broadcast, - VecTypOp0, 0, nullptr); + if (Shuffle->isSelect()) + return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr); - if (isSingleSourceVectorMask(Mask)) - return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, - VecTypOp0, 0, nullptr); + if (Shuffle->isTranspose()) + return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr); - return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - VecTypOp0, 0, nullptr); - } + if (Shuffle->isZeroEltSplat()) + return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr); - return -1; + if (Shuffle->isSingleSource()) + return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr); + + return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr); } case Instruction::Call: if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp index 34c998501a6c..4dec53151ed6 100644 --- a/contrib/llvm/lib/Analysis/Trace.cpp +++ b/contrib/llvm/lib/Analysis/Trace.cpp @@ -16,6 +16,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Trace.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include 
"llvm/Support/Compiler.h" diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 173db399b9d6..25a154edf4ac 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -104,21 +104,6 @@ // If neither node is an ancestor of the other and they have the same root, // then we say NoAlias. // -// TODO: The current metadata format doesn't support struct -// fields. For example: -// struct X { -// double d; -// int i; -// }; -// void foo(struct X *x, struct X *y, double *p) { -// *x = *y; -// *p = 0.0; -// } -// Struct X has a double member, so the store to *x can alias the store to *p. -// Currently it's not possible to precisely describe all the things struct X -// aliases, so struct assignments must use conservative TBAA nodes. There's -// no scheme for attaching metadata to @llvm.memcpy yet either. -// //===----------------------------------------------------------------------===// #include "llvm/Analysis/TypeBasedAliasAnalysis.h" @@ -146,6 +131,17 @@ static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true), cl::Hidden); namespace { +/// isNewFormatTypeNode - Return true iff the given type node is in the new +/// size-aware format. +static bool isNewFormatTypeNode(const MDNode *N) { + if (N->getNumOperands() < 3) + return false; + // In the old format the first operand is a string. + if (!isa<MDNode>(N->getOperand(0))) + return false; + return true; +} + /// This is a simple wrapper around an MDNode which provides a higher-level /// interface by hiding the details of how alias analysis information is encoded /// in its operands. @@ -160,8 +156,15 @@ public: /// getNode - Get the MDNode for this TBAANode. MDNodeTy *getNode() const { return Node; } + /// isNewFormat - Return true iff the wrapped type node is in the new + /// size-aware format. + bool isNewFormat() const { return isNewFormatTypeNode(Node); } + /// getParent - Get this TBAANode's Alias tree parent. TBAANodeImpl<MDNodeTy> getParent() const { + if (isNewFormat()) + return TBAANodeImpl(cast<MDNodeTy>(Node->getOperand(0))); + if (Node->getNumOperands() < 2) return TBAANodeImpl<MDNodeTy>(); MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1)); @@ -196,7 +199,7 @@ using MutableTBAANode = TBAANodeImpl<MDNode>; /// information is encoded in its operands. template<typename MDNodeTy> class TBAAStructTagNodeImpl { - /// This node should be created with createTBAAStructTagNode. + /// This node should be created with createTBAAAccessTag(). MDNodeTy *Node; public: @@ -205,6 +208,17 @@ public: /// Get the MDNode for this TBAAStructTagNode. MDNodeTy *getNode() const { return Node; } + /// isNewFormat - Return true iff the wrapped access tag is in the new + /// size-aware format. 
+ bool isNewFormat() const { + if (Node->getNumOperands() < 4) + return false; + if (MDNodeTy *AccessType = getAccessType()) + if (!TBAANodeImpl<MDNodeTy>(AccessType).isNewFormat()) + return false; + return true; + } + MDNodeTy *getBaseType() const { return dyn_cast_or_null<MDNode>(Node->getOperand(0)); } @@ -217,13 +231,20 @@ public: return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); } + uint64_t getSize() const { + if (!isNewFormat()) + return UINT64_MAX; + return mdconst::extract<ConstantInt>(Node->getOperand(3))->getZExtValue(); + } + /// Test if this TBAAStructTagNode represents a type for objects /// which are not modified (by any means) in the context where this /// AliasAnalysis is relevant. bool isTypeImmutable() const { - if (Node->getNumOperands() < 4) + unsigned OpNo = isNewFormat() ? 4 : 3; + if (Node->getNumOperands() < OpNo + 1) return false; - ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); + ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(OpNo)); if (!CI) return false; return CI->getValue()[0]; @@ -241,7 +262,7 @@ using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>; /// higher-level interface by hiding the details of how alias analysis /// information is encoded in its operands. class TBAAStructTypeNode { - /// This node should be created with createTBAAStructTypeNode. + /// This node should be created with createTBAATypeNode(). const MDNode *Node = nullptr; public: @@ -251,43 +272,80 @@ public: /// Get the MDNode for this TBAAStructTypeNode. const MDNode *getNode() const { return Node; } + /// isNewFormat - Return true iff the wrapped type node is in the new + /// size-aware format. + bool isNewFormat() const { return isNewFormatTypeNode(Node); } + + bool operator==(const TBAAStructTypeNode &Other) const { + return getNode() == Other.getNode(); + } + + /// getId - Return type identifier. + Metadata *getId() const { + return Node->getOperand(isNewFormat() ? 2 : 0); + } + + unsigned getNumFields() const { + unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1; + unsigned NumOpsPerField = isNewFormat() ? 3 : 2; + return (getNode()->getNumOperands() - FirstFieldOpNo) / NumOpsPerField; + } + + TBAAStructTypeNode getFieldType(unsigned FieldIndex) const { + unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1; + unsigned NumOpsPerField = isNewFormat() ? 3 : 2; + unsigned OpIndex = FirstFieldOpNo + FieldIndex * NumOpsPerField; + auto *TypeNode = cast<MDNode>(getNode()->getOperand(OpIndex)); + return TBAAStructTypeNode(TypeNode); + } + /// Get this TBAAStructTypeNode's field in the type DAG with /// given offset. Update the offset to be relative to the field type. - TBAAStructTypeNode getParent(uint64_t &Offset) const { - // Parent can be omitted for the root node. - if (Node->getNumOperands() < 2) - return TBAAStructTypeNode(); - - // Fast path for a scalar type node and a struct type node with a single - // field. - if (Node->getNumOperands() <= 3) { - uint64_t Cur = Node->getNumOperands() == 2 - ? 0 - : mdconst::extract<ConstantInt>(Node->getOperand(2)) - ->getZExtValue(); - Offset -= Cur; - MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); - if (!P) + TBAAStructTypeNode getField(uint64_t &Offset) const { + bool NewFormat = isNewFormat(); + if (NewFormat) { + // New-format root and scalar type nodes have no fields. + if (Node->getNumOperands() < 6) + return TBAAStructTypeNode(); + } else { + // Parent can be omitted for the root node. 
+ if (Node->getNumOperands() < 2) return TBAAStructTypeNode(); - return TBAAStructTypeNode(P); + + // Fast path for a scalar type node and a struct type node with a single + // field. + if (Node->getNumOperands() <= 3) { + uint64_t Cur = Node->getNumOperands() == 2 + ? 0 + : mdconst::extract<ConstantInt>(Node->getOperand(2)) + ->getZExtValue(); + Offset -= Cur; + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } } // Assume the offsets are in order. We return the previous field if // the current offset is bigger than the given offset. + unsigned FirstFieldOpNo = NewFormat ? 3 : 1; + unsigned NumOpsPerField = NewFormat ? 3 : 2; unsigned TheIdx = 0; - for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { + for (unsigned Idx = FirstFieldOpNo; Idx < Node->getNumOperands(); + Idx += NumOpsPerField) { uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) ->getZExtValue(); if (Cur > Offset) { - assert(Idx >= 3 && - "TBAAStructTypeNode::getParent should have an offset match!"); - TheIdx = Idx - 2; + assert(Idx >= FirstFieldOpNo + NumOpsPerField && + "TBAAStructTypeNode::getField should have an offset match!"); + TheIdx = Idx - NumOpsPerField; break; } } // Move along the last field. if (TheIdx == 0) - TheIdx = Node->getNumOperands() - 2; + TheIdx = Node->getNumOperands() - NumOpsPerField; uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) ->getZExtValue(); Offset -= Cur; @@ -403,15 +461,11 @@ bool MDNode::isTBAAVtableAccess() const { } // For struct-path aware TBAA, we use the access type of the tag. - if (getNumOperands() < 2) - return false; - MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); - if (!Tag) - return false; - if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { - if (Tag1->getString() == "vtable pointer") + TBAAStructTagNode Tag(this); + TBAAStructTypeNode AccessType(Tag.getAccessType()); + if(auto *Id = dyn_cast<MDString>(AccessType.getId())) + if (Id->getString() == "vtable pointer") return true; - } return false; } @@ -485,26 +539,6 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { N.NoAlias = getMetadata(LLVMContext::MD_noalias); } -static bool findAccessType(TBAAStructTagNode BaseTag, - const MDNode *AccessTypeNode, - uint64_t &OffsetInBase) { - // Start from the base type, follow the edge with the correct offset in - // the type DAG and adjust the offset until we reach the access type or - // until we reach a root node. - TBAAStructTypeNode BaseType(BaseTag.getBaseType()); - OffsetInBase = BaseTag.getOffset(); - - while (const MDNode *BaseTypeNode = BaseType.getNode()) { - if (BaseTypeNode == AccessTypeNode) - return true; - - // Follow the edge with the correct offset, Offset will be adjusted to - // be relative to the field type. - BaseType = BaseType.getParent(OffsetInBase); - } - return false; -} - static const MDNode *createAccessTag(const MDNode *AccessType) { // If there is no access type or the access type is the root node, then // we don't have any useful access tag to return. 
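Before the next hunk continues inside createAccessTag, a note on the field-indexing arithmetic introduced above: old-format struct type nodes store an identifier followed by (field-type, offset) pairs, while the new size-aware format (as assumed here) stores a parent, a size and an identifier followed by (field-type, offset, size) triples, which is what the FirstFieldOpNo/NumOpsPerField pairs 1/2 and 3/3 encode. A minimal standalone model of that layout; FakeTypeNode is an illustrative stand-in, not LLVM's MDNode.

#include <cassert>
#include <cstddef>
#include <vector>

// Operand layouts modelled after the two TBAA type-node formats:
//   old: {id, field0-type, field0-offset, field1-type, field1-offset, ...}
//   new: {parent, size, id, field0-type, field0-offset, field0-size, ...}
struct FakeTypeNode {
  std::vector<const void *> Ops;
  bool IsNewFormat = false;

  unsigned firstFieldOp() const { return IsNewFormat ? 3 : 1; }
  unsigned opsPerField() const { return IsNewFormat ? 3 : 2; }

  unsigned numFields() const {
    if (Ops.size() < firstFieldOp())
      return 0;
    return static_cast<unsigned>((Ops.size() - firstFieldOp()) / opsPerField());
  }

  // Operand index holding the type node of field I, as in getFieldType().
  unsigned fieldTypeOp(unsigned I) const {
    return firstFieldOp() + I * opsPerField();
  }
};

int main() {
  FakeTypeNode Old;            // {id, f0-type, f0-offset, f1-type, f1-offset}
  Old.Ops.resize(5);
  FakeTypeNode New;            // {parent, size, id, f0-type, f0-offset, f0-size}
  New.Ops.resize(6);
  New.IsNewFormat = true;
  assert(Old.numFields() == 2 && New.numFields() == 1);
  assert(Old.fieldTypeOp(1) == 3 && New.fieldTypeOp(0) == 3);
  return 0;
}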
@@ -512,12 +546,111 @@ static const MDNode *createAccessTag(const MDNode *AccessType) { return nullptr; Type *Int64 = IntegerType::get(AccessType->getContext(), 64); - auto *ImmutabilityFlag = ConstantAsMetadata::get(ConstantInt::get(Int64, 0)); + auto *OffsetNode = ConstantAsMetadata::get(ConstantInt::get(Int64, 0)); + + if (TBAAStructTypeNode(AccessType).isNewFormat()) { + // TODO: Take access ranges into account when matching access tags and + // fix this code to generate actual access sizes for generic tags. + uint64_t AccessSize = UINT64_MAX; + auto *SizeNode = + ConstantAsMetadata::get(ConstantInt::get(Int64, AccessSize)); + Metadata *Ops[] = {const_cast<MDNode*>(AccessType), + const_cast<MDNode*>(AccessType), + OffsetNode, SizeNode}; + return MDNode::get(AccessType->getContext(), Ops); + } + Metadata *Ops[] = {const_cast<MDNode*>(AccessType), - const_cast<MDNode*>(AccessType), ImmutabilityFlag}; + const_cast<MDNode*>(AccessType), + OffsetNode}; return MDNode::get(AccessType->getContext(), Ops); } +static bool hasField(TBAAStructTypeNode BaseType, + TBAAStructTypeNode FieldType) { + for (unsigned I = 0, E = BaseType.getNumFields(); I != E; ++I) { + TBAAStructTypeNode T = BaseType.getFieldType(I); + if (T == FieldType || hasField(T, FieldType)) + return true; + } + return false; +} + +/// Return true if for two given accesses, one of the accessed objects may be a +/// subobject of the other. The \p BaseTag and \p SubobjectTag parameters +/// describe the accesses to the base object and the subobject respectively. +/// \p CommonType must be the metadata node describing the common type of the +/// accessed objects. On return, \p MayAlias is set to true iff these accesses +/// may alias and \p Generic, if not null, points to the most generic access +/// tag for the given two. +static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag, + TBAAStructTagNode SubobjectTag, + const MDNode *CommonType, + const MDNode **GenericTag, + bool &MayAlias) { + // If the base object is of the least common type, then this may be an access + // to its subobject. + if (BaseTag.getAccessType() == BaseTag.getBaseType() && + BaseTag.getAccessType() == CommonType) { + if (GenericTag) + *GenericTag = createAccessTag(CommonType); + MayAlias = true; + return true; + } + + // If the access to the base object is through a field of the subobject's + // type, then this may be an access to that field. To check for that we start + // from the base type, follow the edge with the correct offset in the type DAG + // and adjust the offset until we reach the field type or until we reach the + // access type. + bool NewFormat = BaseTag.isNewFormat(); + TBAAStructTypeNode BaseType(BaseTag.getBaseType()); + uint64_t OffsetInBase = BaseTag.getOffset(); + + for (;;) { + // In the old format there is no distinction between fields and parent + // types, so in this case we consider all nodes up to the root. + if (!BaseType.getNode()) { + assert(!NewFormat && "Did not see access type in access path!"); + break; + } + + if (BaseType.getNode() == SubobjectTag.getBaseType()) { + bool SameMemberAccess = OffsetInBase == SubobjectTag.getOffset(); + if (GenericTag) { + *GenericTag = SameMemberAccess ? SubobjectTag.getNode() : + createAccessTag(CommonType); + } + MayAlias = SameMemberAccess; + return true; + } + + // With new-format nodes we stop at the access type. + if (NewFormat && BaseType.getNode() == BaseTag.getAccessType()) + break; + + // Follow the edge with the correct offset. 
Offset will be adjusted to + // be relative to the field type. + BaseType = BaseType.getField(OffsetInBase); + } + + // If the base object has a direct or indirect field of the subobject's type, + // then this may be an access to that field. We need this to check now that + // we support aggregates as access types. + if (NewFormat) { + // TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType()); + TBAAStructTypeNode FieldType(SubobjectTag.getBaseType()); + if (hasField(BaseType, FieldType)) { + if (GenericTag) + *GenericTag = createAccessTag(CommonType); + MayAlias = true; + return true; + } + } + + return false; +} + /// matchTags - Return true if the given couple of accesses are allowed to /// overlap. If \arg GenericTag is not null, then on return it points to the /// most generic access descriptor for the given two. @@ -545,38 +678,26 @@ static bool matchAccessTags(const MDNode *A, const MDNode *B, const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(), TagB.getAccessType()); - // TODO: We need to check if AccessType of TagA encloses AccessType of - // TagB to support aggregate AccessType. If yes, return true. - - // Climb the type DAG from base type of A to see if we reach base type of B. - uint64_t OffsetA; - if (findAccessType(TagA, TagB.getBaseType(), OffsetA)) { - bool SameMemberAccess = OffsetA == TagB.getOffset(); + // If the final access types have different roots, they're part of different + // potentially unrelated type systems, so we must be conservative. + if (!CommonType) { if (GenericTag) - *GenericTag = SameMemberAccess ? TagB.getNode() : - createAccessTag(CommonType); - return SameMemberAccess; + *GenericTag = nullptr; + return true; } - // Climb the type DAG from base type of B to see if we reach base type of A. - uint64_t OffsetB; - if (findAccessType(TagB, TagA.getBaseType(), OffsetB)) { - bool SameMemberAccess = OffsetB == TagA.getOffset(); - if (GenericTag) - *GenericTag = SameMemberAccess ? TagA.getNode() : - createAccessTag(CommonType); - return SameMemberAccess; - } + // If one of the accessed objects may be a subobject of the other, then such + // accesses may alias. + bool MayAlias; + if (mayBeAccessToSubobjectOf(/* BaseTag= */ TagA, /* SubobjectTag= */ TagB, + CommonType, GenericTag, MayAlias) || + mayBeAccessToSubobjectOf(/* BaseTag= */ TagB, /* SubobjectTag= */ TagA, + CommonType, GenericTag, MayAlias)) + return MayAlias; + // Otherwise, we've proved there's no alias. if (GenericTag) *GenericTag = createAccessTag(CommonType); - - // If the final access types have different roots, they're part of different - // potentially unrelated type systems, so we must be conservative. - if (!CommonType) - return true; - - // If they have the same root, then we've proved there's no alias. return false; } diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 46ac3f451f81..04a7b73c22bf 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -89,7 +89,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - return DL.getPointerTypeSizeInBits(Ty); + return DL.getIndexTypeSizeInBits(Ty); } namespace { @@ -190,6 +190,14 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, "LHS and RHS should have the same type"); assert(LHS->getType()->isIntOrIntVectorTy() && "LHS and RHS should be integers"); + // Look for an inverted mask: (X & ~M) op (Y & M). 
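A standalone illustration of the inverted-mask fact that the new haveNoCommonBitsSet match relies on: (X & ~M) keeps only bits where M is clear and (Y & M) keeps only bits where M is set, so the two can never share a set bit. The concrete values below are arbitrary.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, Y = 0x12345678, M = 0x0F0F0F0F;
  uint32_t A = X & ~M, B = Y & M;
  // Disjoint bit positions: no common set bits, so addition behaves like OR.
  assert((A & B) == 0);
  assert(A + B == (A | B));
  return 0;
}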
+ Value *M; + if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) && + match(RHS, m_c_And(m_Specific(M), m_Value()))) + return true; + if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) && + match(LHS, m_c_And(m_Specific(M), m_Value()))) + return true; IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType()); KnownBits LHSKnown(IT->getBitWidth()); KnownBits RHSKnown(IT->getBitWidth()); @@ -493,6 +501,7 @@ bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { case Intrinsic::sideeffect: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + case Intrinsic::dbg_label: case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::lifetime_start: @@ -530,7 +539,7 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, if (Inv->getParent() != CxtI->getParent()) return false; - // If we have a dom tree, then we now know that the assume doens't dominate + // If we have a dom tree, then we now know that the assume doesn't dominate // the other instruction. If we don't have a dom tree then we can check if // the assume is first in the BB. if (!DT) { @@ -574,7 +583,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, if (Q.isExcluded(I)) continue; - // Warning: This loop can end up being somewhat performance sensetive. + // Warning: This loop can end up being somewhat performance sensitive. // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). @@ -816,6 +825,14 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + // If the RHS is known zero, then this assumption must be wrong (nothing + // is unsigned less than zero). Signal a conflict and get out of here. + if (RHSKnown.isZero()) { + Known.Zero.setAllBits(); + Known.One.setAllBits(); + break; + } + // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) @@ -848,7 +865,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, /// Compute known bits from a shift operator, including those with a /// non-constant shift amount. Known is the output of this function. Known2 is a /// pre-allocated temporary with the same bit width as Known. KZF and KOF are -/// operator-specific functors that, given the known-zero or known-one bits +/// operator-specific functions that, given the known-zero or known-one bits /// respectively, and a shift amount, compute the implied known-zero or /// known-one bits of the shift operator's result respectively for that shift /// amount. The results from calling KZF and KOF are conservatively combined for @@ -966,12 +983,9 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // matching the form add(x, add(x, y)) where y is odd. // TODO: This could be generalized to clearing any bit set in y where the // following bit is known to be unset in y. 
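The match rewritten just below recognizes a binary operator whose operands are x and (x + y) with y odd; assuming this is the generalization of the classic x & (x - 1) idiom, the parity fact it depends on can be checked directly in plain C++ (the loop bounds are arbitrary):

#include <cassert>
#include <cstdint>

// For odd y, x & (x + y) always has a clear low bit: either x is even, or x
// is odd and then x + y is even. x & (x - 1) is the familiar special case.
int main() {
  for (uint32_t X = 0; X < 4096; ++X)
    for (uint32_t Y = 1; Y < 64; Y += 2)
      assert(((X & (X + Y)) & 1u) == 0u);
  return 0;
}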
- Value *Y = nullptr; + Value *X = nullptr, *Y = nullptr; if (!Known.Zero[0] && !Known.One[0] && - (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), - m_Value(Y))) || - match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), - m_Value(Y))))) { + match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) { Known2.resetAll(); computeKnownBits(Y, Known2, Depth + 1, Q); if (Known2.countMinTrailingOnes() > 0) @@ -1064,6 +1078,12 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // leading zero bits. MaxHighZeros = std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); + } else if (SPF == SPF_ABS) { + // RHS from matchSelectPattern returns the negation part of abs pattern. + // If the negate has an NSW flag we can assume the sign bit of the result + // will be 0 because that makes abs(INT_MIN) undefined. + if (cast<Instruction>(RHS)->hasNoSignedWrap()) + MaxHighZeros = 1; } // Only known if known in both the LHS and RHS. @@ -1093,7 +1113,10 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType()); + Type *ScalarTy = SrcTy->getScalarType(); + SrcBitWidth = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getTypeSizeInBits(ScalarTy); assert(SrcBitWidth && "SrcBitWidth can't be zero"); Known = Known.zextOrTrunc(SrcBitWidth); @@ -1106,7 +1129,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + if (SrcTy->isIntOrPtrTy() && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { @@ -1547,9 +1570,13 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, assert((V->getType()->isIntOrIntVectorTy(BitWidth) || V->getType()->isPtrOrPtrVectorTy()) && "Not integer or pointer type!"); - assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth && - "V and Known should have same BitWidth"); + + Type *ScalarTy = V->getType()->getScalarType(); + unsigned ExpectedWidth = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); + assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; + (void)ExpectedWidth; const APInt *C; if (match(V, m_APInt(C))) { @@ -1646,14 +1673,11 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const Query &Q) { assert(Depth <= MaxDepth && "Limit Search Depth"); - if (const Constant *C = dyn_cast<Constant>(V)) { - if (C->isNullValue()) - return OrZero; - - const APInt *ConstIntOrConstSplatInt; - if (match(C, m_APInt(ConstIntOrConstSplatInt))) - return ConstIntOrConstSplatInt->isPowerOf2(); - } + // Attempt to match against constants. + if (OrZero && match(V, m_Power2OrZero())) + return true; + if (match(V, m_Power2())) + return true; // 1 << X is clearly a power of two if the one is not shifted off the end. If // it is shifted off the end then the result is undefined. @@ -1737,7 +1761,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, return false; } -/// \brief Test whether a GEP's result is known to be non-null. +/// Test whether a GEP's result is known to be non-null. 
/// /// Uses properties inherent in a GEP to try to determine whether it is known /// to be non-null. @@ -1745,7 +1769,12 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, /// Currently this routine does not support vector GEPs. static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, const Query &Q) { - if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) + const Function *F = nullptr; + if (const Instruction *I = dyn_cast<Instruction>(GEP)) + F = I->getFunction(); + + if (!GEP->isInBounds() || + NullPointerIsDefined(F, GEP->getPointerAddressSpace())) return false; // FIXME: Support vector-GEPs. @@ -1919,6 +1948,10 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } } + // Some of the tests below are recursive, so bail out if we hit the limit. + if (Depth++ >= MaxDepth) + return false; + // Check for pointer simplifications. if (V->getType()->isPointerTy()) { // Alloca never returns null, malloc might. @@ -1935,14 +1968,14 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (LI->getMetadata(LLVMContext::MD_nonnull)) return true; - if (auto CS = ImmutableCallSite(V)) + if (auto CS = ImmutableCallSite(V)) { if (CS.isReturnNonNull()) return true; + if (const auto *RP = getArgumentAliasingToReturnedPointer(CS)) + return isKnownNonZero(RP, Depth, Q); + } } - // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ >= MaxDepth) - return false; // Check for recursive pointer simplifications. if (V->getType()->isPointerTy()) { @@ -2180,7 +2213,7 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, /// (itself), but other cases can give us information. For example, immediately /// after an "ashr X, 2", we know that the top 3 bits are all equal to each /// other, so we return 3. For vectors, return the number of sign bits for the -/// vector element with the mininum number of known sign bits. +/// vector element with the minimum number of known sign bits. static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, const Query &Q) { assert(Depth <= MaxDepth && "Limit Search Depth"); @@ -2189,7 +2222,11 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // in V, so for undef we have to conservatively return 1. We don't have the // same behavior for poison though -- that's a FIXME today. - unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType()); + Type *ScalarTy = V->getType()->getScalarType(); + unsigned TyBits = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getTypeSizeInBits(ScalarTy); + unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2300,7 +2337,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, case Instruction::Select: Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp == 1) return 1; // Early out. + if (Tmp == 1) break; Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); return std::min(Tmp, Tmp2); @@ -2308,7 +2345,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) return 1; // Early out. 
+ if (Tmp == 1) break; // Special case decrementing a value (ADD X, -1): if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) @@ -2328,12 +2365,12 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, } Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) return 1; + if (Tmp2 == 1) break; return std::min(Tmp, Tmp2)-1; case Instruction::Sub: Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) return 1; + if (Tmp2 == 1) break; // Handle NEG. if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) @@ -2356,15 +2393,15 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) return 1; // Early out. + if (Tmp == 1) break; return std::min(Tmp, Tmp2)-1; case Instruction::Mul: { // The output of the Mul can be at most twice the valid bits in the inputs. unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (SignBitsOp0 == 1) return 1; // Early out. + if (SignBitsOp0 == 1) break; unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (SignBitsOp1 == 1) return 1; + if (SignBitsOp1 == 1) break; unsigned OutValidBits = (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; @@ -2671,7 +2708,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, return true; // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. - if (match(Op, m_FAdd(m_Value(), m_Zero()))) + if (match(Op, m_FAdd(m_Value(), m_PosZeroFP()))) return true; // sitofp and uitofp turn into +0.0 for zero. @@ -2712,6 +2749,24 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, (!SignBitOnly && CFP->getValueAPF().isZero()); } + // Handle vector of constants. + if (auto *CV = dyn_cast<Constant>(V)) { + if (CV->getType()->isVectorTy()) { + unsigned NumElts = CV->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i)); + if (!CFP) + return false; + if (CFP->getValueAPF().isNegative() && + (SignBitOnly || !CFP->getValueAPF().isZero())) + return false; + } + + // All non-negative ConstantFPs. + return true; + } + } + if (Depth == MaxDepth) return false; // Limit search depth. @@ -2749,6 +2804,12 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, // Widening/narrowing never change sign. return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1); + case Instruction::ExtractElement: + // Look through extract element. At the moment we keep this simple and skip + // tracking the specific element. But at least we might find information + // valid for all elements of the vector. 
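The Add, Sub and Mul cases above all lean on the same bound: addition or subtraction can consume at most one bit of the shared sign-bit run, so the result keeps at least min(signbits(a), signbits(b)) - 1 sign bits. A standalone check of that bound over a few samples; numSignBits here is a plain reimplementation for illustration, not the LLVM helper.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Number of leading bits equal to the sign bit (the sign bit included), i.e.
// the quantity ComputeNumSignBits tracks, computed for a plain i32.
static unsigned numSignBits(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  unsigned N = 1;
  for (int Bit = 30; Bit >= 0 && ((U >> Bit) & 1u) == (U >> 31); --Bit)
    ++N;
  return N;
}

int main() {
  const int32_t Samples[] = {0, 1, -1, 7, -8, 12345, -12345, 1 << 20, -(1 << 20)};
  for (int32_t A : Samples)
    for (int32_t B : Samples) {
      unsigned Bound = std::min(numSignBits(A), numSignBits(B));
      // Add/Sub lose at most one bit of the common sign run.
      assert(numSignBits(A + B) + 1 >= Bound);
      assert(numSignBits(A - B) + 1 >= Bound);
    }
  return 0;
}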
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1); case Instruction::Call: const auto *CI = cast<CallInst>(I); Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI); @@ -2963,7 +3024,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, if (!V) return nullptr; - // Insert the value in the new (sub) aggregrate + // Insert the value in the new (sub) aggregate return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), "tmp", InsertBefore); } @@ -2992,9 +3053,9 @@ static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); } -/// Given an aggregrate and an sequence of indices, see if -/// the scalar value indexed is already around as a register, for example if it -/// were inserted directly into the aggregrate. +/// Given an aggregate and a sequence of indices, see if the scalar value +/// indexed is already around as a register, for example if it was inserted +/// directly into the aggregate. /// /// If InsertBefore is not null, this function will duplicate (modified) /// insertvalues when a part of a nested struct is extracted. @@ -3086,7 +3147,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, /// pointer plus a constant offset. Return the base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL) { - unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType()); APInt ByteOffset(BitWidth, 0); // We walk up the defs but use a visited set to handle unreachable code. In @@ -3104,7 +3165,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, // means when we construct GEPOffset, we need to use the size // of GEP's pointer type rather than the size of the original // pointer type. - APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0); + APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); if (!GEP->accumulateConstantOffset(DL, GEPOffset)) break; @@ -3326,7 +3387,8 @@ static uint64_t GetStringLengthH(const Value *V, /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { - if (!V->getType()->isPointerTy()) return 0; + if (!V->getType()->isPointerTy()) + return 0; SmallPtrSet<const PHINode*, 32> PHIs; uint64_t Len = GetStringLengthH(V, PHIs, CharSize); @@ -3335,7 +3397,24 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { return Len == ~0ULL ? 1 : Len; } -/// \brief \p PN defines a loop-variant pointer to an object. Check if the +const Value *llvm::getArgumentAliasingToReturnedPointer(ImmutableCallSite CS) { + assert(CS && + "getArgumentAliasingToReturnedPointer only works on nonnull CallSite"); + if (const Value *RV = CS.getReturnedArgOperand()) + return RV; + // This can be used only as a aliasing property. + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS)) + return CS.getArgOperand(0); + return nullptr; +} + +bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + ImmutableCallSite CS) { + return CS.getIntrinsicID() == Intrinsic::launder_invariant_group || + CS.getIntrinsicID() == Intrinsic::strip_invariant_group; +} + +/// \p PN defines a loop-variant pointer to an object. 
Check if the /// previous iteration of the loop was referring to the same object as \p PN. static bool isSameUnderlyingObjectInLoop(const PHINode *PN, const LoopInfo *LI) { @@ -3380,11 +3459,21 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, // An alloca can't be further simplified. return V; } else { - if (auto CS = CallSite(V)) - if (Value *RV = CS.getReturnedArgOperand()) { - V = RV; + if (auto CS = CallSite(V)) { + // CaptureTracking can know about special capturing properties of some + // intrinsics like launder.invariant.group, that can't be expressed with + // the attributes, but have properties like returning aliasing pointer. + // Because some analysis may assume that nocaptured pointer is not + // returned from some special intrinsic (because function would have to + // be marked with returns attribute), it is crucial to use this function + // because it should be in sync with CaptureTracking. Not using it may + // cause weird miscompilations where 2 aliasing pointers are assumed to + // noalias. + if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) { + V = RP; continue; } + } // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) @@ -3658,6 +3747,48 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, return OverflowResult::MayOverflow; } +OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS, + const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + // Multiplying n * m significant bits yields a result of n + m significant + // bits. If the total number of significant bits does not exceed the + // result bit width (minus 1), there is no overflow. + // This means if we have enough leading sign bits in the operands + // we can guarantee that the result does not overflow. + // Ref: "Hacker's Delight" by Henry Warren + unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); + + // Note that underestimating the number of sign bits gives a more + // conservative answer. + unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) + + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT); + + // First handle the easy case: if we have enough sign bits there's + // definitely no overflow. + if (SignBits > BitWidth + 1) + return OverflowResult::NeverOverflows; + + // There are two ambiguous cases where there can be no overflow: + // SignBits == BitWidth + 1 and + // SignBits == BitWidth + // The second case is difficult to check, therefore we only handle the + // first case. + if (SignBits == BitWidth + 1) { + // It overflows only when both arguments are negative and the true + // product is exactly the minimum negative number. + // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 + // For simplicity we just check if at least one side is not negative. 
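The 17-sign-bit corner case mentioned in the comment above, worked through with concrete i16 values (a standalone check that does not use the LLVM helpers):

#include <cassert>
#include <cstdint>

int main() {
  // 0xff00 (-256) has 8 sign bits and 0xff80 (-128) has 9, so an i16 multiply
  // of the two has exactly BitWidth + 1 = 17 sign bits available.
  int16_t A = static_cast<int16_t>(0xff00);
  int16_t B = static_cast<int16_t>(0xff80);
  int32_t Wide = int32_t(A) * int32_t(B);
  assert(Wide == 0x8000);      // true product is 32768 ...
  assert(Wide > INT16_MAX);    // ... one past INT16_MAX, so the i16 mul overflows

  // Same sign-bit budget, but one operand non-negative: the product fits.
  int16_t C = 255;             // 0x00ff, also 8 sign bits
  int32_t Wide2 = int32_t(C) * int32_t(B);
  assert(Wide2 >= INT16_MIN && Wide2 <= INT16_MAX);
  return 0;
}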
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); + if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) + return OverflowResult::NeverOverflows; + } + return OverflowResult::MayOverflow; +} + OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, @@ -3684,7 +3815,7 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, return OverflowResult::MayOverflow; } -/// \brief Return true if we can prove that adding the two values of the +/// Return true if we can prove that adding the two values of the /// knownbits will not overflow. /// Otherwise return false. static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, @@ -3787,6 +3918,47 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, return OverflowResult::MayOverflow; } +OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, + const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + // If the LHS is negative and the RHS is non-negative, no unsigned wrap. + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); + if (LHSKnown.isNegative() && RHSKnown.isNonNegative()) + return OverflowResult::NeverOverflows; + + return OverflowResult::MayOverflow; +} + +OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, + const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + // If LHS and RHS each have at least two sign bits, the subtraction + // cannot overflow. + if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + return OverflowResult::NeverOverflows; + + KnownBits LHSKnown = computeKnownBits(LHS, DL, 0, AC, CxtI, DT); + + KnownBits RHSKnown = computeKnownBits(RHS, DL, 0, AC, CxtI, DT); + + // Subtraction of two 2's complement numbers having identical signs will + // never overflow. + if ((LHSKnown.isNegative() && RHSKnown.isNegative()) || + (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())) + return OverflowResult::NeverOverflows; + + // TODO: implement logic similar to checkRippleForAdd + return OverflowResult::MayOverflow; +} + bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II, const DominatorTree &DT) { #ifndef NDEBUG @@ -3928,6 +4100,15 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { return true; } +bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { + // TODO: This is slightly consdervative for invoke instruction since exiting + // via an exception *is* normal control for them. + for (auto I = BB->begin(), E = BB->end(); I != E; ++I) + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) + return false; + return true; +} + bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, const Loop *L) { // The loop header is guaranteed to be executed for every iteration. @@ -4180,7 +4361,9 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, if (L.Flavor != R.Flavor) return {SPF_UNKNOWN, SPNB_NA, false}; - // Match the compare to the min/max operations of the select operands. + // We have something like: x Pred y ? min(a, b) : min(c, d). + // Try to match the compare to the min/max operations of the select operands. 
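Stepping back to the computeOverflowForSignedSub hunk above: the claim that subtracting two same-signed two's-complement values never overflows is easy to confirm exhaustively at a small width. A standalone check for i8, done in i32 arithmetic so the exact difference is visible.

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t A = -128; A <= 127; ++A)
    for (int32_t B = -128; B <= 127; ++B) {
      if ((A < 0) != (B < 0))
        continue;                 // keep only pairs with identical signs
      int32_t D = A - B;          // exact difference of the i8 inputs
      assert(D >= INT8_MIN && D <= INT8_MAX);
    }
  return 0;
}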
+ // First, make sure we have the right compare predicate. switch (L.Flavor) { case SPF_SMIN: if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { @@ -4218,21 +4401,38 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, return {SPF_UNKNOWN, SPNB_NA, false}; } - // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) - if (CmpLHS == A && CmpRHS == C && D == B) - return {L.Flavor, SPNB_NA, false}; + // If there is a common operand in the already matched min/max and the other + // min/max operands match the compare operands (either directly or inverted), + // then this is min/max of the same flavor. + // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) + // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) + if (D == B) { + if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && + match(A, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) - if (CmpLHS == A && CmpRHS == D && C == B) - return {L.Flavor, SPNB_NA, false}; - + // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) + if (C == B) { + if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && + match(A, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) - if (CmpLHS == B && CmpRHS == C && D == A) - return {L.Flavor, SPNB_NA, false}; - + // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) + if (D == A) { + if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && + match(B, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) - if (CmpLHS == B && CmpRHS == D && C == A) - return {L.Flavor, SPNB_NA, false}; + // ~d pred ~b ? 
m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) + if (C == A) { + if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && + match(B, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } return {SPF_UNKNOWN, SPNB_NA, false}; } @@ -4311,6 +4511,27 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, return {SPF_UNKNOWN, SPNB_NA, false}; } +bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) { + assert(X && Y && "Invalid operand"); + + // X = sub (0, Y) || X = sub nsw (0, Y) + if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) || + (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y))))) + return true; + + // Y = sub (0, X) || Y = sub nsw (0, X) + if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) || + (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X))))) + return true; + + // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A) + Value *A, *B; + return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) && + match(Y, m_Sub(m_Specific(B), m_Specific(A))))) || + (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) && + match(Y, m_NSWSub(m_Specific(B), m_Specific(A))))); +} + static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, FastMathFlags FMF, Value *CmpLHS, Value *CmpRHS, @@ -4409,25 +4630,49 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; } } - - const APInt *C1; - if (match(CmpRHS, m_APInt(C1))) { - if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) || - (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) { - - // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X - // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X - if (Pred == ICmpInst::ICMP_SGT && - (C1->isNullValue() || C1->isAllOnesValue())) { - return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; - } - - // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X - // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X - if (Pred == ICmpInst::ICMP_SLT && - (C1->isNullValue() || C1->isOneValue())) { - return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; - } + + if (isKnownNegation(TrueVal, FalseVal)) { + // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can + // match against either LHS or sext(LHS). + auto MaybeSExtCmpLHS = + m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS))); + auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes()); + auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One()); + if (match(TrueVal, MaybeSExtCmpLHS)) { + // Set the return values. If the compare uses the negated value (-X >s 0), + // swap the return values because the negated value is always 'RHS'. + LHS = TrueVal; + RHS = FalseVal; + if (match(CmpLHS, m_Neg(m_Specific(FalseVal)))) + std::swap(LHS, RHS); + + // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X) + // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X) + if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) + return {SPF_ABS, SPNB_NA, false}; + + // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X) + // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X) + if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) + return {SPF_NABS, SPNB_NA, false}; + } + else if (match(FalseVal, MaybeSExtCmpLHS)) { + // Set the return values. If the compare uses the negated value (-X >s 0), + // swap the return values because the negated value is always 'RHS'. 
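The select forms classified above, written out as scalar C++ to make the ABS/NABS distinction concrete. absSelect and nabsSelect are illustrative names, and INT_MIN is deliberately excluded from the test range because negating it is exactly the case the NSW reasoning elsewhere in this patch treats as undefined.

#include <cassert>
#include <cstdint>

// (X >s 0) ? X : -X and (X >s -1) ? X : -X select the magnitude: ABS.
static int32_t absSelect(int32_t X) { return X > 0 ? X : -X; }
// (X >s 0) ? -X : X selects the negated magnitude: NABS.
static int32_t nabsSelect(int32_t X) { return X > 0 ? -X : X; }

int main() {
  for (int32_t X = -1000; X <= 1000; ++X) {
    assert(absSelect(X) >= 0);
    assert(absSelect(-X) == absSelect(X));
    assert(nabsSelect(X) == -absSelect(X));
  }
  return 0;
}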
+ LHS = FalseVal; + RHS = TrueVal; + if (match(CmpLHS, m_Neg(m_Specific(TrueVal)))) + std::swap(LHS, RHS); + + // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X) + // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X) + if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) + return {SPF_NABS, SPNB_NA, false}; + + // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X) + // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X) + if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) + return {SPF_ABS, SPNB_NA, false}; } } @@ -4449,7 +4694,7 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, /// /// The function processes the case when type of true and false values of a /// select instruction differs from type of the cmp instruction operands because -/// of a cast instructon. The function checks if it is legal to move the cast +/// of a cast instruction. The function checks if it is legal to move the cast /// operation after "select". If yes, it returns the new second value of /// "select" (with the assumption that cast is moved): /// 1. As operand of cast instruction when both values of "select" are same cast @@ -4602,6 +4847,30 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, LHS, RHS, Depth); } +CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) { + if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT; + if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT; + if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT; + if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT; + if (SPF == SPF_FMINNUM) + return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; + if (SPF == SPF_FMAXNUM) + return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; + llvm_unreachable("unhandled!"); +} + +SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) { + if (SPF == SPF_SMIN) return SPF_SMAX; + if (SPF == SPF_UMIN) return SPF_UMAX; + if (SPF == SPF_SMAX) return SPF_SMIN; + if (SPF == SPF_UMAX) return SPF_UMIN; + llvm_unreachable("unhandled!"); +} + +CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) { + return getMinMaxPred(getInverseMinMaxFlavor(SPF)); +} + /// Return true if "icmp Pred LHS RHS" is always true. static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const DataLayout &DL, diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp index 2becfbfe8a8d..d73d24736439 100644 --- a/contrib/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp @@ -28,7 +28,7 @@ using namespace llvm; using namespace llvm::PatternMatch; -/// \brief Identify if the intrinsic is trivially vectorizable. +/// Identify if the intrinsic is trivially vectorizable. /// This method returns true if the intrinsic's argument types are all /// scalars for the scalar form of the intrinsic and all vectors for /// the vector form of the intrinsic. @@ -67,7 +67,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { } } -/// \brief Identifies if the intrinsic has a scalar operand. It check for +/// Identifies if the intrinsic has a scalar operand. It check for /// ctlz,cttz and powi special intrinsics whose argument is scalar. bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx) { @@ -81,7 +81,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, } } -/// \brief Returns intrinsic ID for call. +/// Returns intrinsic ID for call. 
/// For the input call instruction it finds mapping intrinsic and returns /// its ID, in case it does not found it return not_intrinsic. Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI, @@ -97,7 +97,7 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI, return Intrinsic::not_intrinsic; } -/// \brief Find the operand of the GEP that should be checked for consecutive +/// Find the operand of the GEP that should be checked for consecutive /// stores. This ignores trailing indices that have no effect on the final /// pointer. unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) { @@ -121,7 +121,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) { return LastOperand; } -/// \brief If the argument is a GEP, then returns the operand identified by +/// If the argument is a GEP, then returns the operand identified by /// getGEPInductionOperand. However, if there is some other non-loop-invariant /// operand, it returns that instead. Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { @@ -140,7 +140,7 @@ Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { return GEP->getOperand(InductionOperand); } -/// \brief If a value has only one user that is a CastInst, return it. +/// If a value has only one user that is a CastInst, return it. Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { Value *UniqueCast = nullptr; for (User *U : Ptr->users()) { @@ -155,7 +155,7 @@ Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { return UniqueCast; } -/// \brief Get the stride of a pointer access in a loop. Looks for symbolic +/// Get the stride of a pointer access in a loop. Looks for symbolic /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise. Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { auto *PtrTy = dyn_cast<PointerType>(Ptr->getType()); @@ -163,7 +163,7 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { return nullptr; // Try to remove a gep instruction to make the pointer (actually index at this - // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the + // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the // pointer, otherwise, we are analyzing the index. Value *OrigPtr = Ptr; @@ -230,7 +230,7 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { return Stride; } -/// \brief Given a vector and an element number, see if the scalar value is +/// Given a vector and an element number, see if the scalar value is /// already around as a register, for example if it were inserted then extracted /// from the vector. Value *llvm::findScalarElement(Value *V, unsigned EltNo) { @@ -280,7 +280,7 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { return nullptr; } -/// \brief Get splat value if the input is a splat vector or return nullptr. +/// Get splat value if the input is a splat vector or return nullptr. /// This function is not fully general. It checks only 2 cases: /// the input value is (1) a splat constants vector or (2) a sequence /// of instructions that broadcast a single value into a vector. |
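Finally, the splat-detection helper documented at the end of this diff recognizes two shapes: a constant splat vector and an insertelement plus shufflevector broadcast sequence. A simplified analogue over a plain container captures only the first, value-based case; getSplatValue below is a standalone illustration, not the LLVM function of the same name.

#include <cassert>
#include <vector>

// Returns a pointer to the splatted value if every element compares equal to
// the first one, otherwise nullptr.
template <typename T>
static const T *getSplatValue(const std::vector<T> &V) {
  if (V.empty())
    return nullptr;
  for (const T &E : V)
    if (!(E == V.front()))
      return nullptr;
  return &V.front();
}

int main() {
  std::vector<int> A{7, 7, 7, 7}, B{7, 7, 8, 7};
  assert(getSplatValue(A) && *getSplatValue(A) == 7);
  assert(getSplatValue(B) == nullptr);
  return 0;
}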