author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-20 14:16:56 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-20 14:16:56 +0000 |
commit | 2cab237b5dbfe1b3e9c7aa7a3c02d2b98fcf7462 (patch) | |
tree | 524fe828571f81358bba62fdb6d04c6e5e96a2a4 /contrib/llvm/lib/Analysis | |
parent | 6c7828a2807ea5e50c79ca42dbedf2b589ce63b2 (diff) | |
parent | 044eb2f6afba375a914ac9d8024f8f5142bb912e (diff) | |
Merge llvm trunk r321017 to contrib/llvm.
Notes:
svn path=/projects/clang600-import/; revision=327023
Diffstat (limited to 'contrib/llvm/lib/Analysis')
62 files changed, 5226 insertions, 3644 deletions
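Much of the churn in the Analysis diffs below comes from upstream LLVM replacing the old `MRI_*` bit-mask constants with the `ModRefInfo` enum class and named lattice helpers (`isModSet`, `isNoModRef`, `isModOrRefSet`, `intersectModRef`, `unionModRef`, `clearMod`, `clearRef`, and friends). The sketch below is not code from this tree; it is a minimal, self-contained approximation of those helpers (the real definitions live in `llvm/Analysis/AliasAnalysis.h` and differ in detail), included only to show how the call sites in the hunks read before and after the change.

```cpp
// Minimal approximation of the post-r321017 ModRefInfo API (illustrative only).
#include <cassert>
#include <cstdint>

enum class ModRefInfo : uint8_t { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

static inline bool isNoModRef(ModRefInfo MRI) { return MRI == ModRefInfo::NoModRef; }
static inline bool isModOrRefSet(ModRefInfo MRI) { return MRI != ModRefInfo::NoModRef; }
static inline bool isModSet(ModRefInfo MRI) {
  return static_cast<uint8_t>(MRI) & static_cast<uint8_t>(ModRefInfo::Mod);
}
static inline ModRefInfo intersectModRef(ModRefInfo A, ModRefInfo B) {
  return ModRefInfo(static_cast<uint8_t>(A) & static_cast<uint8_t>(B));
}
static inline ModRefInfo unionModRef(ModRefInfo A, ModRefInfo B) {
  return ModRefInfo(static_cast<uint8_t>(A) | static_cast<uint8_t>(B));
}
static inline ModRefInfo clearMod(ModRefInfo A) {
  return intersectModRef(A, ModRefInfo::Ref);
}

int main() {
  // Old style (pre-merge):  Result = ModRefInfo(Result & MRI_Ref);
  // New style (post-merge): Result = clearMod(Result);
  ModRefInfo Result = ModRefInfo::ModRef;
  Result = clearMod(Result);                  // drop the Mod bit, keep Ref
  assert(!isModSet(Result) && isModOrRefSet(Result));

  // Old early exit: if (Result == MRI_NoModRef) return ...;
  // New early exit: if (isNoModRef(Result)) return ...;
  Result = intersectModRef(Result, ModRefInfo::Mod);
  assert(isNoModRef(Result));
  (void)unionModRef(Result, ModRefInfo::Ref); // union is the other lattice op used below
  return 0;
}
```

Read this way, most of the ModRefInfo hunks in AliasAnalysis.cpp, BasicAliasAnalysis.cpp, AliasSetTracker.cpp and AliasAnalysisEvaluator.cpp are renames that make the lattice operations explicit rather than behavioral changes.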
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index 4c29aeaa622f..dd2db1e5b27b 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -1,4 +1,4 @@ -//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==// +//==- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation --==// // // The LLVM Compiler Infrastructure // @@ -26,26 +26,35 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include <algorithm> +#include <cassert> +#include <functional> +#include <iterator> + using namespace llvm; /// Allow disabling BasicAA from the AA results. This is particularly useful @@ -110,13 +119,13 @@ bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, } ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { - ModRefInfo Result = MRI_ModRef; + ModRefInfo Result = ModRefInfo::ModRef; for (const auto &AA : AAs) { - Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx)); + Result = intersectModRef(Result, AA->getArgModRefInfo(CS, ArgIdx)); // Early-exit the moment we reach the bottom of the lattice. - if (Result == MRI_NoModRef) + if (isNoModRef(Result)) return Result; } @@ -129,29 +138,30 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) { // Check if the two calls modify the same memory return getModRefInfo(CS, Call); } else if (I->isFenceLike()) { - // If this is a fence, just return MRI_ModRef. - return MRI_ModRef; + // If this is a fence, just return ModRef. + return ModRefInfo::ModRef; } else { // Otherwise, check if the call modifies or references the // location this memory access defines. The best we can say // is that if the call references what this instruction // defines, it must be clobbered by this location. 
const MemoryLocation DefLoc = MemoryLocation::get(I); - if (getModRefInfo(Call, DefLoc) != MRI_NoModRef) - return MRI_ModRef; + ModRefInfo MR = getModRefInfo(Call, DefLoc); + if (isModOrRefSet(MR)) + return setModAndRef(MR); } - return MRI_NoModRef; + return ModRefInfo::NoModRef; } ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - ModRefInfo Result = MRI_ModRef; + ModRefInfo Result = ModRefInfo::ModRef; for (const auto &AA : AAs) { - Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc)); + Result = intersectModRef(Result, AA->getModRefInfo(CS, Loc)); // Early-exit the moment we reach the bottom of the lattice. - if (Result == MRI_NoModRef) + if (isNoModRef(Result)) return Result; } @@ -160,16 +170,16 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, auto MRB = getModRefBehavior(CS); if (MRB == FMRB_DoesNotAccessMemory || MRB == FMRB_OnlyAccessesInaccessibleMem) - return MRI_NoModRef; + return ModRefInfo::NoModRef; if (onlyReadsMemory(MRB)) - Result = ModRefInfo(Result & MRI_Ref); + Result = clearMod(Result); else if (doesNotReadMemory(MRB)) - Result = ModRefInfo(Result & MRI_Mod); + Result = clearRef(Result); if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) { bool DoesAlias = false; - ModRefInfo AllArgsMask = MRI_NoModRef; + ModRefInfo AllArgsMask = ModRefInfo::NoModRef; if (doesAccessArgPointees(MRB)) { for (auto AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { const Value *Arg = *AI; @@ -181,33 +191,34 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, if (ArgAlias != NoAlias) { ModRefInfo ArgMask = getArgModRefInfo(CS, ArgIdx); DoesAlias = true; - AllArgsMask = ModRefInfo(AllArgsMask | ArgMask); + AllArgsMask = unionModRef(AllArgsMask, ArgMask); } } } + // Return NoModRef if no alias found with any argument. if (!DoesAlias) - return MRI_NoModRef; - Result = ModRefInfo(Result & AllArgsMask); + return ModRefInfo::NoModRef; + // Logical & between other AA analyses and argument analysis. + Result = intersectModRef(Result, AllArgsMask); } // If Loc is a constant memory location, the call definitely could not // modify the memory location. - if ((Result & MRI_Mod) && - pointsToConstantMemory(Loc, /*OrLocal*/ false)) - Result = ModRefInfo(Result & ~MRI_Mod); + if (isModSet(Result) && pointsToConstantMemory(Loc, /*OrLocal*/ false)) + Result = clearMod(Result); return Result; } ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - ModRefInfo Result = MRI_ModRef; + ModRefInfo Result = ModRefInfo::ModRef; for (const auto &AA : AAs) { - Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2)); + Result = intersectModRef(Result, AA->getModRefInfo(CS1, CS2)); // Early-exit the moment we reach the bottom of the lattice. - if (Result == MRI_NoModRef) + if (isNoModRef(Result)) return Result; } @@ -217,28 +228,28 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, // If CS1 or CS2 are readnone, they don't interact. auto CS1B = getModRefBehavior(CS1); if (CS1B == FMRB_DoesNotAccessMemory) - return MRI_NoModRef; + return ModRefInfo::NoModRef; auto CS2B = getModRefBehavior(CS2); if (CS2B == FMRB_DoesNotAccessMemory) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // If they both only read from memory, there is no dependence. if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // If CS1 only reads memory, the only dependence on CS2 can be // from CS1 reading memory written by CS2. 
if (onlyReadsMemory(CS1B)) - Result = ModRefInfo(Result & MRI_Ref); + Result = clearMod(Result); else if (doesNotReadMemory(CS1B)) - Result = ModRefInfo(Result & MRI_Mod); + Result = clearRef(Result); // If CS2 only access memory through arguments, accumulate the mod/ref // information from CS1's references to the memory referenced by // CS2's arguments. if (onlyAccessesArgPointees(CS2B)) { - ModRefInfo R = MRI_NoModRef; + ModRefInfo R = ModRefInfo::NoModRef; if (doesAccessArgPointees(CS2B)) { for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { const Value *Arg = *I; @@ -247,17 +258,23 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI); - // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence - // of CS1 on that location is the inverse. - ModRefInfo ArgMask = getArgModRefInfo(CS2, CS2ArgIdx); - if (ArgMask == MRI_Mod) - ArgMask = MRI_ModRef; - else if (ArgMask == MRI_Ref) - ArgMask = MRI_Mod; - - ArgMask = ModRefInfo(ArgMask & getModRefInfo(CS1, CS2ArgLoc)); - - R = ModRefInfo((R | ArgMask) & Result); + // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the + // dependence of CS1 on that location is the inverse: + // - If CS2 modifies location, dependence exists if CS1 reads or writes. + // - If CS2 only reads location, dependence exists if CS1 writes. + ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx); + ModRefInfo ArgMask = ModRefInfo::NoModRef; + if (isModSet(ArgModRefCS2)) + ArgMask = ModRefInfo::ModRef; + else if (isRefSet(ArgModRefCS2)) + ArgMask = ModRefInfo::Mod; + + // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use + // above ArgMask to update dependence info. + ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc); + ArgMask = intersectModRef(ArgMask, ModRefCS1); + + R = intersectModRef(unionModRef(R, ArgMask), Result); if (R == Result) break; } @@ -268,7 +285,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, // If CS1 only accesses memory through arguments, check if CS2 references // any of the memory referenced by CS1's arguments. If not, return NoModRef. if (onlyAccessesArgPointees(CS1B)) { - ModRefInfo R = MRI_NoModRef; + ModRefInfo R = ModRefInfo::NoModRef; if (doesAccessArgPointees(CS1B)) { for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { const Value *Arg = *I; @@ -277,16 +294,14 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI); - // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod - // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 - // might Ref, then we care only about a Mod by CS2. - ModRefInfo ArgMask = getArgModRefInfo(CS1, CS1ArgIdx); - ModRefInfo ArgR = getModRefInfo(CS2, CS1ArgLoc); - if (((ArgMask & MRI_Mod) != MRI_NoModRef && - (ArgR & MRI_ModRef) != MRI_NoModRef) || - ((ArgMask & MRI_Ref) != MRI_NoModRef && - (ArgR & MRI_Mod) != MRI_NoModRef)) - R = ModRefInfo((R | ArgMask) & Result); + // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might + // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If + // CS1 might Ref, then we care only about a Mod by CS2. 
+ ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx); + ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc); + if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) || + (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2))) + R = intersectModRef(unionModRef(R, ArgModRefCS1), Result); if (R == Result) break; @@ -334,64 +349,63 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) { // Be conservative in the face of atomic. if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered)) - return MRI_ModRef; + return ModRefInfo::ModRef; // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // Otherwise, a load just reads. - return MRI_Ref; + return ModRefInfo::Ref; } ModRefInfo AAResults::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) { // Be conservative in the face of atomic. if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered)) - return MRI_ModRef; + return ModRefInfo::ModRef; if (Loc.Ptr) { // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. if (!alias(MemoryLocation::get(S), Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this store. if (pointsToConstantMemory(Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; } // Otherwise, a store just writes. - return MRI_Mod; + return ModRefInfo::Mod; } ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { // If we know that the location is a constant memory location, the fence // cannot modify this location. if (Loc.Ptr && pointsToConstantMemory(Loc)) - return MRI_Ref; - return MRI_ModRef; + return ModRefInfo::Ref; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) { - if (Loc.Ptr) { // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. if (!alias(MemoryLocation::get(V), Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this va_arg. if (pointsToConstantMemory(Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; } // Otherwise, a va_arg reads and writes. - return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, @@ -400,11 +414,11 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, // If the pointer is a pointer to constant memory, // then it could not have been modified by this catchpad. if (pointsToConstantMemory(Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; } // Otherwise, a catchpad reads and writes. - return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, @@ -413,42 +427,42 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, // If the pointer is a pointer to constant memory, // then it could not have been modified by this catchpad. if (pointsToConstantMemory(Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; } // Otherwise, a catchret reads and writes. 
- return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX, const MemoryLocation &Loc) { // Acquire/Release cmpxchg has properties that matter for arbitrary addresses. if (isStrongerThanMonotonic(CX->getSuccessOrdering())) - return MRI_ModRef; + return ModRefInfo::ModRef; // If the cmpxchg address does not alias the location, it does not access it. if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; - return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc) { // Acquire/Release atomicrmw has properties that matter for arbitrary addresses. if (isStrongerThanMonotonic(RMW->getOrdering())) - return MRI_ModRef; + return ModRefInfo::ModRef; // If the atomicrmw address does not alias the location, it does not access it. if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; - return MRI_ModRef; + return ModRefInfo::ModRef; } /// \brief Return information about whether a particular call site modifies /// or reads the specified memory location \p MemLoc before instruction \p I -/// in a BasicBlock. A ordered basic block \p OBB can be used to speed up +/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up /// instruction-ordering queries inside the BasicBlock containing \p I. /// FIXME: this is really just shoring-up a deficiency in alias analysis. /// BasicAA isn't willing to spend linear time determining whether an alloca @@ -459,26 +473,26 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, DominatorTree *DT, OrderedBasicBlock *OBB) { if (!DT) - return MRI_ModRef; + return ModRefInfo::ModRef; const Value *Object = GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout()); if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) || isa<Constant>(Object)) - return MRI_ModRef; + return ModRefInfo::ModRef; ImmutableCallSite CS(I); if (!CS.getInstruction() || CS.getInstruction() == Object) - return MRI_ModRef; + return ModRefInfo::ModRef; - if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true, - /* StoreCaptures */ true, I, DT, - /* include Object */ true, - /* OrderedBasicBlock */ OBB)) - return MRI_ModRef; + if (PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true, + /* StoreCaptures */ true, I, DT, + /* include Object */ true, + /* OrderedBasicBlock */ OBB)) + return ModRefInfo::ModRef; unsigned ArgNo = 0; - ModRefInfo R = MRI_NoModRef; + ModRefInfo R = ModRefInfo::NoModRef; for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end(); CI != CE; ++CI, ++ArgNo) { // Only look at the no-capture or byval pointer arguments. 
If this @@ -498,10 +512,10 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, if (CS.doesNotAccessMemory(ArgNo)) continue; if (CS.onlyReadsMemory(ArgNo)) { - R = MRI_Ref; + R = ModRefInfo::Ref; continue; } - return MRI_ModRef; + return ModRefInfo::ModRef; } return R; } @@ -511,7 +525,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, /// bool AAResults::canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc) { - return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod); + return canInstructionRangeModRef(BB.front(), BB.back(), Loc, ModRefInfo::Mod); } /// canInstructionRangeModRef - Return true if it is possible for the @@ -530,22 +544,23 @@ bool AAResults::canInstructionRangeModRef(const Instruction &I1, ++E; // Convert from inclusive to exclusive range. for (; I != E; ++I) // Check every instruction in range - if (getModRefInfo(&*I, Loc) & Mode) + if (isModOrRefSet(intersectModRef(getModRefInfo(&*I, Loc), Mode))) return true; return false; } // Provide a definition for the root virtual destructor. -AAResults::Concept::~Concept() {} +AAResults::Concept::~Concept() = default; // Provide a definition for the static object used to identify passes. AnalysisKey AAManager::Key; namespace { + /// A wrapper pass for external alias analyses. This just squirrels away the /// callback used to run any analyses and register their results. struct ExternalAAWrapperPass : ImmutablePass { - typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT; + using CallbackT = std::function<void(Pass &, Function &, AAResults &)>; CallbackT CB; @@ -554,6 +569,7 @@ struct ExternalAAWrapperPass : ImmutablePass { ExternalAAWrapperPass() : ImmutablePass(ID) { initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); } + explicit ExternalAAWrapperPass(CallbackT CB) : ImmutablePass(ID), CB(std::move(CB)) { initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); @@ -563,9 +579,11 @@ struct ExternalAAWrapperPass : ImmutablePass { AU.setPreservesAll(); } }; -} + +} // end anonymous namespace char ExternalAAWrapperPass::ID = 0; + INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis", false, true) diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index 435c782d97a5..423acf739f58 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -244,20 +244,20 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy); switch (AA.getModRefInfo(C, Pointer, Size)) { - case MRI_NoModRef: + case ModRefInfo::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, I, Pointer, F.getParent()); ++NoModRefCount; break; - case MRI_Mod: + case ModRefInfo::Mod: PrintModRefResults("Just Mod", PrintMod, I, Pointer, F.getParent()); ++ModCount; break; - case MRI_Ref: + case ModRefInfo::Ref: PrintModRefResults("Just Ref", PrintRef, I, Pointer, F.getParent()); ++RefCount; break; - case MRI_ModRef: + case ModRefInfo::ModRef: PrintModRefResults("Both ModRef", PrintModRef, I, Pointer, F.getParent()); ++ModRefCount; @@ -272,19 +272,19 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { if (D == C) continue; switch (AA.getModRefInfo(*C, *D)) { - case MRI_NoModRef: + case ModRefInfo::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent()); ++NoModRefCount; break; - case MRI_Mod: + case ModRefInfo::Mod: 
PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent()); ++ModCount; break; - case MRI_Ref: + case ModRefInfo::Ref: PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent()); ++RefCount; break; - case MRI_ModRef: + case ModRefInfo::ModRef: PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); ++ModRefCount; break; diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index 4dfa25490d00..c88e0dd7dc44 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -13,17 +13,29 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <vector> + using namespace llvm; static cl::opt<unsigned> @@ -106,7 +118,6 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) { TotalMayAliasSetSize -= AS->size(); AliasSets.erase(AS); - } void AliasSet::removeFromTracker(AliasSetTracker &AST) { @@ -200,8 +211,8 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, if (!UnknownInsts.empty()) { for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) if (auto *Inst = getUnknownInst(i)) - if (AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)) != - MRI_NoModRef) + if (isModOrRefSet( + AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)))) return true; } @@ -220,15 +231,15 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { if (auto *UnknownInst = getUnknownInst(i)) { ImmutableCallSite C1(UnknownInst), C2(Inst); - if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || - AA.getModRefInfo(C2, C1) != MRI_NoModRef) + if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) || + isModOrRefSet(AA.getModRefInfo(C2, C1))) return true; } } for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(), - I.getAAInfo())) != MRI_NoModRef) + if (isModOrRefSet(AA.getModRefInfo( + Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))) return true; return false; @@ -425,6 +436,7 @@ void AliasSetTracker::addUnknown(Instruction *Inst) { break; // FIXME: Add lifetime/invariant intrinsics (See: PR30807). case Intrinsic::assume: + case Intrinsic::sideeffect: return; } } @@ -560,12 +572,11 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() { AliasAnyAS->AliasAny = true; for (auto Cur : ASVector) { - // If Cur was already forwarding, just forward to the new AS instead. 
AliasSet *FwdTo = Cur->Forward; if (FwdTo) { Cur->Forward = AliasAnyAS; - AliasAnyAS->addRef(); + AliasAnyAS->addRef(); FwdTo->dropRef(*this); continue; } @@ -580,7 +591,6 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() { AliasSet &AliasSetTracker::addPointer(Value *P, uint64_t Size, const AAMDNodes &AAInfo, AliasSet::AccessLattice E) { - AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo); AS.Access |= E; @@ -611,7 +621,6 @@ void AliasSet::print(raw_ostream &OS) const { if (Forward) OS << " forwarding to " << (void*)Forward; - if (!empty()) { OS << "Pointers: "; for (iterator I = begin(), E = end(); I != E; ++I) { @@ -671,10 +680,13 @@ AliasSetTracker::ASTCallbackVH::operator=(Value *V) { //===----------------------------------------------------------------------===// namespace { + class AliasSetPrinter : public FunctionPass { AliasSetTracker *Tracker; + public: static char ID; // Pass identification, replacement for typeid + AliasSetPrinter() : FunctionPass(ID) { initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry()); } @@ -695,9 +707,11 @@ namespace { return false; } }; -} + +} // end anonymous namespace char AliasSetPrinter::ID = 0; + INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets", "Alias Set Printer", false, true) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp index 3ff27890dc38..8bfd24ccf77b 100644 --- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp +++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp @@ -13,14 +13,26 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Dominators.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <utility> + using namespace llvm; using namespace llvm::PatternMatch; @@ -255,8 +267,9 @@ AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) { initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry()); } -AssumptionCacheTracker::~AssumptionCacheTracker() {} +AssumptionCacheTracker::~AssumptionCacheTracker() = default; + +char AssumptionCacheTracker::ID = 0; INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker", "Assumption Cache Tracker", false, true) -char AssumptionCacheTracker::ID = 0; diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index e682a644ef2c..81b9f842249e 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" 
@@ -23,21 +25,40 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/KnownBits.h" -#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <utility> #define DEBUG_TYPE "basicaa" @@ -223,7 +244,6 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { - // If we've been called recursively, then Offset and Scale will be wider // than the BOp operands. We'll always zext it here as we'll process sign // extensions below (see the isa<SExtInst> / isa<ZExtInst> cases). @@ -574,7 +594,6 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, // Otherwise be conservative. Visited.clear(); return AAResultBase::pointsToConstantMemory(Loc, OrLocal); - } while (!Worklist.empty() && --MaxLookup); Visited.clear(); @@ -598,6 +617,10 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) { if (CS.onlyAccessesArgMemory()) Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); + else if (CS.onlyAccessesInaccessibleMemory()) + Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem); + else if (CS.onlyAccessesInaccessibleMemOrArgMem()) + Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem); // If CS has operand bundles then aliasing attributes from the function it // calls do not directly apply to the CallSite. This can be made more @@ -662,16 +685,15 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx, ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { - // Checking for known builtin intrinsics and target library functions. 
if (isWriteOnlyParam(CS, ArgIdx, TLI)) - return MRI_Mod; + return ModRefInfo::Mod; if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly)) - return MRI_Ref; + return ModRefInfo::Ref; if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getArgModRefInfo(CS, ArgIdx); } @@ -748,7 +770,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, if (isa<AllocaInst>(Object)) if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) if (CI->isTailCall()) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // If the pointer is to a locally allocated object that does not escape, // then the call can not mod/ref the pointer unless the call takes the pointer @@ -758,7 +780,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // Optimistically assume that call doesn't touch Object and check this // assumption in the following loop. - ModRefInfo Result = MRI_NoModRef; + ModRefInfo Result = ModRefInfo::NoModRef; unsigned OperandNo = 0; for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end(); @@ -787,21 +809,21 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // Operand aliases 'Object', but call doesn't modify it. Strengthen // initial assumption and keep looking in case if there are more aliases. if (CS.onlyReadsMemory(OperandNo)) { - Result = static_cast<ModRefInfo>(Result | MRI_Ref); + Result = setRef(Result); continue; } // Operand aliases 'Object' but call only writes into it. if (CS.doesNotReadMemory(OperandNo)) { - Result = static_cast<ModRefInfo>(Result | MRI_Mod); + Result = setMod(Result); continue; } // This operand aliases 'Object' and call reads and writes into it. - Result = MRI_ModRef; + Result = ModRefInfo::ModRef; break; } // Early return if we improved mod ref information - if (Result != MRI_ModRef) + if (!isModAndRefSet(Result)) return Result; } @@ -810,13 +832,13 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // routines do not read values visible in the IR. TODO: Consider special // casing realloc and strdup routines which access only their arguments as // well. Or alternatively, replace all of this with inaccessiblememonly once - // that's implemented fully. + // that's implemented fully. auto *Inst = CS.getInstruction(); if (isMallocOrCallocLikeFn(Inst, &TLI)) { // Be conservative if the accessed pointer may alias the allocation - // fallback to the generic handling below. if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias) - return MRI_NoModRef; + return ModRefInfo::NoModRef; } // The semantics of memcpy intrinsics forbid overlap between their respective @@ -829,18 +851,18 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc)) == MustAlias) // Loc is exactly the memcpy source thus disjoint from memcpy dest. - return MRI_Ref; + return ModRefInfo::Ref; if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc)) == MustAlias) // The converse case. - return MRI_Mod; + return ModRefInfo::Mod; // It's also possible for Loc to alias both src and dest, or neither. 
- ModRefInfo rv = MRI_NoModRef; + ModRefInfo rv = ModRefInfo::NoModRef; if (SrcAA != NoAlias) - rv = static_cast<ModRefInfo>(rv | MRI_Ref); + rv = setRef(rv); if (DestAA != NoAlias) - rv = static_cast<ModRefInfo>(rv | MRI_Mod); + rv = setMod(rv); return rv; } @@ -848,7 +870,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // proper control dependencies will be maintained, it never aliases any // particular memory location. if (isIntrinsicCall(CS, Intrinsic::assume)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // Like assumes, guard intrinsics are also marked as arbitrarily writing so // that proper control dependencies are maintained but they never mods any @@ -858,7 +880,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // heap state at the point the guard is issued needs to be consistent in case // the guard invokes the "deopt" continuation. if (isIntrinsicCall(CS, Intrinsic::experimental_guard)) - return MRI_Ref; + return ModRefInfo::Ref; // Like assumes, invariant.start intrinsics were also marked as arbitrarily // writing so that proper control dependencies are maintained but they never @@ -884,7 +906,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // rules of invariant.start) and print 40, while the first program always // prints 50. if (isIntrinsicCall(CS, Intrinsic::invariant_start)) - return MRI_Ref; + return ModRefInfo::Ref; // The AAResultBase base class has some smarts, lets use them. return AAResultBase::getModRefInfo(CS, Loc); @@ -897,7 +919,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, // particular memory location. if (isIntrinsicCall(CS1, Intrinsic::assume) || isIntrinsicCall(CS2, Intrinsic::assume)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // Like assumes, guard intrinsics are also marked as arbitrarily writing so // that proper control dependencies are maintained but they never mod any @@ -911,10 +933,14 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, // possibilities for guard intrinsics. if (isIntrinsicCall(CS1, Intrinsic::experimental_guard)) - return getModRefBehavior(CS2) & MRI_Mod ? MRI_Ref : MRI_NoModRef; + return isModSet(createModRefInfo(getModRefBehavior(CS2))) + ? ModRefInfo::Ref + : ModRefInfo::NoModRef; if (isIntrinsicCall(CS2, Intrinsic::experimental_guard)) - return getModRefBehavior(CS1) & MRI_Mod ? MRI_Mod : MRI_NoModRef; + return isModSet(createModRefInfo(getModRefBehavior(CS1))) + ? ModRefInfo::Mod + : ModRefInfo::NoModRef; // The AAResultBase base class has some smarts, lets use them. return AAResultBase::getModRefInfo(CS1, CS2); @@ -927,7 +953,6 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, const GEPOperator *GEP2, uint64_t V2Size, const DataLayout &DL) { - assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && GEP1->getPointerOperandType() == GEP2->getPointerOperandType() && @@ -1196,8 +1221,10 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. - if (BaseAlias != MustAlias) + if (BaseAlias != MustAlias) { + assert(BaseAlias == NoAlias || BaseAlias == MayAlias); return BaseAlias; + } // Otherwise, we have a MustAlias. 
Since the base pointers alias each other // exactly, see if the computed offset from the common pointer tells us @@ -1236,13 +1263,15 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, AAMDNodes(), V2, MemoryLocation::UnknownSize, V2AAInfo, nullptr, UnderlyingV2); - if (R != MustAlias) + if (R != MustAlias) { // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values // cannot alias per GEP semantics: "Any memory access must be done through // a pointer value associated with an address range of the memory access, // otherwise the behavior is undefined.". + assert(R == NoAlias || R == MayAlias); return R; + } // If the max search depth is reached the result is undefined if (GEP1MaxLookupReached) @@ -1569,11 +1598,6 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, (isa<Argument>(O2) && isIdentifiedFunctionLocal(O1))) return NoAlias; - // Most objects can't alias null. - if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) || - (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2))) - return NoAlias; - // If one pointer is the result of a call/invoke or load and the other is a // non-escaping local object within the same function, then we know the // object couldn't escape to a point where the call could return it. @@ -1652,9 +1676,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, // If both pointers are pointing into the same object and one of them // accesses the entire object, then the accesses must overlap in some way. if (O1 == O2) - if ((V1Size != MemoryLocation::UnknownSize && - isObjectSize(O1, V1Size, DL, TLI)) || - (V2Size != MemoryLocation::UnknownSize && + if (V1Size != MemoryLocation::UnknownSize && + V2Size != MemoryLocation::UnknownSize && + (isObjectSize(O1, V1Size, DL, TLI) || isObjectSize(O2, V2Size, DL, TLI))) return AliasCache[Locs] = PartialAlias; @@ -1810,6 +1834,7 @@ BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { } char BasicAAWrapperPass::ID = 0; + void BasicAAWrapperPass::anchor() {} INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp index 07a2a9229fd5..41c295895213 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -12,15 +12,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/iterator.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" #include "llvm/IR/CFG.h" -#include "llvm/InitializePasses.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <string> using namespace llvm; @@ -54,52 +61,67 @@ cl::opt<unsigned> "is no less than the max frequency of the " "function multiplied by this percent.")); -// Command line option to turn on CFG dot dump after profile annotation. 
-cl::opt<bool> - PGOViewCounts("pgo-view-counts", cl::init(false), cl::Hidden, - cl::desc("A boolean option to show CFG dag with " - "block profile counts and branch probabilities " - "right after PGO profile annotation step. The " - "profile counts are computed using branch " - "probabilities from the runtime profile data and " - "block frequency propagation algorithm. To view " - "the raw counts from the profile, use option " - "-pgo-view-raw-counts instead. To limit graph " - "display to only one function, use filtering option " - "-view-bfi-func-name.")); +// Command line option to turn on CFG dot or text dump after profile annotation. +cl::opt<PGOViewCountsType> PGOViewCounts( + "pgo-view-counts", cl::Hidden, + cl::desc("A boolean option to show CFG dag or text with " + "block profile counts and branch probabilities " + "right after PGO profile annotation step. The " + "profile counts are computed using branch " + "probabilities from the runtime profile data and " + "block frequency propagation algorithm. To view " + "the raw counts from the profile, use option " + "-pgo-view-raw-counts instead. To limit graph " + "display to only one function, use filtering option " + "-view-bfi-func-name."), + cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), + clEnumValN(PGOVCT_Graph, "graph", "show a graph."), + clEnumValN(PGOVCT_Text, "text", "show in text."))); + +static cl::opt<bool> PrintBlockFreq( + "print-bfi", cl::init(false), cl::Hidden, + cl::desc("Print the block frequency info.")); + +cl::opt<std::string> PrintBlockFreqFuncName( + "print-bfi-func-name", cl::Hidden, + cl::desc("The option to specify the name of the function " + "whose block frequency info is printed.")); namespace llvm { static GVDAGType getGVDT() { - - if (PGOViewCounts) + if (PGOViewCounts == PGOVCT_Graph) return GVDT_Count; return ViewBlockFreqPropagationDAG; } template <> struct GraphTraits<BlockFrequencyInfo *> { - typedef const BasicBlock *NodeRef; - typedef succ_const_iterator ChildIteratorType; - typedef pointer_iterator<Function::const_iterator> nodes_iterator; + using NodeRef = const BasicBlock *; + using ChildIteratorType = succ_const_iterator; + using nodes_iterator = pointer_iterator<Function::const_iterator>; static NodeRef getEntryNode(const BlockFrequencyInfo *G) { return &G->getFunction()->front(); } + static ChildIteratorType child_begin(const NodeRef N) { return succ_begin(N); } + static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } + static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) { return nodes_iterator(G->getFunction()->begin()); } + static nodes_iterator nodes_end(const BlockFrequencyInfo *G) { return nodes_iterator(G->getFunction()->end()); } }; -typedef BFIDOTGraphTraitsBase<BlockFrequencyInfo, BranchProbabilityInfo> - BFIDOTGTraitsBase; +using BFIDOTGTraitsBase = + BFIDOTGraphTraitsBase<BlockFrequencyInfo, BranchProbabilityInfo>; template <> struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase { @@ -127,7 +149,7 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase { } // end namespace llvm -BlockFrequencyInfo::BlockFrequencyInfo() {} +BlockFrequencyInfo::BlockFrequencyInfo() = default; BlockFrequencyInfo::BlockFrequencyInfo(const Function &F, const BranchProbabilityInfo &BPI, @@ -148,7 +170,7 @@ BlockFrequencyInfo &BlockFrequencyInfo::operator=(BlockFrequencyInfo &&RHS) { // defined at the first ODR-use which is the BFI member in the // LazyBlockFrequencyInfo header. 
The dtor needs the BlockFrequencyInfoImpl // template instantiated which is not available in the header. -BlockFrequencyInfo::~BlockFrequencyInfo() {} +BlockFrequencyInfo::~BlockFrequencyInfo() = default; bool BlockFrequencyInfo::invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &) { @@ -170,6 +192,11 @@ void BlockFrequencyInfo::calculate(const Function &F, F.getName().equals(ViewBlockFreqFuncName))) { view(); } + if (PrintBlockFreq && + (PrintBlockFreqFuncName.empty() || + F.getName().equals(PrintBlockFreqFuncName))) { + print(dbgs()); + } } BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { @@ -191,6 +218,11 @@ BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { return BFI->getProfileCountFromFreq(*getFunction(), Freq); } +bool BlockFrequencyInfo::isIrrLoopHeader(const BasicBlock *BB) { + assert(BFI && "Expected analysis to be available"); + return BFI->isIrrLoopHeader(BB); +} + void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) { assert(BFI && "Expected analysis to be available"); BFI->setBlockFreq(BB, Freq); @@ -254,7 +286,6 @@ void BlockFrequencyInfo::print(raw_ostream &OS) const { BFI->print(OS); } - INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq", "Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) @@ -264,13 +295,12 @@ INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq", char BlockFrequencyInfoWrapperPass::ID = 0; - BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass() : FunctionPass(ID) { initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } -BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {} +BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() = default; void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS, const Module *) const { diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index e5d8c3347c16..7e323022d9ce 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -12,10 +12,28 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/IR/Function.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ScaledNumber.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <list> #include <numeric> +#include <utility> +#include <vector> using namespace llvm; using namespace llvm::bfi_detail; @@ -47,13 +65,13 @@ raw_ostream &BlockMass::print(raw_ostream &OS) const { namespace { -typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; -typedef BlockFrequencyInfoImplBase::Distribution Distribution; -typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList; -typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64; -typedef BlockFrequencyInfoImplBase::LoopData LoopData; -typedef BlockFrequencyInfoImplBase::Weight Weight; -typedef BlockFrequencyInfoImplBase::FrequencyData 
FrequencyData; +using BlockNode = BlockFrequencyInfoImplBase::BlockNode; +using Distribution = BlockFrequencyInfoImplBase::Distribution; +using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList; +using Scaled64 = BlockFrequencyInfoImplBase::Scaled64; +using LoopData = BlockFrequencyInfoImplBase::LoopData; +using Weight = BlockFrequencyInfoImplBase::Weight; +using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData; /// \brief Dithering mass distributer. /// @@ -158,7 +176,8 @@ static void combineWeightsBySorting(WeightList &Weights) { static void combineWeightsByHashing(WeightList &Weights) { // Collect weights into a DenseMap. - typedef DenseMap<BlockNode::IndexType, Weight> HashTable; + using HashTable = DenseMap<BlockNode::IndexType, Weight>; + HashTable Combined(NextPowerOf2(2 * Weights.size())); for (const Weight &W : Weights) combineWeight(Combined[W.TargetNode.Index], W); @@ -252,6 +271,7 @@ void BlockFrequencyInfoImplBase::clear() { // Swap with a default-constructed std::vector, since std::vector<>::clear() // does not actually clear heap storage. std::vector<FrequencyData>().swap(Freqs); + IsIrrLoopHeader.clear(); std::vector<WorkingData>().swap(Working); Loops.clear(); } @@ -261,8 +281,10 @@ void BlockFrequencyInfoImplBase::clear() { /// Releases all memory not used downstream. In particular, saves Freqs. static void cleanup(BlockFrequencyInfoImplBase &BFI) { std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs)); + SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader)); BFI.clear(); BFI.Freqs = std::move(SavedFreqs); + BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader); } bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, @@ -553,6 +575,13 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, return BlockCount.getLimitedValue(); } +bool +BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) { + if (!Node.isValid()) + return false; + return IsIrrLoopHeader.test(Node.Index); +} + Scaled64 BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { if (!Node.isValid()) @@ -569,7 +598,7 @@ void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node, std::string BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { - return std::string(); + return {}; } std::string @@ -627,16 +656,17 @@ void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ, } namespace llvm { -template <> struct GraphTraits<IrreducibleGraph> { - typedef bfi_detail::IrreducibleGraph GraphT; - typedef const GraphT::IrrNode *NodeRef; - typedef GraphT::IrrNode::iterator ChildIteratorType; +template <> struct GraphTraits<IrreducibleGraph> { + using GraphT = bfi_detail::IrreducibleGraph; + using NodeRef = const GraphT::IrrNode *; + using ChildIteratorType = GraphT::IrrNode::iterator; static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; } static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } }; + } // end namespace llvm /// \brief Find extra irreducible headers. 
@@ -799,3 +829,14 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); } } + +void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) { + BlockMass LoopMass = BlockMass::getFull(); + DitheringDistributer D(Dist, LoopMass); + for (const Weight &W : Dist.Weights) { + BlockMass Taken = D.takeMass(W.Amount); + assert(W.Type == Weight::Local && "all weights should be local"); + Working[W.TargetNode.Index].getMass() = Taken; + DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); + } +} diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index a329e5ad48c9..58ccad89d508 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -1,4 +1,4 @@ -//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===// +//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===// // // The LLVM Compiler Infrastructure // @@ -13,21 +13,47 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <utility> using namespace llvm; #define DEBUG_TYPE "branch-prob" +static cl::opt<bool> PrintBranchProb( + "print-bpi", cl::init(false), cl::Hidden, + cl::desc("Print the branch probability info.")); + +cl::opt<std::string> PrintBranchProbFuncName( + "print-bpi-func-name", cl::Hidden, + cl::desc("The option to specify the name of the function " + "whose branch probability info is printed.")); + INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) @@ -221,7 +247,7 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); - if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) + if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI))) return false; MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); @@ -399,25 +425,73 @@ bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { return true; } +static int getSCCNum(const BasicBlock *BB, + const BranchProbabilityInfo::SccInfo &SccI) { + auto SccIt = SccI.SccNums.find(BB); + if (SccIt == SccI.SccNums.end()) + return -1; + return SccIt->second; +} + +// Consider any block that is an entry point to the SCC as a header. 
+static bool isSCCHeader(const BasicBlock *BB, int SccNum, + BranchProbabilityInfo::SccInfo &SccI) { + assert(getSCCNum(BB, SccI) == SccNum); + + // Lazily compute the set of headers for a given SCC and cache the results + // in the SccHeaderMap. + if (SccI.SccHeaders.size() <= static_cast<unsigned>(SccNum)) + SccI.SccHeaders.resize(SccNum + 1); + auto &HeaderMap = SccI.SccHeaders[SccNum]; + bool Inserted; + BranchProbabilityInfo::SccHeaderMap::iterator HeaderMapIt; + std::tie(HeaderMapIt, Inserted) = HeaderMap.insert(std::make_pair(BB, false)); + if (Inserted) { + bool IsHeader = llvm::any_of(make_range(pred_begin(BB), pred_end(BB)), + [&](const BasicBlock *Pred) { + return getSCCNum(Pred, SccI) != SccNum; + }); + HeaderMapIt->second = IsHeader; + return IsHeader; + } else + return HeaderMapIt->second; +} + // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, - const LoopInfo &LI) { + const LoopInfo &LI, + SccInfo &SccI) { + int SccNum; Loop *L = LI.getLoopFor(BB); - if (!L) - return false; + if (!L) { + SccNum = getSCCNum(BB, SccI); + if (SccNum < 0) + return false; + } SmallVector<unsigned, 8> BackEdges; SmallVector<unsigned, 8> ExitingEdges; SmallVector<unsigned, 8> InEdges; // Edges from header to the loop. for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - if (!L->contains(*I)) - ExitingEdges.push_back(I.getSuccessorIndex()); - else if (L->getHeader() == *I) - BackEdges.push_back(I.getSuccessorIndex()); - else - InEdges.push_back(I.getSuccessorIndex()); + // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch + // irreducible loops. + if (L) { + if (!L->contains(*I)) + ExitingEdges.push_back(I.getSuccessorIndex()); + else if (L->getHeader() == *I) + BackEdges.push_back(I.getSuccessorIndex()); + else + InEdges.push_back(I.getSuccessorIndex()); + } else { + if (getSCCNum(*I, SccI) != SccNum) + ExitingEdges.push_back(I.getSuccessorIndex()); + else if (isSCCHeader(*I, SccNum, SccI)) + BackEdges.push_back(I.getSuccessorIndex()); + else + InEdges.push_back(I.getSuccessorIndex()); + } } if (BackEdges.empty() && ExitingEdges.empty()) @@ -480,7 +554,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) if (LHS->getOpcode() == Instruction::And) if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) - if (AndRHS->getUniqueInteger().isPowerOf2()) + if (AndRHS->getValue().isPowerOf2()) return false; // Check if the LHS is the return value of a library function @@ -722,7 +796,6 @@ raw_ostream & BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const { - const BranchProbability Prob = getEdgeProbability(Src, Dst); OS << "edge " << Src->getName() << " -> " << Dst->getName() << " probability is " << Prob @@ -747,6 +820,27 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, assert(PostDominatedByUnreachable.empty()); assert(PostDominatedByColdCall.empty()); + // Record SCC numbers of blocks in the CFG to identify irreducible loops. + // FIXME: We could only calculate this if the CFG is known to be irreducible + // (perhaps cache this info in LoopInfo if we can easily calculate it there?). 
+ int SccNum = 0; + SccInfo SccI; + for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd(); + ++It, ++SccNum) { + // Ignore single-block SCCs since they either aren't loops or LoopInfo will + // catch them. + const std::vector<const BasicBlock *> &Scc = *It; + if (Scc.size() == 1) + continue; + + DEBUG(dbgs() << "BPI: SCC " << SccNum << ":"); + for (auto *BB : Scc) { + DEBUG(dbgs() << " " << BB->getName()); + SccI.SccNums[BB] = SccNum; + } + DEBUG(dbgs() << "\n"); + } + // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. for (auto BB : post_order(&F.getEntryBlock())) { @@ -762,7 +856,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, continue; if (calcColdCallHeuristics(BB)) continue; - if (calcLoopBranchHeuristics(BB, LI)) + if (calcLoopBranchHeuristics(BB, LI, SccI)) continue; if (calcPointerHeuristics(BB)) continue; @@ -775,6 +869,12 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, PostDominatedByUnreachable.clear(); PostDominatedByColdCall.clear(); + + if (PrintBranchProb && + (PrintBranchProbFuncName.empty() || + F.getName().equals(PrintBranchProbFuncName))) { + print(dbgs()); + } } void BranchProbabilityInfoWrapperPass::getAnalysisUsage( diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index 0de7ad98af46..076a2b205d00 100644 --- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -1,4 +1,4 @@ -//- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ---*- C++-*-// +//===- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ------===// // // The LLVM Compiler Infrastructure // @@ -54,9 +54,35 @@ // FunctionPasses to run concurrently. 
#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "AliasAnalysisSummary.h" #include "CFLGraph.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <bitset> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <functional> +#include <utility> +#include <vector> using namespace llvm; using namespace llvm::cflaa; @@ -66,7 +92,7 @@ using namespace llvm::cflaa; CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {} CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {} -CFLAndersAAResult::~CFLAndersAAResult() {} +CFLAndersAAResult::~CFLAndersAAResult() = default; namespace { @@ -95,7 +121,8 @@ enum class MatchState : uint8_t { FlowToMemAliasReadWrite, }; -typedef std::bitset<7> StateSet; +using StateSet = std::bitset<7>; + const unsigned ReadOnlyStateMask = (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) | (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly)); @@ -130,13 +157,14 @@ bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) { // We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in // the paper) during the analysis. class ReachabilitySet { - typedef DenseMap<InstantiatedValue, StateSet> ValueStateMap; - typedef DenseMap<InstantiatedValue, ValueStateMap> ValueReachMap; + using ValueStateMap = DenseMap<InstantiatedValue, StateSet>; + using ValueReachMap = DenseMap<InstantiatedValue, ValueStateMap>; + ValueReachMap ReachMap; public: - typedef ValueStateMap::const_iterator const_valuestate_iterator; - typedef ValueReachMap::const_iterator const_value_iterator; + using const_valuestate_iterator = ValueStateMap::const_iterator; + using const_value_iterator = ValueReachMap::const_iterator; // Insert edge 'From->To' at state 'State' bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) { @@ -169,12 +197,13 @@ public: // We use AliasMemSet to keep track of all memory aliases (the nonterminal "M" // in the paper) during the analysis. class AliasMemSet { - typedef DenseSet<InstantiatedValue> MemSet; - typedef DenseMap<InstantiatedValue, MemSet> MemMapType; + using MemSet = DenseSet<InstantiatedValue>; + using MemMapType = DenseMap<InstantiatedValue, MemSet>; + MemMapType MemMap; public: - typedef MemSet::const_iterator const_mem_iterator; + using const_mem_iterator = MemSet::const_iterator; bool insert(InstantiatedValue LHS, InstantiatedValue RHS) { // Top-level values can never be memory aliases because one cannot take the @@ -193,11 +222,12 @@ public: // We use AliasAttrMap to keep track of the AliasAttr of each node. 
class AliasAttrMap { - typedef DenseMap<InstantiatedValue, AliasAttrs> MapType; + using MapType = DenseMap<InstantiatedValue, AliasAttrs>; + MapType AttrMap; public: - typedef MapType::const_iterator const_iterator; + using const_iterator = MapType::const_iterator; bool add(InstantiatedValue V, AliasAttrs Attr) { auto &OldAttr = AttrMap[V]; @@ -234,23 +264,28 @@ struct ValueSummary { }; SmallVector<Record, 4> FromRecords, ToRecords; }; -} + +} // end anonymous namespace namespace llvm { + // Specialize DenseMapInfo for OffsetValue. template <> struct DenseMapInfo<OffsetValue> { static OffsetValue getEmptyKey() { return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(), DenseMapInfo<int64_t>::getEmptyKey()}; } + static OffsetValue getTombstoneKey() { return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(), DenseMapInfo<int64_t>::getEmptyKey()}; } + static unsigned getHashValue(const OffsetValue &OVal) { return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue( std::make_pair(OVal.Val, OVal.Offset)); } + static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) { return LHS == RHS; } @@ -263,21 +298,25 @@ template <> struct DenseMapInfo<OffsetInstantiatedValue> { DenseMapInfo<InstantiatedValue>::getEmptyKey(), DenseMapInfo<int64_t>::getEmptyKey()}; } + static OffsetInstantiatedValue getTombstoneKey() { return OffsetInstantiatedValue{ DenseMapInfo<InstantiatedValue>::getTombstoneKey(), DenseMapInfo<int64_t>::getEmptyKey()}; } + static unsigned getHashValue(const OffsetInstantiatedValue &OVal) { return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue( std::make_pair(OVal.IVal, OVal.Offset)); } + static bool isEqual(const OffsetInstantiatedValue &LHS, const OffsetInstantiatedValue &RHS) { return LHS == RHS; } }; -} + +} // end namespace llvm class CFLAndersAAResult::FunctionInfo { /// Map a value to other values that may alias it @@ -654,41 +693,40 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, }; switch (Item.State) { - case MatchState::FlowFromReadOnly: { + case MatchState::FlowFromReadOnly: NextRevAssignState(MatchState::FlowFromReadOnly); NextAssignState(MatchState::FlowToReadWrite); NextMemState(MatchState::FlowFromMemAliasReadOnly); break; - } - case MatchState::FlowFromMemAliasNoReadWrite: { + + case MatchState::FlowFromMemAliasNoReadWrite: NextRevAssignState(MatchState::FlowFromReadOnly); NextAssignState(MatchState::FlowToWriteOnly); break; - } - case MatchState::FlowFromMemAliasReadOnly: { + + case MatchState::FlowFromMemAliasReadOnly: NextRevAssignState(MatchState::FlowFromReadOnly); NextAssignState(MatchState::FlowToReadWrite); break; - } - case MatchState::FlowToWriteOnly: { + + case MatchState::FlowToWriteOnly: NextAssignState(MatchState::FlowToWriteOnly); NextMemState(MatchState::FlowToMemAliasWriteOnly); break; - } - case MatchState::FlowToReadWrite: { + + case MatchState::FlowToReadWrite: NextAssignState(MatchState::FlowToReadWrite); NextMemState(MatchState::FlowToMemAliasReadWrite); break; - } - case MatchState::FlowToMemAliasWriteOnly: { + + case MatchState::FlowToMemAliasWriteOnly: NextAssignState(MatchState::FlowToWriteOnly); break; - } - case MatchState::FlowToMemAliasReadWrite: { + + case MatchState::FlowToMemAliasReadWrite: NextAssignState(MatchState::FlowToReadWrite); break; } - } } static AliasAttrMap buildAttrMap(const CFLGraph &Graph, @@ -837,7 +875,7 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA, AliasResult CFLAndersAAResult::alias(const MemoryLocation 
&LocA, const MemoryLocation &LocB) { if (LocA.Ptr == LocB.Ptr) - return LocA.Size == LocB.Size ? MustAlias : PartialAlias; + return MustAlias; // Comparisons between global variables and other constants should be // handled by BasicAA. diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h index 95874b88244b..e4e92864061f 100644 --- a/contrib/llvm/lib/Analysis/CFLGraph.h +++ b/contrib/llvm/lib/Analysis/CFLGraph.h @@ -1,4 +1,4 @@ -//======- CFLGraph.h - Abstract stratified sets implementation. --------======// +//===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===// // // The LLVM Compiler Infrastructure // @@ -6,19 +6,42 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file defines CFLGraph, an auxiliary data structure used by CFL-based /// alias analysis. -/// +// //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_CFLGRAPH_H -#define LLVM_ANALYSIS_CFLGRAPH_H +#ifndef LLVM_LIB_ANALYSIS_CFLGRAPH_H +#define LLVM_LIB_ANALYSIS_CFLGRAPH_H #include "AliasAnalysisSummary.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> +#include <vector> namespace llvm { namespace cflaa { @@ -35,14 +58,14 @@ namespace cflaa { /// I+1) and a reference edge to (X, I-1). 
class CFLGraph { public: - typedef InstantiatedValue Node; + using Node = InstantiatedValue; struct Edge { Node Other; int64_t Offset; }; - typedef std::vector<Edge> EdgeList; + using EdgeList = std::vector<Edge>; struct NodeInfo { EdgeList Edges, ReverseEdges; @@ -74,7 +97,8 @@ public: }; private: - typedef DenseMap<Value *, ValueInfo> ValueMap; + using ValueMap = DenseMap<Value *, ValueInfo>; + ValueMap ValueImpls; NodeInfo *getNode(Node N) { @@ -85,7 +109,7 @@ private: } public: - typedef ValueMap::const_iterator const_value_iterator; + using const_value_iterator = ValueMap::const_iterator; bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) { assert(N.Val != nullptr); @@ -496,10 +520,10 @@ template <typename CFLAA> class CFLGraphBuilder { addNode(Ptr, getAttrEscaped()); break; } - case Instruction::IntToPtr: { + case Instruction::IntToPtr: addNode(CE, getAttrUnknown()); break; - } + case Instruction::BitCast: case Instruction::AddrSpaceCast: case Instruction::Trunc: @@ -571,11 +595,11 @@ template <typename CFLAA> class CFLGraphBuilder { case Instruction::LShr: case Instruction::AShr: case Instruction::ICmp: - case Instruction::FCmp: { + case Instruction::FCmp: addAssignEdge(CE->getOperand(0), CE); addAssignEdge(CE->getOperand(1), CE); break; - } + default: llvm_unreachable("Unknown instruction type encountered!"); } @@ -640,7 +664,8 @@ public: return ReturnedValues; } }; -} -} -#endif +} // end namespace cflaa +} // end namespace llvm + +#endif // LLVM_LIB_ANALYSIS_CFLGRAPH_H diff --git a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index adbdd82012a3..eee6d26ba787 100644 --- a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -1,4 +1,4 @@ -//- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ---*- C++-*-// +//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===// // // The LLVM Compiler Infrastructure // @@ -36,23 +36,25 @@ // FunctionPasses to run concurrently. #include "llvm/Analysis/CFLSteensAliasAnalysis.h" +#include "AliasAnalysisSummary.h" #include "CFLGraph.h" #include "StratifiedSets.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> +#include <limits> #include <memory> -#include <tuple> +#include <utility> using namespace llvm; using namespace llvm::cflaa; @@ -63,7 +65,7 @@ CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI) : AAResultBase(), TLI(TLI) {} CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {} -CFLSteensAAResult::~CFLSteensAAResult() {} +CFLSteensAAResult::~CFLSteensAAResult() = default; /// Information we have about a function and would like to keep around. 
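// [Editorial sketch, not part of this patch] Several empty out-of-line
// destructors in this merge (CFLAndersAAResult, CFLSteensAAResult,
// CallGraphWrapperPass) become "= default". Behaviour is unchanged; the
// definition simply states the intent explicitly. A minimal standalone
// illustration with an invented type:
struct Result {
  ~Result();                   // declared here, defined out of line below
};

Result::~Result() = default;   // equivalent to an empty body "{}"

int main() { Result R; (void)R; }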
class CFLSteensAAResult::FunctionInfo { @@ -77,6 +79,7 @@ public: const StratifiedSets<InstantiatedValue> &getStratifiedSets() const { return Sets; } + const AliasSummary &getAliasSummary() const { return Summary; } }; diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp index 74b5d79ebac5..ceff94756fe3 100644 --- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -8,8 +8,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> + +#define DEBUG_TYPE "cgscc" using namespace llvm; @@ -53,8 +72,13 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, // Update the SCC if necessary. C = UR.UpdatedC ? UR.UpdatedC : C; + // If the CGSCC pass wasn't able to provide a valid updated SCC, the + // current SCC may simply need to be skipped if invalid. + if (UR.InvalidatedSCCs.count(C)) { + DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); + break; + } // Check that we didn't miss any update scenario. - assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); assert(C->begin() != C->end() && "Cannot have an empty SCC!"); // Update the analysis manager as each pass runs and potentially @@ -211,7 +235,7 @@ bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( auto PAC = PA.getChecker<FunctionAnalysisManagerCGSCCProxy>(); if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<LazyCallGraph::SCC>>()) { for (LazyCallGraph::Node &N : C) - FAM->clear(N.getFunction()); + FAM->clear(N.getFunction(), N.getFunction().getName()); return true; } @@ -260,7 +284,7 @@ bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( return false; } -} // End llvm namespace +} // end namespace llvm /// When a new SCC is created for the graph and there might be function /// analysis results cached for the functions now in that SCC two forms of @@ -307,7 +331,6 @@ static void updateNewSCCFunctionAnalyses(LazyCallGraph::SCC &C, } } -namespace { /// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c /// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly /// added SCCs. @@ -319,20 +342,18 @@ namespace { /// This function returns the SCC containing \p N. This will be either \p C if /// no new SCCs have been split out, or it will be the new SCC containing \p N. 
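// [Editorial sketch, not part of this patch] The pass-manager change above
// stops asserting when a CGSCC pass invalidates the SCC it is running on and
// instead skips the rest of the pipeline for that SCC. A loose standalone
// illustration of that control flow; the ints stand in for SCCs and all
// names are invented, not the LLVM classes.
#include <cassert>
#include <set>
#include <vector>

int main() {
  std::vector<int> Worklist = {3, 2, 1}; // SCC ids, processed from the back
  std::set<int> Invalidated = {2};       // marked dead by some earlier pass
  std::vector<int> Processed;
  while (!Worklist.empty()) {
    int C = Worklist.back();
    Worklist.pop_back();
    if (Invalidated.count(C))
      continue; // skip the invalidated SCC instead of asserting
    Processed.push_back(C);
  }
  assert(Processed == (std::vector<int>{1, 3}));
}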
template <typename SCCRangeT> -LazyCallGraph::SCC * +static LazyCallGraph::SCC * incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, LazyCallGraph::Node &N, LazyCallGraph::SCC *C, - CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, - bool DebugLogging = false) { - typedef LazyCallGraph::SCC SCC; + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) { + using SCC = LazyCallGraph::SCC; if (NewSCCRange.begin() == NewSCCRange.end()) return C; // Add the current SCC to the worklist as its shape has changed. UR.CWorklist.insert(C); - if (DebugLogging) - dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n"; + DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n"); SCC *OldC = C; @@ -363,13 +384,12 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, if (NeedFAMProxy) updateNewSCCFunctionAnalyses(*C, G, AM); - for (SCC &NewC : - reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) { + for (SCC &NewC : llvm::reverse(make_range(std::next(NewSCCRange.begin()), + NewSCCRange.end()))) { assert(C != &NewC && "No need to re-visit the current SCC!"); assert(OldC != &NewC && "Already handled the original SCC!"); UR.CWorklist.insert(&NewC); - if (DebugLogging) - dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"; + DEBUG(dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"); // Ensure new SCCs' function analyses are updated. if (NeedFAMProxy) @@ -381,15 +401,14 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, } return C; } -} LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N, - CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) { - typedef LazyCallGraph::Node Node; - typedef LazyCallGraph::Edge Edge; - typedef LazyCallGraph::SCC SCC; - typedef LazyCallGraph::RefSCC RefSCC; + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) { + using Node = LazyCallGraph::Node; + using Edge = LazyCallGraph::Edge; + using SCC = LazyCallGraph::SCC; + using RefSCC = LazyCallGraph::RefSCC; RefSCC &InitialRC = InitialC.getOuterRefSCC(); SCC *C = &InitialC; @@ -421,7 +440,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( assert(E && "No function transformations should introduce *new* " "call edges! Any new calls should be modeled as " "promoted existing ref edges!"); - RetainedEdges.insert(&CalleeN); + bool Inserted = RetainedEdges.insert(&CalleeN).second; + (void)Inserted; + assert(Inserted && "We should never visit a function twice."); if (!E->isCall()) PromotedRefTargets.insert(&CalleeN); } @@ -429,7 +450,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // Now walk all references. for (Instruction &I : instructions(F)) for (Value *Op : I.operand_values()) - if (Constant *C = dyn_cast<Constant>(Op)) + if (auto *C = dyn_cast<Constant>(Op)) if (Visited.insert(C).second) Worklist.push_back(C); @@ -441,7 +462,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( assert(E && "No function transformations should introduce *new* ref " "edges! 
Any new ref edges would require IPO which " "function passes aren't allowed to do!"); - RetainedEdges.insert(&RefereeN); + bool Inserted = RetainedEdges.insert(&RefereeN).second; + (void)Inserted; + assert(Inserted && "We should never visit a function twice."); if (E->isCall()) DemotedCallTargets.insert(&RefereeN); }; @@ -449,74 +472,82 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // Include synthetic reference edges to known, defined lib functions. for (auto *F : G.getLibFunctions()) - VisitRef(*F); + // While the list of lib functions doesn't have repeats, don't re-visit + // anything handled above. + if (!Visited.count(F)) + VisitRef(*F); // First remove all of the edges that are no longer present in this function. - // We have to build a list of dead targets first and then remove them as the - // data structures will all be invalidated by removing them. - SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets; - for (Edge &E : *N) - if (!RetainedEdges.count(&E.getNode())) - DeadTargets.push_back({&E.getNode(), E.getKind()}); - for (auto DeadTarget : DeadTargets) { - Node &TargetN = *DeadTarget.getPointer(); - bool IsCall = DeadTarget.getInt() == Edge::Call; - SCC &TargetC = *G.lookupSCC(TargetN); - RefSCC &TargetRC = TargetC.getOuterRefSCC(); - - if (&TargetRC != RC) { - RC->removeOutgoingEdge(N, TargetN); - if (DebugLogging) - dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN - << "'\n"; + // The first step makes these edges uniformly ref edges and accumulates them + // into a separate data structure so removal doesn't invalidate anything. + SmallVector<Node *, 4> DeadTargets; + for (Edge &E : *N) { + if (RetainedEdges.count(&E.getNode())) continue; - } - if (DebugLogging) - dbgs() << "Deleting internal " << (IsCall ? "call" : "ref") - << " edge from '" << N << "' to '" << TargetN << "'\n"; - if (IsCall) { + SCC &TargetC = *G.lookupSCC(E.getNode()); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + if (&TargetRC == RC && E.isCall()) { if (C != &TargetC) { // For separate SCCs this is trivial. - RC->switchTrivialInternalEdgeToRef(N, TargetN); + RC->switchTrivialInternalEdgeToRef(N, E.getNode()); } else { // Now update the call graph. - C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, - N, C, AM, UR, DebugLogging); + C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, E.getNode()), + G, N, C, AM, UR); } } - auto NewRefSCCs = RC->removeInternalRefEdge(N, TargetN); - if (!NewRefSCCs.empty()) { - // Note that we don't bother to invalidate analyses as ref-edge - // connectivity is not really observable in any way and is intended - // exclusively to be used for ordering of transforms rather than for - // analysis conclusions. - - // The RC worklist is in reverse postorder, so we first enqueue the - // current RefSCC as it will remain the parent of all split RefSCCs, then - // we enqueue the new ones in RPO except for the one which contains the - // source node as that is the "bottom" we will continue processing in the - // bottom-up walk. - UR.RCWorklist.insert(RC); - if (DebugLogging) - dbgs() << "Enqueuing the existing RefSCC in the update worklist: " - << *RC << "\n"; - // Update the RC to the "bottom". 
- assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!"); - RC = &C->getOuterRefSCC(); - assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!"); - assert(NewRefSCCs.front() == RC && - "New current RefSCC not first in the returned list!"); - for (RefSCC *NewRC : reverse( - make_range(std::next(NewRefSCCs.begin()), NewRefSCCs.end()))) { - assert(NewRC != RC && "Should not encounter the current RefSCC further " - "in the postorder list of new RefSCCs."); - UR.RCWorklist.insert(NewRC); - if (DebugLogging) - dbgs() << "Enqueuing a new RefSCC in the update worklist: " << *NewRC - << "\n"; - } + // Now that this is ready for actual removal, put it into our list. + DeadTargets.push_back(&E.getNode()); + } + // Remove the easy cases quickly and actually pull them out of our list. + DeadTargets.erase( + llvm::remove_if(DeadTargets, + [&](Node *TargetN) { + SCC &TargetC = *G.lookupSCC(*TargetN); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + + // We can't trivially remove internal targets, so skip + // those. + if (&TargetRC == RC) + return false; + + RC->removeOutgoingEdge(N, *TargetN); + DEBUG(dbgs() << "Deleting outgoing edge from '" << N + << "' to '" << TargetN << "'\n"); + return true; + }), + DeadTargets.end()); + + // Now do a batch removal of the internal ref edges left. + auto NewRefSCCs = RC->removeInternalRefEdge(N, DeadTargets); + if (!NewRefSCCs.empty()) { + // The old RefSCC is dead, mark it as such. + UR.InvalidatedRefSCCs.insert(RC); + + // Note that we don't bother to invalidate analyses as ref-edge + // connectivity is not really observable in any way and is intended + // exclusively to be used for ordering of transforms rather than for + // analysis conclusions. + + // Update RC to the "bottom". + assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!"); + RC = &C->getOuterRefSCC(); + assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!"); + + // The RC worklist is in reverse postorder, so we enqueue the new ones in + // RPO except for the one which contains the source node as that is the + // "bottom" we will continue processing in the bottom-up walk. + assert(NewRefSCCs.front() == RC && + "New current RefSCC not first in the returned list!"); + for (RefSCC *NewRC : llvm::reverse(make_range(std::next(NewRefSCCs.begin()), + NewRefSCCs.end()))) { + assert(NewRC != RC && "Should not encounter the current RefSCC further " + "in the postorder list of new RefSCCs."); + UR.RCWorklist.insert(NewRC); + DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: " + << *NewRC << "\n"); } } @@ -533,9 +564,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( assert(RC->isAncestorOf(TargetRC) && "Cannot potentially form RefSCC cycles here!"); RC->switchOutgoingEdgeToRef(N, *RefTarget); - if (DebugLogging) - dbgs() << "Switch outgoing call edge to a ref edge from '" << N - << "' to '" << *RefTarget << "'\n"; + DEBUG(dbgs() << "Switch outgoing call edge to a ref edge from '" << N + << "' to '" << *RefTarget << "'\n"); continue; } @@ -549,7 +579,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // Now update the call graph. C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N, - C, AM, UR, DebugLogging); + C, AM, UR); } // Now promote ref edges into call edges. 
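// [Editorial sketch, not part of this patch] The new dead-edge handling above
// filters DeadTargets in place with llvm::remove_if followed by erase (the
// erase-remove idiom): cross-RefSCC targets are removed from the graph right
// away and dropped from the list, and only same-RefSCC targets survive for
// the batched removeInternalRefEdge call. The idiom itself in plain standard
// C++, with an invented predicate standing in for "handled trivially":
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> DeadTargets = {1, 2, 3, 4, 5};
  auto HandledTrivially = [](int T) { return T % 2 == 0; };
  DeadTargets.erase(std::remove_if(DeadTargets.begin(), DeadTargets.end(),
                                   HandledTrivially),
                    DeadTargets.end());
  assert(DeadTargets == (std::vector<int>{1, 3, 5})); // only the hard cases remain
}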
@@ -563,14 +593,12 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( assert(RC->isAncestorOf(TargetRC) && "Cannot potentially form RefSCC cycles here!"); RC->switchOutgoingEdgeToCall(N, *CallTarget); - if (DebugLogging) - dbgs() << "Switch outgoing ref edge to a call edge from '" << N - << "' to '" << *CallTarget << "'\n"; + DEBUG(dbgs() << "Switch outgoing ref edge to a call edge from '" << N + << "' to '" << *CallTarget << "'\n"); continue; } - if (DebugLogging) - dbgs() << "Switch an internal ref edge to a call edge from '" << N - << "' to '" << *CallTarget << "'\n"; + DEBUG(dbgs() << "Switch an internal ref edge to a call edge from '" << N + << "' to '" << *CallTarget << "'\n"); // Otherwise we are switching an internal ref edge to a call edge. This // may merge away some SCCs, and we add those to the UpdateResult. We also @@ -619,21 +647,28 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( AM.invalidate(*C, PA); } auto NewSCCIndex = RC->find(*C) - RC->begin(); + // If we have actually moved an SCC to be topologically "below" the current + // one due to merging, we will need to revisit the current SCC after + // visiting those moved SCCs. + // + // It is critical that we *do not* revisit the current SCC unless we + // actually move SCCs in the process of merging because otherwise we may + // form a cycle where an SCC is split apart, merged, split, merged and so + // on infinitely. if (InitialSCCIndex < NewSCCIndex) { // Put our current SCC back onto the worklist as we'll visit other SCCs // that are now definitively ordered prior to the current one in the // post-order sequence, and may end up observing more precise context to // optimize the current SCC. UR.CWorklist.insert(C); - if (DebugLogging) - dbgs() << "Enqueuing the existing SCC in the worklist: " << *C << "\n"; + DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist: " << *C + << "\n"); // Enqueue in reverse order as we pop off the back of the worklist. 
- for (SCC &MovedC : reverse(make_range(RC->begin() + InitialSCCIndex, - RC->begin() + NewSCCIndex))) { + for (SCC &MovedC : llvm::reverse(make_range(RC->begin() + InitialSCCIndex, + RC->begin() + NewSCCIndex))) { UR.CWorklist.insert(&MovedC); - if (DebugLogging) - dbgs() << "Enqueuing a newly earlier in post-order SCC: " << MovedC - << "\n"; + DEBUG(dbgs() << "Enqueuing a newly earlier in post-order SCC: " + << MovedC << "\n"); } } } diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp index ff5242f69a1b..ac3ea2b73fed 100644 --- a/contrib/llvm/lib/Analysis/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/CallGraph.cpp @@ -8,12 +8,20 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CallGraph.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> + using namespace llvm; //===----------------------------------------------------------------------===// @@ -125,7 +133,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { /// This does not rescan the body of the function, so it is suitable when /// splicing the body of the old function to the new while also updating all /// callers from old to new. -/// void CallGraph::spliceFunction(const Function *From, const Function *To) { assert(FunctionMap.count(From) && "No CallGraphNode for function!"); assert(!FunctionMap.count(To) && @@ -256,7 +263,7 @@ CallGraphWrapperPass::CallGraphWrapperPass() : ModulePass(ID) { initializeCallGraphWrapperPassPass(*PassRegistry::getPassRegistry()); } -CallGraphWrapperPass::~CallGraphWrapperPass() {} +CallGraphWrapperPass::~CallGraphWrapperPass() = default; void CallGraphWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -291,8 +298,10 @@ void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } #endif namespace { + struct CallGraphPrinterLegacyPass : public ModulePass { static char ID; // Pass ID, replacement for typeid + CallGraphPrinterLegacyPass() : ModulePass(ID) { initializeCallGraphPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -301,12 +310,14 @@ struct CallGraphPrinterLegacyPass : public ModulePass { AU.setPreservesAll(); AU.addRequiredTransitive<CallGraphWrapperPass>(); } + bool runOnModule(Module &M) override { getAnalysis<CallGraphWrapperPass>().print(errs(), &M); return false; } }; -} + +} // end anonymous namespace char CallGraphPrinterLegacyPass::ID = 0; diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index facda246936d..a2dda58a6a2f 100644 --- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -16,18 +16,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include 
"llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManagers.h" +#include "llvm/IR/Module.h" #include "llvm/IR/OptBisect.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <string> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "cgscc-passmgr" @@ -47,8 +56,8 @@ namespace { class CGPassManager : public ModulePass, public PMDataManager { public: static char ID; - explicit CGPassManager() - : ModulePass(ID), PMDataManager() { } + + explicit CGPassManager() : ModulePass(ID), PMDataManager() {} /// Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. @@ -106,7 +115,6 @@ private: char CGPassManager::ID = 0; - bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, CallGraph &CG, bool &CallGraphUpToDate, bool &DevirtualizedCall) { @@ -135,7 +143,6 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, return Changed; } - assert(PM->getPassManagerType() == PMT_FunctionPassManager && "Invalid CGPassManager member"); FPPassManager *FPP = (FPPassManager*)P; @@ -162,7 +169,6 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, return Changed; } - /// Scan the functions in the specified CFG and resync the /// callgraph with the call sites found in it. This is used after /// FunctionPasses have potentially munged the callgraph, and can be used after @@ -172,7 +178,6 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, /// meaning it turned an indirect call into a direct call. This happens when /// a function pass like GVN optimizes away stuff feeding the indirect call. /// This never happens in checking mode. -/// bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, bool CheckingMode) { DenseMap<Value*, CallGraphNode*> CallSites; @@ -484,7 +489,6 @@ bool CGPassManager::runOnModule(Module &M) { return Changed; } - /// Initialize CG bool CGPassManager::doInitialization(CallGraph &CG) { bool Changed = false; @@ -536,7 +540,6 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { CGI->ReplaceNode(Old, New); } - //===----------------------------------------------------------------------===// // CallGraphSCCPass Implementation //===----------------------------------------------------------------------===// @@ -586,22 +589,23 @@ void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<CallGraphWrapperPass>(); } - //===----------------------------------------------------------------------===// // PrintCallGraphPass Implementation //===----------------------------------------------------------------------===// namespace { + /// PrintCallGraphPass - Print a Module corresponding to a call graph. /// class PrintCallGraphPass : public CallGraphSCCPass { std::string Banner; - raw_ostream &Out; // raw_ostream to print on. + raw_ostream &OS; // raw_ostream to print on. 
public: static char ID; - PrintCallGraphPass(const std::string &B, raw_ostream &o) - : CallGraphSCCPass(ID), Banner(B), Out(o) {} + + PrintCallGraphPass(const std::string &B, raw_ostream &OS) + : CallGraphSCCPass(ID), Banner(B), OS(OS) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -612,18 +616,18 @@ namespace { auto PrintBannerOnce = [&] () { if (BannerPrinted) return; - Out << Banner; + OS << Banner; BannerPrinted = true; }; for (CallGraphNode *CGN : SCC) { if (Function *F = CGN->getFunction()) { if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) { PrintBannerOnce(); - F->print(Out); + F->print(OS); } - } else if (llvm::isFunctionInPrintList("*")) { + } else if (isFunctionInPrintList("*")) { PrintBannerOnce(); - Out << "\nPrinting <null> Function\n"; + OS << "\nPrinting <null> Function\n"; } } return false; @@ -636,9 +640,9 @@ namespace { char PrintCallGraphPass::ID = 0; -Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, +Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &OS, const std::string &Banner) const { - return new PrintCallGraphPass(Banner, O); + return new PrintCallGraphPass(Banner, OS); } bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const { @@ -649,5 +653,6 @@ bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const { } char DummyCGSCCPass::ID = 0; + INITIALIZE_PASS(DummyCGSCCPass, "DummyCGSCCPass", "DummyCGSCCPass", false, false) diff --git a/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp b/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp new file mode 100644 index 000000000000..159c1a2d135a --- /dev/null +++ b/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp @@ -0,0 +1,144 @@ +//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse compare instructions +// and fold them into constants or other compare instructions +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CmpInstAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PatternMatch.h" + +using namespace llvm; + +unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) { + ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate() + : ICI->getPredicate(); + switch (Pred) { + // False -> 0 + case ICmpInst::ICMP_UGT: return 1; // 001 + case ICmpInst::ICMP_SGT: return 1; // 001 + case ICmpInst::ICMP_EQ: return 2; // 010 + case ICmpInst::ICMP_UGE: return 3; // 011 + case ICmpInst::ICMP_SGE: return 3; // 011 + case ICmpInst::ICMP_ULT: return 4; // 100 + case ICmpInst::ICMP_SLT: return 4; // 100 + case ICmpInst::ICMP_NE: return 5; // 101 + case ICmpInst::ICMP_ULE: return 6; // 110 + case ICmpInst::ICMP_SLE: return 6; // 110 + // True -> 7 + default: + llvm_unreachable("Invalid ICmp predicate!"); + } +} + +Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + CmpInst::Predicate &NewICmpPred) { + switch (Code) { + default: llvm_unreachable("Illegal ICmp code!"); + case 0: // False. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + case 2: NewICmpPred = ICmpInst::ICMP_EQ; break; + case 3: NewICmpPred = Sign ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 5: NewICmpPred = ICmpInst::ICMP_NE; break; + case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 7: // True. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); + } + return nullptr; +} + +bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { + return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || + (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || + (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); +} + +bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, + CmpInst::Predicate &Pred, + Value *&X, APInt &Mask, bool LookThruTrunc) { + using namespace PatternMatch; + + const APInt *C; + if (!match(RHS, m_APInt(C))) + return false; + + switch (Pred) { + default: + return false; + case ICmpInst::ICMP_SLT: + // X < 0 is equivalent to (X & SignMask) != 0. + if (!C->isNullValue()) + return false; + Mask = APInt::getSignMask(C->getBitWidth()); + Pred = ICmpInst::ICMP_NE; + break; + case ICmpInst::ICMP_SLE: + // X <= -1 is equivalent to (X & SignMask) != 0. + if (!C->isAllOnesValue()) + return false; + Mask = APInt::getSignMask(C->getBitWidth()); + Pred = ICmpInst::ICMP_NE; + break; + case ICmpInst::ICMP_SGT: + // X > -1 is equivalent to (X & SignMask) == 0. + if (!C->isAllOnesValue()) + return false; + Mask = APInt::getSignMask(C->getBitWidth()); + Pred = ICmpInst::ICMP_EQ; + break; + case ICmpInst::ICMP_SGE: + // X >= 0 is equivalent to (X & SignMask) == 0. + if (!C->isNullValue()) + return false; + Mask = APInt::getSignMask(C->getBitWidth()); + Pred = ICmpInst::ICMP_EQ; + break; + case ICmpInst::ICMP_ULT: + // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0. + if (!C->isPowerOf2()) + return false; + Mask = -*C; + Pred = ICmpInst::ICMP_EQ; + break; + case ICmpInst::ICMP_ULE: + // X <=u 2^n-1 is equivalent to (X & ~(2^n-1)) == 0. + if (!(*C + 1).isPowerOf2()) + return false; + Mask = ~*C; + Pred = ICmpInst::ICMP_EQ; + break; + case ICmpInst::ICMP_UGT: + // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0. + if (!(*C + 1).isPowerOf2()) + return false; + Mask = ~*C; + Pred = ICmpInst::ICMP_NE; + break; + case ICmpInst::ICMP_UGE: + // X >=u 2^n is equivalent to (X & ~(2^n-1)) != 0. 
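// [Editorial sketch, not part of this patch] Spot-checking the equivalences
// decomposeBitTestICmp relies on, with concrete 8-bit values and only the
// standard library (no LLVM types involved):
#include <cstdint>

static_assert((std::uint8_t(5) & std::uint8_t(-8)) == 0,
              "5 <u 8   iff  (5 & -8) == 0");
static_assert((std::uint8_t(9) & std::uint8_t(-8)) != 0,
              "9 >=u 8  iff  (9 & -8) != 0");
static_assert((std::int8_t(-3) & 0x80) != 0,
              "-3 <s 0  iff  the sign bit is set");
static_assert((std::int8_t(42) & 0x80) == 0,
              "42 >=s 0 iff  the sign bit is clear");

int main() {}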
+ if (!C->isPowerOf2()) + return false; + Mask = -*C; + Pred = ICmpInst::ICMP_NE; + break; + } + + if (LookThruTrunc && match(LHS, m_Trunc(m_Value(X)))) { + Mask = Mask.zext(X->getType()->getScalarSizeInBits()); + } else { + X = LHS; + } + + return true; +} diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp index e4d9292db92d..ac7d14ebdaea 100644 --- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp +++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 0f5ec3f5626e..e88b8f14d54e 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -1359,7 +1359,7 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { - if (CS.isNoBuiltin()) + if (CS.isNoBuiltin() || CS.isStrictFP()) return false; switch (F->getIntrinsicID()) { case Intrinsic::fabs: @@ -2066,7 +2066,7 @@ Constant * llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, ArrayRef<Constant *> Operands, const TargetLibraryInfo *TLI) { - if (CS.isNoBuiltin()) + if (CS.isNoBuiltin() || CS.isStrictFP()) return nullptr; if (!F->hasName()) return nullptr; @@ -2084,7 +2084,7 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap // (and to some extent ConstantFoldScalarCall). - if (CS.isNoBuiltin()) + if (CS.isNoBuiltin() || CS.isStrictFP()) return false; Function *F = CS.getCalledFunction(); if (!F) diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp index 32bfea58bf9d..3d55bf20bb40 100644 --- a/contrib/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -20,24 +20,26 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<TargetTransformInfo::TargetCostKind> CostKind( + "cost-kind", cl::desc("Target cost kind"), + cl::init(TargetTransformInfo::TCK_RecipThroughput), + cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, + "throughput", "Reciprocal throughput"), + clEnumValN(TargetTransformInfo::TCK_Latency, + "latency", "Instruction latency"), + clEnumValN(TargetTransformInfo::TCK_CodeSize, + "code-size", "Code size"))); + #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME -static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), - cl::Hidden, - cl::desc("Recognize reduction patterns.")); - namespace { class CostModelAnalysis : public FunctionPass { @@ -52,7 +54,9 @@ namespace { /// Returns -1 if the cost is unknown. /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. 
- unsigned getInstructionCost(const Instruction *I) const; + unsigned getInstructionCost(const Instruction *I) const { + return TTI->getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput); + } private: void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -90,481 +94,13 @@ CostModelAnalysis::runOnFunction(Function &F) { return false; } -static bool isReverseVectorMask(ArrayRef<int> Mask) { - for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) - if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) - return false; - return true; -} - -static bool isSingleSourceVectorMask(ArrayRef<int> Mask) { - bool Vec0 = false; - bool Vec1 = false; - for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { - if (Mask[i] >= 0) { - if ((unsigned)Mask[i] >= NumVecElts) - Vec1 = true; - else - Vec0 = true; - } - } - return !(Vec0 && Vec1); -} - -static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) { - for (unsigned i = 0; i < Mask.size(); ++i) - if (Mask[i] > 0) - return false; - return true; -} - -static bool isAlternateVectorMask(ArrayRef<int> Mask) { - bool isAlternate = true; - unsigned MaskSize = Mask.size(); - - // Example: shufflevector A, B, <0,5,2,7> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { - if (Mask[i] < 0) - continue; - isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); - } - - if (isAlternate) - return true; - - isAlternate = true; - // Example: shufflevector A, B, <4,1,6,3> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { - if (Mask[i] < 0) - continue; - isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); - } - - return isAlternate; -} - -static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { - TargetTransformInfo::OperandValueKind OpInfo = - TargetTransformInfo::OK_AnyValue; - - // Check for a splat of a constant or for a non uniform vector of constants. - if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { - OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; - if (cast<Constant>(V)->getSplatValue() != nullptr) - OpInfo = TargetTransformInfo::OK_UniformConstantValue; - } - - // Check for a splat of a uniform value. This is not loop aware, so return - // true only for the obviously uniform cases (argument, globalvalue) - const Value *Splat = getSplatValue(V); - if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat))) - OpInfo = TargetTransformInfo::OK_UniformValue; - - return OpInfo; -} - -static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, - unsigned Level) { - // We don't need a shuffle if we just want to have element 0 in position 0 of - // the vector. - if (!SI && Level == 0 && IsLeft) - return true; - else if (!SI) - return false; - - SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); - - // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether - // we look at the left or right side. - for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) - Mask[i] = val; - - SmallVector<int, 16> ActualMask = SI->getShuffleMask(); - return Mask == ActualMask; -} - -static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, - unsigned Level, unsigned NumLevels) { - // Match one level of pairwise operations. 
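// [Editorial sketch, not part of this patch] matchPairwiseShuffleMask above
// rebuilds the mask it expects at a given reduction level: 0,2,4,... for the
// left operand or 1,3,5,... for the right, with the remaining lanes undef.
// The same construction in plain standard C++ (function and names invented
// for the illustration; level 0 is the combine closest to the extracted
// scalar):
#include <cassert>
#include <vector>

static std::vector<int> pairwiseMask(unsigned NumElts, bool IsLeft,
                                     unsigned Level) {
  std::vector<int> Mask(NumElts, -1); // -1 plays the role of undef
  unsigned Val = IsLeft ? 0 : 1;
  for (unsigned I = 0, E = 1u << Level; I != E; ++I, Val += 2)
    Mask[I] = static_cast<int>(Val);
  return Mask;
}

int main() {
  // Final combine of a 4-wide pairwise reduction (level 0):
  assert(pairwiseMask(4, true, 0) == (std::vector<int>{0, -1, -1, -1}));
  assert(pairwiseMask(4, false, 0) == (std::vector<int>{1, -1, -1, -1}));
  // One level further from the root pairs lanes 0/2 and 1/3 (level 1):
  assert(pairwiseMask(4, true, 1) == (std::vector<int>{0, 2, -1, -1}));
  assert(pairwiseMask(4, false, 1) == (std::vector<int>{1, 3, -1, -1}));
}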
- // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> - // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> - // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 - if (BinOp == nullptr) - return false; - - assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); - - unsigned Opcode = BinOp->getOpcode(); - Value *L = BinOp->getOperand(0); - Value *R = BinOp->getOperand(1); - - ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); - if (!LS && Level) - return false; - ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); - if (!RS && Level) - return false; - - // On level 0 we can omit one shufflevector instruction. - if (!Level && !RS && !LS) - return false; - - // Shuffle inputs must match. - Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; - Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; - Value *NextLevelOp = nullptr; - if (NextLevelOpR && NextLevelOpL) { - // If we have two shuffles their operands must match. - if (NextLevelOpL != NextLevelOpR) - return false; - - NextLevelOp = NextLevelOpL; - } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { - // On the first level we can omit the shufflevector <0, undef,...>. So the - // input to the other shufflevector <1, undef> must match with one of the - // inputs to the current binary operation. - // Example: - // %NextLevelOpL = shufflevector %R, <1, undef ...> - // %BinOp = fadd %NextLevelOpL, %R - if (NextLevelOpL && NextLevelOpL != R) - return false; - else if (NextLevelOpR && NextLevelOpR != L) - return false; - - NextLevelOp = NextLevelOpL ? R : L; - } else - return false; - - // Check that the next levels binary operation exists and matches with the - // current one. - BinaryOperator *NextLevelBinOp = nullptr; - if (Level + 1 != NumLevels) { - if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) - return false; - else if (NextLevelBinOp->getOpcode() != Opcode) - return false; - } - - // Shuffle mask for pairwise operation must match. - if (matchPairwiseShuffleMask(LS, true, Level)) { - if (!matchPairwiseShuffleMask(RS, false, Level)) - return false; - } else if (matchPairwiseShuffleMask(RS, true, Level)) { - if (!matchPairwiseShuffleMask(LS, false, Level)) - return false; - } else - return false; - - if (++Level == NumLevels) - return true; - - // Match next level. - return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); -} - -static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, - unsigned &Opcode, Type *&Ty) { - if (!EnableReduxCost) - return false; - - // Need to extract the first element. - ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); - unsigned Idx = ~0u; - if (CI) - Idx = CI->getZExtValue(); - if (Idx != 0) - return false; - - BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); - if (!RdxStart) - return false; - - Type *VecTy = ReduxRoot->getOperand(0)->getType(); - unsigned NumVecElems = VecTy->getVectorNumElements(); - if (!isPowerOf2_32(NumVecElems)) - return false; - - // We look for a sequence of shuffle,shuffle,add triples like the following - // that builds a pairwise reduction tree. 
- // - // (X0, X1, X2, X3) - // (X0 + X1, X2 + X3, undef, undef) - // ((X0 + X1) + (X2 + X3), undef, undef, undef) - // - // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> - // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> - // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 - // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, - // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> - // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, - // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> - // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 - // %r = extractelement <4 x float> %bin.rdx8, i32 0 - if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) - return false; - - Opcode = RdxStart->getOpcode(); - Ty = VecTy; - - return true; -} - -static std::pair<Value *, ShuffleVectorInst *> -getShuffleAndOtherOprd(BinaryOperator *B) { - - Value *L = B->getOperand(0); - Value *R = B->getOperand(1); - ShuffleVectorInst *S = nullptr; - - if ((S = dyn_cast<ShuffleVectorInst>(L))) - return std::make_pair(R, S); - - S = dyn_cast<ShuffleVectorInst>(R); - return std::make_pair(L, S); -} - -static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, - unsigned &Opcode, Type *&Ty) { - if (!EnableReduxCost) - return false; - - // Need to extract the first element. - ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); - unsigned Idx = ~0u; - if (CI) - Idx = CI->getZExtValue(); - if (Idx != 0) - return false; - - BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); - if (!RdxStart) - return false; - unsigned RdxOpcode = RdxStart->getOpcode(); - - Type *VecTy = ReduxRoot->getOperand(0)->getType(); - unsigned NumVecElems = VecTy->getVectorNumElements(); - if (!isPowerOf2_32(NumVecElems)) - return false; - - // We look for a sequence of shuffles and adds like the following matching one - // fadd, shuffle vector pair at a time. - // - // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, - // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> - // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf - // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, - // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> - // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 - // %r = extractelement <4 x float> %bin.rdx8, i32 0 - - unsigned MaskStart = 1; - Value *RdxOp = RdxStart; - SmallVector<int, 32> ShuffleMask(NumVecElems, 0); - unsigned NumVecElemsRemain = NumVecElems; - while (NumVecElemsRemain - 1) { - // Check for the right reduction operation. - BinaryOperator *BinOp; - if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) - return false; - if (BinOp->getOpcode() != RdxOpcode) - return false; - - Value *NextRdxOp; - ShuffleVectorInst *Shuffle; - std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); - - // Check the current reduction operation and the shuffle use the same value. - if (Shuffle == nullptr) - return false; - if (Shuffle->getOperand(0) != NextRdxOp) - return false; - - // Check that shuffle masks matches. - for (unsigned j = 0; j != MaskStart; ++j) - ShuffleMask[j] = MaskStart + j; - // Fill the rest of the mask with -1 for undef. 
- std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); - - SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); - if (ShuffleMask != Mask) - return false; - - RdxOp = NextRdxOp; - NumVecElemsRemain /= 2; - MaskStart *= 2; - } - - Opcode = RdxOpcode; - Ty = VecTy; - return true; -} - -unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { - if (!TTI) - return -1; - - switch (I->getOpcode()) { - case Instruction::GetElementPtr: - return TTI->getUserCost(I); - - case Instruction::Ret: - case Instruction::PHI: - case Instruction::Br: { - return TTI->getCFInstrCost(I->getOpcode()); - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - TargetTransformInfo::OperandValueKind Op1VK = - getOperandInfo(I->getOperand(0)); - TargetTransformInfo::OperandValueKind Op2VK = - getOperandInfo(I->getOperand(1)); - SmallVector<const Value*, 2> Operands(I->operand_values()); - return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, - Op2VK, TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None, - Operands); - } - case Instruction::Select: { - const SelectInst *SI = cast<SelectInst>(I); - Type *CondTy = SI->getCondition()->getType(); - return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); - } - case Instruction::ICmp: - case Instruction::FCmp: { - Type *ValTy = I->getOperand(0)->getType(); - return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); - } - case Instruction::Store: { - const StoreInst *SI = cast<StoreInst>(I); - Type *ValTy = SI->getValueOperand()->getType(); - return TTI->getMemoryOpCost(I->getOpcode(), ValTy, - SI->getAlignment(), - SI->getPointerAddressSpace(), I); - } - case Instruction::Load: { - const LoadInst *LI = cast<LoadInst>(I); - return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), - LI->getAlignment(), - LI->getPointerAddressSpace(), I); - } - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: - case Instruction::AddrSpaceCast: { - Type *SrcTy = I->getOperand(0)->getType(); - return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); - } - case Instruction::ExtractElement: { - const ExtractElementInst * EEI = cast<ExtractElementInst>(I); - ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - - // Try to match a reduction sequence (series of shufflevector and vector - // adds followed by a extractelement). 
- unsigned ReduxOpCode; - Type *ReduxType; - - if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) - return TTI->getReductionCost(ReduxOpCode, ReduxType, false); - else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) - return TTI->getReductionCost(ReduxOpCode, ReduxType, true); - - return TTI->getVectorInstrCost(I->getOpcode(), - EEI->getOperand(0)->getType(), Idx); - } - case Instruction::InsertElement: { - const InsertElementInst * IE = cast<InsertElementInst>(I); - ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - return TTI->getVectorInstrCost(I->getOpcode(), - IE->getType(), Idx); - } - case Instruction::ShuffleVector: { - const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); - Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); - unsigned NumVecElems = VecTypOp0->getVectorNumElements(); - SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); - - if (NumVecElems == Mask.size()) { - if (isReverseVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, - 0, nullptr); - if (isAlternateVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, - VecTypOp0, 0, nullptr); - - if (isZeroEltBroadcastVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, - VecTypOp0, 0, nullptr); - - if (isSingleSourceVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, - VecTypOp0, 0, nullptr); - - return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - VecTypOp0, 0, nullptr); - } - - return -1; - } - case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - SmallVector<Value *, 4> Args(II->arg_operands()); - - FastMathFlags FMF; - if (auto *FPMO = dyn_cast<FPMathOperator>(II)) - FMF = FPMO->getFastMathFlags(); - - return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), - Args, FMF); - } - return -1; - default: - // We don't have any information on this instruction. 
- return -1; - } -} - void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { if (!F) return; for (BasicBlock &B : *F) { for (Instruction &Inst : B) { - unsigned Cost = getInstructionCost(&Inst); + unsigned Cost = TTI->getInstructionCost(&Inst, CostKind); if (Cost != (unsigned)-1) OS << "Cost Model: Found an estimated cost of " << Cost; else diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp index 9c53f9140ca3..7276f2524fed 100644 --- a/contrib/llvm/lib/Analysis/DemandedBits.cpp +++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp @@ -1,4 +1,4 @@ -//===---- DemandedBits.cpp - Determine demanded bits ----------------------===// +//===- DemandedBits.cpp - Determine demanded bits -------------------------===// // // The LLVM Compiler Infrastructure // @@ -20,30 +20,41 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DemandedBits.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> + using namespace llvm; #define DEBUG_TYPE "demanded-bits" char DemandedBitsWrapperPass::ID = 0; + INITIALIZE_PASS_BEGIN(DemandedBitsWrapperPass, "demanded-bits", "Demanded bits analysis", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) @@ -357,7 +368,7 @@ void DemandedBits::performAnalysis() { APInt DemandedBits::getDemandedBits(Instruction *I) { performAnalysis(); - const DataLayout &DL = I->getParent()->getModule()->getDataLayout(); + const DataLayout &DL = I->getModule()->getDataLayout(); auto Found = AliveBits.find(I); if (Found != AliveBits.end()) return Found->second; diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp index 2d39a0b02150..ac684ec18466 100644 --- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -71,7 +71,6 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp index c08c6cfe0c3b..bb8caf4a5174 100644 --- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp @@ -9,15 +9,23 @@ #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/DominanceFrontierImpl.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include 
"llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace llvm { + template class DominanceFrontierBase<BasicBlock, false>; template class DominanceFrontierBase<BasicBlock, true>; template class ForwardDominanceFrontierBase<BasicBlock>; -} + +} // end namespace llvm char DominanceFrontierWrapperPass::ID = 0; @@ -27,7 +35,7 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(DominanceFrontierWrapperPass, "domfrontier", "Dominance Frontier Construction", true, true) - DominanceFrontierWrapperPass::DominanceFrontierWrapperPass() +DominanceFrontierWrapperPass::DominanceFrontierWrapperPass() : FunctionPass(ID), DF() { initializeDominanceFrontierWrapperPassPass(*PassRegistry::getPassRegistry()); } diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp index 4ef023379bb6..23109c67e5c3 100644 --- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -84,12 +84,13 @@ class GlobalsAAResult::FunctionInfo { /// The bit that flags that this function may read any global. This is /// chosen to mix together with ModRefInfo bits. + /// FIXME: This assumes ModRefInfo lattice will remain 4 bits! enum { MayReadAnyGlobal = 4 }; /// Checks to document the invariants of the bit packing here. - static_assert((MayReadAnyGlobal & MRI_ModRef) == 0, + static_assert((MayReadAnyGlobal & static_cast<int>(ModRefInfo::ModRef)) == 0, "ModRef and the MayReadAnyGlobal flag bits overlap."); - static_assert(((MayReadAnyGlobal | MRI_ModRef) >> + static_assert(((MayReadAnyGlobal | static_cast<int>(ModRefInfo::ModRef)) >> AlignedMapPointerTraits::NumLowBitsAvailable) == 0, "Insufficient low bits to store our flag and ModRef info."); @@ -126,12 +127,12 @@ public: /// Returns the \c ModRefInfo info for this function. ModRefInfo getModRefInfo() const { - return ModRefInfo(Info.getInt() & MRI_ModRef); + return ModRefInfo(Info.getInt() & static_cast<int>(ModRefInfo::ModRef)); } /// Adds new \c ModRefInfo for this function to its state. void addModRefInfo(ModRefInfo NewMRI) { - Info.setInt(Info.getInt() | NewMRI); + Info.setInt(Info.getInt() | static_cast<int>(NewMRI)); } /// Returns whether this function may read any global variable, and we don't @@ -144,17 +145,18 @@ public: /// Returns the \c ModRefInfo info for this function w.r.t. a particular /// global, which may be more precise than the general information above. ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const { - ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef; + ModRefInfo GlobalMRI = + mayReadAnyGlobal() ? ModRefInfo::Ref : ModRefInfo::NoModRef; if (AlignedMap *P = Info.getPointer()) { auto I = P->Map.find(&GV); if (I != P->Map.end()) - GlobalMRI = ModRefInfo(GlobalMRI | I->second); + GlobalMRI = unionModRef(GlobalMRI, I->second); } return GlobalMRI; } /// Add mod/ref info from another function into ours, saturating towards - /// MRI_ModRef. + /// ModRef. void addFunctionInfo(const FunctionInfo &FI) { addModRefInfo(FI.getModRefInfo()); @@ -173,7 +175,7 @@ public: Info.setPointer(P); } auto &GlobalMRI = P->Map[&GV]; - GlobalMRI = ModRefInfo(GlobalMRI | NewMRI); + GlobalMRI = unionModRef(GlobalMRI, NewMRI); } /// Clear a global's ModRef info. 
Should be used when a global is being @@ -230,9 +232,9 @@ FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) { FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; if (FunctionInfo *FI = getFunctionInfo(F)) { - if (FI->getModRefInfo() == MRI_NoModRef) + if (!isModOrRefSet(FI->getModRefInfo())) Min = FMRB_DoesNotAccessMemory; - else if ((FI->getModRefInfo() & MRI_Mod) == 0) + else if (!isModSet(FI->getModRefInfo())) Min = FMRB_OnlyReadsMemory; } @@ -246,9 +248,9 @@ GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) { if (!CS.hasOperandBundles()) if (const Function *F = CS.getCalledFunction()) if (FunctionInfo *FI = getFunctionInfo(F)) { - if (FI->getModRefInfo() == MRI_NoModRef) + if (!isModOrRefSet(FI->getModRefInfo())) Min = FMRB_DoesNotAccessMemory; - else if ((FI->getModRefInfo() & MRI_Mod) == 0) + else if (!isModSet(FI->getModRefInfo())) Min = FMRB_OnlyReadsMemory; } @@ -297,7 +299,7 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) { Handles.emplace_front(*this, Reader); Handles.front().I = Handles.begin(); } - FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref); + FunctionInfos[Reader].addModRefInfoForGlobal(GV, ModRefInfo::Ref); } if (!GV.isConstant()) // No need to keep track of writers to constants @@ -306,7 +308,7 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) { Handles.emplace_front(*this, Writer); Handles.front().I = Handles.begin(); } - FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod); + FunctionInfos[Writer].addModRefInfoForGlobal(GV, ModRefInfo::Mod); } ++NumNonAddrTakenGlobalVars; @@ -502,13 +504,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (F->doesNotAccessMemory()) { // Can't do better than that! } else if (F->onlyReadsMemory()) { - FI.addModRefInfo(MRI_Ref); + FI.addModRefInfo(ModRefInfo::Ref); if (!F->isIntrinsic() && !F->onlyAccessesArgMemory()) // This function might call back into the module and read a global - // consider every global as possibly being read by this function. FI.setMayReadAnyGlobal(); } else { - FI.addModRefInfo(MRI_ModRef); + FI.addModRefInfo(ModRefInfo::ModRef); // Can't say anything useful unless it's an intrinsic - they don't // read or write global variables of the kind considered here. KnowNothing = !F->isIntrinsic(); @@ -544,7 +546,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // Scan the function bodies for explicit loads or stores. for (auto *Node : SCC) { - if (FI.getModRefInfo() == MRI_ModRef) + if (isModAndRefSet(FI.getModRefInfo())) break; // The mod/ref lattice saturates here. // Don't prove any properties based on the implementation of an optnone @@ -554,7 +556,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { continue; for (Instruction &I : instructions(Node->getFunction())) { - if (FI.getModRefInfo() == MRI_ModRef) + if (isModAndRefSet(FI.getModRefInfo())) break; // The mod/ref lattice saturates here. // We handle calls specially because the graph-relevant aspects are @@ -563,13 +565,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) { // FIXME: It is completely unclear why this is necessary and not // handled by the above graph code. - FI.addModRefInfo(MRI_ModRef); + FI.addModRefInfo(ModRefInfo::ModRef); } else if (Function *Callee = CS.getCalledFunction()) { // The callgraph doesn't include intrinsic calls. 
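An editor's aside: throughout this file the old MRI_* integer flags are replaced by the scoped ModRefInfo enum and small helper predicates (unionModRef, intersectModRef, isModSet and friends). A rough sketch of the semantics these helpers are assumed to provide, with Ref in bit 0 and Mod in bit 1, the encoding the MayReadAnyGlobal static_asserts above rely on:

    #include <cassert>

    // Editorial sketch only; mirrors the scoped enum this patch switches to.
    enum class ModRefInfo { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

    static ModRefInfo unionModRef(ModRefInfo A, ModRefInfo B) {
      return ModRefInfo(int(A) | int(B));        // saturates towards ModRef
    }
    static ModRefInfo intersectModRef(ModRefInfo A, ModRefInfo B) {
      return ModRefInfo(int(A) & int(B));        // saturates towards NoModRef
    }
    static bool isModSet(ModRefInfo MRI) {
      return (int(MRI) & int(ModRefInfo::Mod)) != 0;
    }
    static bool isModOrRefSet(ModRefInfo MRI) { return int(MRI) != 0; }

    int main() {
      assert(unionModRef(ModRefInfo::Ref, ModRefInfo::Mod) == ModRefInfo::ModRef);
      assert(intersectModRef(ModRefInfo::ModRef, ModRefInfo::Ref) == ModRefInfo::Ref);
      assert(isModSet(ModRefInfo::ModRef) && !isModSet(ModRefInfo::Ref));
      assert(!isModOrRefSet(ModRefInfo::NoModRef));
      return 0;
    }

The intrinsic-callee case that continues below converts a FunctionModRefBehavior via createModRefInfo() and then merges it with addModRefInfo(), the same method the explicit load/store cases further down use.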
if (Callee->isIntrinsic()) { FunctionModRefBehavior Behaviour = AAResultBase::getModRefBehavior(Callee); - FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef)); + FI.addModRefInfo(createModRefInfo(Behaviour)); } } continue; @@ -578,15 +580,15 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // All non-call instructions we use the primary predicates for whether // thay read or write memory. if (I.mayReadFromMemory()) - FI.addModRefInfo(MRI_Ref); + FI.addModRefInfo(ModRefInfo::Ref); if (I.mayWriteToMemory()) - FI.addModRefInfo(MRI_Mod); + FI.addModRefInfo(ModRefInfo::Mod); } } - if ((FI.getModRefInfo() & MRI_Mod) == 0) + if (!isModSet(FI.getModRefInfo())) ++NumReadMemFunctions; - if (FI.getModRefInfo() == MRI_NoModRef) + if (!isModOrRefSet(FI.getModRefInfo())) ++NumNoMemFunctions; // Finally, now that we know the full effect on this SCC, clone the @@ -867,8 +869,9 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA, ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS, const GlobalValue *GV) { if (CS.doesNotAccessMemory()) - return MRI_NoModRef; - ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef; + return ModRefInfo::NoModRef; + ModRefInfo ConservativeResult = + CS.onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef; // Iterate through all the arguments to the called function. If any argument // is based on GV, return the conservative result. @@ -889,12 +892,12 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS, } // We identified all objects in the argument list, and none of them were GV. - return MRI_NoModRef; + return ModRefInfo::NoModRef; } ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - unsigned Known = MRI_ModRef; + ModRefInfo Known = ModRefInfo::ModRef; // If we are asking for mod/ref info of a direct call with a pointer to a // global we are tracking, return information if we have it. @@ -904,12 +907,12 @@ ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS, if (const Function *F = CS.getCalledFunction()) if (NonAddressTakenGlobals.count(GV)) if (const FunctionInfo *FI = getFunctionInfo(F)) - Known = FI->getModRefInfoForGlobal(*GV) | - getModRefInfoForArgument(CS, GV); + Known = unionModRef(FI->getModRefInfoForGlobal(*GV), + getModRefInfoForArgument(CS, GV)); - if (Known == MRI_NoModRef) - return MRI_NoModRef; // No need to query other mod/ref analyses - return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc)); + if (!isModOrRefSet(Known)) + return ModRefInfo::NoModRef; // No need to query other mod/ref analyses + return intersectModRef(Known, AAResultBase::getModRefInfo(CS, Loc)); } GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, diff --git a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index ed233d201537..c11176bbb9c8 100644 --- a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/IndirectCallSiteVisitor.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -32,25 +31,25 @@ using namespace llvm; #define DEBUG_TYPE "pgo-icall-prom-analysis" -// The minimum call count for the direct-call target to be considered as the -// promotion candidate. 
-static cl::opt<unsigned> - ICPCountThreshold("icp-count-threshold", cl::Hidden, cl::ZeroOrMore, - cl::init(1000), - cl::desc("The minimum count to the direct call target " - "for the promotion")); +// The percent threshold for the direct-call target (this call site vs the +// remaining call count) for it to be considered as the promotion target. +static cl::opt<unsigned> ICPRemainingPercentThreshold( + "icp-remaining-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore, + cl::desc("The percentage threshold against remaining unpromoted indirect " + "call count for the promotion")); // The percent threshold for the direct-call target (this call site vs the // total call count) for it to be considered as the promotion target. static cl::opt<unsigned> - ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden, - cl::ZeroOrMore, - cl::desc("The percentage threshold for the promotion")); + ICPTotalPercentThreshold("icp-total-percent-threshold", cl::init(5), + cl::Hidden, cl::ZeroOrMore, + cl::desc("The percentage threshold against total " + "count for the promotion")); // Set the maximum number of targets to promote for a single indirect-call // callsite. static cl::opt<unsigned> - MaxNumPromotions("icp-max-prom", cl::init(2), cl::Hidden, cl::ZeroOrMore, + MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite")); @@ -59,12 +58,10 @@ ICallPromotionAnalysis::ICallPromotionAnalysis() { } bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count, - uint64_t TotalCount) { - if (Count < ICPCountThreshold) - return false; - - unsigned Percentage = (Count * 100) / TotalCount; - return (Percentage >= ICPPercentThreshold); + uint64_t TotalCount, + uint64_t RemainingCount) { + return Count * 100 >= ICPRemainingPercentThreshold * RemainingCount && + Count * 100 >= ICPTotalPercentThreshold * TotalCount; } // Indirect-call promotion heuristic. 
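For concreteness (an illustration with made-up counts, not from the patch), here is the new two-threshold test from isPromotionProfitable() with its 30%/5% defaults:

    #include <cstdio>

    // Sketch of the new profitability test: at least 30% of the remaining
    // (not yet promoted) count and at least 5% of the total count.
    static bool profitable(unsigned long long Count, unsigned long long Total,
                           unsigned long long Remaining) {
      return Count * 100 >= 30 * Remaining && Count * 100 >= 5 * Total;
    }

    int main() {
      unsigned long long Total = 10000, Remaining = Total;
      unsigned long long Targets[] = {6000, 3000, 400};   // sorted by count
      for (unsigned long long Count : Targets) {
        bool OK = profitable(Count, Total, Remaining);
        std::printf("count %llu -> %s\n", Count, OK ? "promote" : "stop");
        if (!OK)
          break;               // mirrors getProfitablePromotionCandidates()
        Remaining -= Count;    // promoted calls no longer count as remaining
      }
      return 0;
    }
    // 6000 and 3000 are promoted (60% and then 75% of what remains);
    // 400 is rejected because it is only 4% of the total count.

The getProfitablePromotionCandidates() loop that continues below updates RemainingCount in exactly this way, so each later candidate is judged against what is left after the earlier promotions.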
The direct targets are sorted based on @@ -78,17 +75,18 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates( << "\n"); uint32_t I = 0; + uint64_t RemainingCount = TotalCount; for (; I < MaxNumPromotions && I < NumVals; I++) { uint64_t Count = ValueDataRef[I].Count; - assert(Count <= TotalCount); + assert(Count <= RemainingCount); DEBUG(dbgs() << " Candidate " << I << " Count=" << Count << " Target_func: " << ValueDataRef[I].Value << "\n"); - if (!isPromotionProfitable(Count, TotalCount)) { + if (!isPromotionProfitable(Count, TotalCount, RemainingCount)) { DEBUG(dbgs() << " Not promote: Cold target.\n"); return I; } - TotalCount -= Count; + RemainingCount -= Count; } return I; } diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index 35693666aa03..fba96c8976a6 100644 --- a/contrib/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -21,9 +21,11 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -66,12 +68,27 @@ static cl::opt<int> cl::ZeroOrMore, cl::desc("Threshold for hot callsites ")); +static cl::opt<int> LocallyHotCallSiteThreshold( + "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore, + cl::desc("Threshold for locally hot callsites ")); + static cl::opt<int> ColdCallSiteRelFreq( "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::desc("Maxmimum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information.")); +static cl::opt<int> HotCallSiteRelFreq( + "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore, + cl::desc("Minimum block frequency, expressed as a multiple of caller's " + "entry frequency, for a callsite to be hot in the absence of " + "profile information.")); + +static cl::opt<bool> OptComputeFullInlineCost( + "inline-cost-full", cl::Hidden, cl::init(false), + cl::desc("Compute the full inline cost of a call site even when the cost " + "exceeds the threshold.")); + namespace { class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { @@ -96,6 +113,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { // Cache the DataLayout since we use it a lot. const DataLayout &DL; + /// The OptimizationRemarkEmitter available for this compilation. + OptimizationRemarkEmitter *ORE; + /// The candidate callsite being analyzed. Please do not use this to do /// analysis in the caller function; we want the inline cost query to be /// easily cacheable. Instead, use the cover function paramHasAttr. @@ -106,6 +126,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { int Threshold; int Cost; + bool ComputeFullInlineCost; bool IsCallerRecursive; bool IsRecursiveCall; @@ -119,8 +140,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// Number of bytes allocated statically by the callee. 
uint64_t AllocatedSize; unsigned NumInstructions, NumVectorInstructions; - int FiftyPercentVectorBonus, TenPercentVectorBonus; - int VectorBonus; + int VectorBonus, TenPercentVectorBonus; + // Bonus to be applied when the callee has only one reachable basic block. + int SingleBBBonus; /// While we walk the potentially-inlined instructions, we build up and /// maintain a mapping of simplified values specific to this callsite. The @@ -143,15 +165,32 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// Keep track of values which map to a pointer base and constant offset. DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; + /// Keep track of dead blocks due to the constant arguments. + SetVector<BasicBlock *> DeadBlocks; + + /// The mapping of the blocks to their known unique successors due to the + /// constant arguments. + DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors; + + /// Model the elimination of repeated loads that is expected to happen + /// whenever we simplify away the stores that would otherwise cause them to be + /// loads. + bool EnableLoadElimination; + SmallPtrSet<Value *, 16> LoadAddrSet; + int LoadEliminationCost; + // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); bool lookupSROAArgAndCost(Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt); void disableSROA(DenseMap<Value *, int>::iterator CostIt); void disableSROA(Value *V); + void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, int InstructionCost); + void disableLoadElimination(); bool isGEPFree(GetElementPtrInst &GEP); + bool canFoldInboundsGEP(GetElementPtrInst &I); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); template <typename Callable> @@ -181,6 +220,10 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// Return true if \p CS is a cold callsite. bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI); + /// Return a higher threshold if \p CS is a hot callsite. + Optional<int> getHotCallSiteThreshold(CallSite CS, + BlockFrequencyInfo *CallerBFI); + // Custom analysis routines. 
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues); @@ -206,6 +249,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitCastInst(CastInst &I); bool visitUnaryInstruction(UnaryInstruction &I); bool visitCmpInst(CmpInst &I); + bool visitAnd(BinaryOperator &I); + bool visitOr(BinaryOperator &I); bool visitSub(BinaryOperator &I); bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); @@ -215,6 +260,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitCallSite(CallSite CS); bool visitReturnInst(ReturnInst &RI); bool visitBranchInst(BranchInst &BI); + bool visitSelectInst(SelectInst &SI); bool visitSwitchInst(SwitchInst &SI); bool visitIndirectBrInst(IndirectBrInst &IBI); bool visitResumeInst(ResumeInst &RI); @@ -226,17 +272,19 @@ public: CallAnalyzer(const TargetTransformInfo &TTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, - ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, - const InlineParams &Params) + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, + Function &Callee, CallSite CSArg, const InlineParams &Params) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), - PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), + PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), - Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), + Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost || + Params.ComputeFullInlineCost || ORE), + IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0), + EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), NumConstantPtrDiffs(0), NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) {} @@ -294,6 +342,7 @@ void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { SROACostSavings -= CostIt->second; SROACostSavingsLost += CostIt->second; SROAArgCosts.erase(CostIt); + disableLoadElimination(); } /// \brief If 'V' maps to a SROA candidate, disable SROA for it. @@ -311,6 +360,13 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, SROACostSavings += InstructionCost; } +void CallAnalyzer::disableLoadElimination() { + if (EnableLoadElimination) { + Cost += LoadEliminationCost; + EnableLoadElimination = false; + } +} + /// \brief Accumulate a constant GEP offset into an APInt if possible. /// /// Returns false if unable to compute the offset for any reason. Respects any @@ -348,15 +404,14 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { /// /// Respects any simplified values known during the analysis of this callsite. 
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { - SmallVector<Value *, 4> Indices; + SmallVector<Value *, 4> Operands; + Operands.push_back(GEP.getOperand(0)); for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) if (Constant *SimpleOp = SimplifiedValues.lookup(*I)) - Indices.push_back(SimpleOp); + Operands.push_back(SimpleOp); else - Indices.push_back(*I); - return TargetTransformInfo::TCC_Free == - TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(), - Indices); + Operands.push_back(*I); + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands); } bool CallAnalyzer::visitAlloca(AllocaInst &I) { @@ -391,52 +446,125 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { } bool CallAnalyzer::visitPHI(PHINode &I) { - // FIXME: We should potentially be tracking values through phi nodes, - // especially when they collapse to a single value due to deleted CFG edges - // during inlining. - // FIXME: We need to propagate SROA *disabling* through phi nodes, even // though we don't want to propagate it's bonuses. The idea is to disable // SROA if it *might* be used in an inappropriate manner. // Phi nodes are always zero-cost. - return true; -} -bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - bool SROACandidate = - lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt); + APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits()); + bool CheckSROA = I.getType()->isPointerTy(); - // Try to fold GEPs of constant-offset call site argument pointers. This - // requires target data and inbounds GEPs. - if (I.isInBounds()) { - // Check if we have a base + offset for the pointer. - Value *Ptr = I.getPointerOperand(); - std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); - if (BaseAndOffset.first) { - // Check if the offset of this GEP is constant, and if so accumulate it - // into Offset. - if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) { - // Non-constant GEPs aren't folded, and disable SROA. - if (SROACandidate) - disableSROA(CostIt); - return isGEPFree(I); - } + // Track the constant or pointer with constant offset we've seen so far. + Constant *FirstC = nullptr; + std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset}; + Value *FirstV = nullptr; - // Add the result as a new mapping to Base + Offset. - ConstantOffsetPtrs[&I] = BaseAndOffset; + for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = I.getIncomingBlock(i); + // If the incoming block is dead, skip the incoming block. + if (DeadBlocks.count(Pred)) + continue; + // If the parent block of phi is not the known successor of the incoming + // block, skip the incoming block. + BasicBlock *KnownSuccessor = KnownSuccessors[Pred]; + if (KnownSuccessor && KnownSuccessor != I.getParent()) + continue; + + Value *V = I.getIncomingValue(i); + // If the incoming value is this phi itself, skip the incoming value. + if (&I == V) + continue; + + Constant *C = dyn_cast<Constant>(V); + if (!C) + C = SimplifiedValues.lookup(V); - // Also handle SROA candidates here, we already know that the GEP is - // all-constant indexed. 
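An editor's note on the reworked visitPHI() above: rather than giving up on phis, the analyzer now skips incoming edges that the constant arguments have already proven dead and folds the phi when every remaining incoming value is one and the same constant (or the same pointer plus constant offset; the sketch below models only the constant case, and every name in it is illustrative):

    #include <cassert>
    #include <map>
    #include <optional>
    #include <set>
    #include <string>

    using Block = std::string;

    // Ignore edges from dead blocks; fold the phi if all live incoming
    // values are the same constant, otherwise give up.
    static std::optional<int>
    foldPhi(const std::map<Block, std::optional<int>> &Incoming,
            const std::set<Block> &DeadBlocks) {
      std::optional<int> Common;
      for (const auto &P : Incoming) {
        if (DeadBlocks.count(P.first))
          continue;                       // dead edge, does not contribute
        if (!P.second)
          return std::nullopt;            // a live non-constant value
        if (Common && *Common != *P.second)
          return std::nullopt;            // two different live constants
        Common = P.second;
      }
      return Common;
    }

    int main() {
      // %p = phi i32 [ 7, %then ], [ %unknown, %else ], with %else dead.
      std::map<Block, std::optional<int>> In = {{"then", 7},
                                                {"else", std::nullopt}};
      assert(*foldPhi(In, {"else"}) == 7);   // folds once %else is dead
      assert(!foldPhi(In, {}));              // cannot fold while %else is live
      return 0;
    }

Once an edge has been proven dead (see findDeadBlocks() later in this patch), a phi like this collapses to a constant and its users can keep simplifying.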
- if (SROACandidate) - SROAArgValues[&I] = SROAArg; + std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset}; + if (!C && CheckSROA) + BaseAndOffset = ConstantOffsetPtrs.lookup(V); + if (!C && !BaseAndOffset.first) + // The incoming value is neither a constant nor a pointer with constant + // offset, exit early. + return true; + + if (FirstC) { + if (FirstC == C) + // If we've seen a constant incoming value before and it is the same + // constant we see this time, continue checking the next incoming value. + continue; + // Otherwise early exit because we either see a different constant or saw + // a constant before but we have a pointer with constant offset this time. + return true; + } + + if (FirstV) { + // The same logic as above, but check pointer with constant offset here. + if (FirstBaseAndOffset == BaseAndOffset) + continue; return true; } + + if (C) { + // This is the 1st time we've seen a constant, record it. + FirstC = C; + continue; + } + + // The remaining case is that this is the 1st time we've seen a pointer with + // constant offset, record it. + FirstV = V; + FirstBaseAndOffset = BaseAndOffset; } + // Check if we can map phi to a constant. + if (FirstC) { + SimplifiedValues[&I] = FirstC; + return true; + } + + // Check if we can map phi to a pointer with constant offset. + if (FirstBaseAndOffset.first) { + ConstantOffsetPtrs[&I] = FirstBaseAndOffset; + + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + } + + return true; +} + +/// \brief Check we can fold GEPs of constant-offset call site argument pointers. +/// This requires target data and inbounds GEPs. +/// +/// \return true if the specified GEP can be folded. +bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) { + // Check if we have a base + offset for the pointer. + std::pair<Value *, APInt> BaseAndOffset = + ConstantOffsetPtrs.lookup(I.getPointerOperand()); + if (!BaseAndOffset.first) + return false; + + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) + return false; + + // Add the result as a new mapping to Base + Offset. + ConstantOffsetPtrs[&I] = BaseAndOffset; + + return true; +} + +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + bool SROACandidate = + lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt); + // Lambda to check whether a GEP's indices are all constant. auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) @@ -445,7 +573,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { return true; }; - if (IsGEPOffsetConstant(I)) { + if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { if (SROACandidate) SROAArgValues[&I] = SROAArg; @@ -643,15 +771,17 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) { bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's coldness is // determined based on that. - if (PSI->hasProfileSummary()) + if (PSI && PSI->hasProfileSummary()) return PSI->isColdCallSite(CS, CallerBFI); + + // Otherwise we need BFI to be available. if (!CallerBFI) return false; - // In the absence of global profile summary, determine if the callsite is cold - // relative to caller's entry. 
We could potentially cache the computation of - // scaled entry frequency, but the added complexity is not worth it unless - // this scaling shows up high in the profiles. + // Determine if the callsite is cold relative to caller's entry. We could + // potentially cache the computation of scaled entry frequency, but the added + // complexity is not worth it unless this scaling shows up high in the + // profiles. const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); auto CallSiteBB = CS.getInstruction()->getParent(); auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB); @@ -660,6 +790,34 @@ bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) { return CallSiteFreq < CallerEntryFreq * ColdProb; } +Optional<int> +CallAnalyzer::getHotCallSiteThreshold(CallSite CS, + BlockFrequencyInfo *CallerBFI) { + + // If global profile summary is available, then callsite's hotness is + // determined based on that. + if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI)) + return Params.HotCallSiteThreshold; + + // Otherwise we need BFI to be available and to have a locally hot callsite + // threshold. + if (!CallerBFI || !Params.LocallyHotCallSiteThreshold) + return None; + + // Determine if the callsite is hot relative to caller's entry. We could + // potentially cache the computation of scaled entry frequency, but the added + // complexity is not worth it unless this scaling shows up high in the + // profiles. + auto CallSiteBB = CS.getInstruction()->getParent(); + auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency(); + auto CallerEntryFreq = CallerBFI->getEntryFreq(); + if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq) + return Params.LocallyHotCallSiteThreshold; + + // Otherwise treat it normally. + return None; +} + void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // If no size growth is allowed for this inlining, set Threshold to 0. if (!allowSizeGrowth(CS)) { @@ -679,11 +837,49 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { return B ? std::max(A, B.getValue()) : A; }; + // Various bonus percentages. These are multiplied by Threshold to get the + // bonus values. + // SingleBBBonus: This bonus is applied if the callee has a single reachable + // basic block at the given callsite context. This is speculatively applied + // and withdrawn if more than one basic block is seen. + // + // Vector bonuses: We want to more aggressively inline vector-dense kernels + // and apply this bonus based on the percentage of vector instructions. A + // bonus is applied if the vector instructions exceed 50% and half that amount + // is applied if it exceeds 10%. Note that these bonuses are some what + // arbitrary and evolved over time by accident as much as because they are + // principled bonuses. + // FIXME: It would be nice to base the bonus values on something more + // scientific. + // + // LstCallToStaticBonus: This large bonus is applied to ensure the inlining + // of the last call to a static function as inlining such functions is + // guaranteed to reduce code size. + // + // These bonus percentages may be set to 0 based on properties of the caller + // and the callsite. + int SingleBBBonusPercent = 50; + int VectorBonusPercent = 150; + int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; + + // Lambda to set all the above bonus and bonus percentages to 0. 
+ auto DisallowAllBonuses = [&]() { + SingleBBBonusPercent = 0; + VectorBonusPercent = 0; + LastCallToStaticBonus = 0; + }; + // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available // and reduce the threshold if the caller has the necessary attribute. - if (Caller->optForMinSize()) + if (Caller->optForMinSize()) { Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); - else if (Caller->optForSize()) + // For minsize, we want to disable the single BB bonus and the vector + // bonuses, but not the last-call-to-static bonus. Inlining the last call to + // a static function will, at the minimum, eliminate the parameter setup and + // call/return instructions. + SingleBBBonusPercent = 0; + VectorBonusPercent = 0; + } else if (Caller->optForSize()) Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); // Adjust the threshold based on inlinehint attribute and profile based @@ -691,35 +887,48 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { if (!Caller->optForMinSize()) { if (Callee.hasFnAttribute(Attribute::InlineHint)) Threshold = MaxIfValid(Threshold, Params.HintThreshold); - if (PSI) { - BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; - // FIXME: After switching to the new passmanager, simplify the logic below - // by checking only the callsite hotness/coldness. The check for CallerBFI - // exists only because we do not have BFI available with the old PM. - // - // Use callee's hotness information only if we have no way of determining - // callsite's hotness information. Callsite hotness can be determined if - // sample profile is used (which adds hotness metadata to calls) or if - // caller's BlockFrequencyInfo is available. - if (CallerBFI || PSI->hasSampleProfile()) { - if (PSI->isHotCallSite(CS, CallerBFI)) { - DEBUG(dbgs() << "Hot callsite.\n"); - Threshold = Params.HotCallSiteThreshold.getValue(); - } else if (isColdCallSite(CS, CallerBFI)) { - DEBUG(dbgs() << "Cold callsite.\n"); - Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); - } - } else { - if (PSI->isFunctionEntryHot(&Callee)) { - DEBUG(dbgs() << "Hot callee.\n"); - // If callsite hotness can not be determined, we may still know - // that the callee is hot and treat it as a weaker hint for threshold - // increase. - Threshold = MaxIfValid(Threshold, Params.HintThreshold); - } else if (PSI->isFunctionEntryCold(&Callee)) { - DEBUG(dbgs() << "Cold callee.\n"); - Threshold = MinIfValid(Threshold, Params.ColdThreshold); - } + + // FIXME: After switching to the new passmanager, simplify the logic below + // by checking only the callsite hotness/coldness as we will reliably + // have local profile information. + // + // Callsite hotness and coldness can be determined if sample profile is + // used (which adds hotness metadata to calls) or if caller's + // BlockFrequencyInfo is available. + BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; + auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI); + if (!Caller->optForSize() && HotCallSiteThreshold) { + DEBUG(dbgs() << "Hot callsite.\n"); + // FIXME: This should update the threshold only if it exceeds the + // current threshold, but AutoFDO + ThinLTO currently relies on this + // behavior to prevent inlining of hot callsites during ThinLTO + // compile phase. 
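Put concretely (an editorial reading of the defaults above, not text from the patch): getHotCallSiteThreshold() keeps returning Params.HotCallSiteThreshold whenever the profile summary marks the site hot, and otherwise falls back to the purely local test: if BFI is available and the callsite's block frequency is at least 60 times the caller's entry frequency (say, entry frequency 8 and a block frequency of 480 or more), the locally-hot threshold of 525 applies instead, provided the locally-hot knob has been populated, which the later getInlineParams() change does by default only at -O3.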
+ Threshold = HotCallSiteThreshold.getValue(); + } else if (isColdCallSite(CS, CallerBFI)) { + DEBUG(dbgs() << "Cold callsite.\n"); + // Do not apply bonuses for a cold callsite including the + // LastCallToStatic bonus. While this bonus might result in code size + // reduction, it can cause the size of a non-cold caller to increase + // preventing it from being inlined. + DisallowAllBonuses(); + Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); + } else if (PSI) { + // Use callee's global profile information only if we have no way of + // determining this via callsite information. + if (PSI->isFunctionEntryHot(&Callee)) { + DEBUG(dbgs() << "Hot callee.\n"); + // If callsite hotness can not be determined, we may still know + // that the callee is hot and treat it as a weaker hint for threshold + // increase. + Threshold = MaxIfValid(Threshold, Params.HintThreshold); + } else if (PSI->isFunctionEntryCold(&Callee)) { + DEBUG(dbgs() << "Cold callee.\n"); + // Do not apply bonuses for a cold callee including the + // LastCallToStatic bonus. While this bonus might result in code size + // reduction, it can cause the size of a non-cold caller to increase + // preventing it from being inlined. + DisallowAllBonuses(); + Threshold = MinIfValid(Threshold, Params.ColdThreshold); } } } @@ -727,6 +936,17 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // Finally, take the target-specific inlining threshold multiplier into // account. Threshold *= TTI.getInliningThresholdMultiplier(); + + SingleBBBonus = Threshold * SingleBBBonusPercent / 100; + VectorBonus = Threshold * VectorBonusPercent / 100; + + bool OnlyOneCallAndLocalLinkage = + F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); + // If there is only one call of the function, and it has internal linkage, + // the cost of inlining it drops dramatically. It may seem odd to update + // Cost in updateThreshold, but the bonus depends on the logic in this method. + if (OnlyOneCallAndLocalLinkage) + Cost -= LastCallToStaticBonus; } bool CallAnalyzer::visitCmpInst(CmpInst &I) { @@ -784,6 +1004,34 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) { return false; } +bool CallAnalyzer::visitOr(BinaryOperator &I) { + // This is necessary because the generic simplify instruction only works if + // both operands are constants. + for (unsigned i = 0; i < 2; ++i) { + if (ConstantInt *C = dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(I.getOperand(i)))) + if (C->isAllOnesValue()) { + SimplifiedValues[&I] = C; + return true; + } + } + return Base::visitOr(I); +} + +bool CallAnalyzer::visitAnd(BinaryOperator &I) { + // This is necessary because the generic simplify instruction only works if + // both operands are constants. + for (unsigned i = 0; i < 2; ++i) { + if (ConstantInt *C = dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(I.getOperand(i)))) + if (C->isZero()) { + SimplifiedValues[&I] = C; + return true; + } + } + return Base::visitAnd(I); +} + bool CallAnalyzer::visitSub(BinaryOperator &I) { // Try to handle a special case: we can fold computing the difference of two // constant-related pointers. @@ -845,6 +1093,15 @@ bool CallAnalyzer::visitLoad(LoadInst &I) { disableSROA(CostIt); } + // If the data is already loaded from this address and hasn't been clobbered + // by any stores or calls, this load is likely to be redundant and can be + // eliminated. 
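As a rough illustration (not the analyzer's real data structures), the load-elimination accounting added in this hunk boils down to a set of already-seen load addresses, a pot of provisional savings, and a switch that any clobbering store or call flips off while charging the pot back. A compact sketch, assuming InlineConstants::InstrCost is 5:

    #include <cstdio>
    #include <set>
    #include <string>

    struct LoadElimSketch {
      std::set<std::string> Seen;      // pointer operands of loads seen so far
      int Cost = 0;                    // running cost estimate
      int ProvisionalSavings = 0;      // repeated loads we skipped charging for
      bool Enabled = true;

      void visitLoad(const std::string &Addr) {
        if (Enabled && !Seen.insert(Addr).second) {
          ProvisionalSavings += 5;     // repeated load: assume it is eliminated
          return;                      // ...so do not charge it, for now
        }
        Cost += 5;
      }
      void visitClobber() {            // a store, or a call that may write memory
        if (Enabled) {
          Cost += ProvisionalSavings;  // the skipped loads may survive after all
          Enabled = false;             // and no further credit is granted
        }
        Cost += 5;
      }
    };

    int main() {
      LoadElimSketch S;
      S.visitLoad("p");                              // charged: Cost = 5
      S.visitLoad("p");                              // provisionally free
      S.visitLoad("p");                              // provisionally free
      std::printf("before clobber: %d\n", S.Cost);   // prints 5
      S.visitClobber();                              // savings charged back, plus the store
      std::printf("after clobber:  %d\n", S.Cost);   // prints 20
      return 0;
    }

The check that continues below implements the "repeated load is free for now" half of this; visitStore() and the visitCallSite() paths later in the hunk call disableLoadElimination() for the clobbering half.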
+ if (EnableLoadElimination && + !LoadAddrSet.insert(I.getPointerOperand()).second) { + LoadEliminationCost += InlineConstants::InstrCost; + return true; + } + return false; } @@ -860,6 +1117,15 @@ bool CallAnalyzer::visitStore(StoreInst &I) { disableSROA(CostIt); } + // The store can potentially clobber loads and prevent repeated loads from + // being eliminated. + // FIXME: + // 1. We can probably keep an initial set of eliminatable loads substracted + // from the cost even when we finally see a store. We just need to disable + // *further* accumulation of elimination savings. + // 2. We should probably at some point thread MemorySSA for the callee into + // this and then use that to actually compute *really* precise savings. + disableLoadElimination(); return false; } @@ -942,6 +1208,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { switch (II->getIntrinsicID()) { default: + if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) + disableLoadElimination(); return Base::visitCallSite(CS); case Intrinsic::load_relative: @@ -952,6 +1220,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memset: case Intrinsic::memcpy: case Intrinsic::memmove: + disableLoadElimination(); // SROA can usually chew through these intrinsics, but they aren't free. return false; case Intrinsic::localescape: @@ -960,7 +1229,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { } } - if (F == CS.getInstruction()->getParent()->getParent()) { + if (F == CS.getInstruction()->getFunction()) { // This flag will fully abort the analysis, so don't bother with anything // else. IsRecursiveCall = true; @@ -978,6 +1247,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { Cost += InlineConstants::CallPenalty; } + if (!CS.onlyReadsMemory()) + disableLoadElimination(); return Base::visitCallSite(CS); } @@ -992,8 +1263,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // Next, check if this happens to be an indirect function call to a known // function in this inline context. If not, we've done all we can. Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); - if (!F) + if (!F) { + if (!CS.onlyReadsMemory()) + disableLoadElimination(); return Base::visitCallSite(CS); + } // If we have a constant that we are calling as a function, we can peer // through it and see the function target. This happens not infrequently @@ -1002,7 +1276,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // out. Pretend to inline the function, with a custom threshold. auto IndirectCallParams = Params; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS, + CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS, IndirectCallParams); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! 
Subtract the cost from the @@ -1010,6 +1284,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { Cost -= std::max(0, CA.getThreshold() - CA.getCost()); } + if (!F->onlyReadsMemory()) + disableLoadElimination(); return Base::visitCallSite(CS); } @@ -1030,6 +1306,87 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) { SimplifiedValues.lookup(BI.getCondition())); } +bool CallAnalyzer::visitSelectInst(SelectInst &SI) { + bool CheckSROA = SI.getType()->isPointerTy(); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + Constant *TrueC = dyn_cast<Constant>(TrueVal); + if (!TrueC) + TrueC = SimplifiedValues.lookup(TrueVal); + Constant *FalseC = dyn_cast<Constant>(FalseVal); + if (!FalseC) + FalseC = SimplifiedValues.lookup(FalseVal); + Constant *CondC = + dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition())); + + if (!CondC) { + // Select C, X, X => X + if (TrueC == FalseC && TrueC) { + SimplifiedValues[&SI] = TrueC; + return true; + } + + if (!CheckSROA) + return Base::visitSelectInst(SI); + + std::pair<Value *, APInt> TrueBaseAndOffset = + ConstantOffsetPtrs.lookup(TrueVal); + std::pair<Value *, APInt> FalseBaseAndOffset = + ConstantOffsetPtrs.lookup(FalseVal); + if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) { + ConstantOffsetPtrs[&SI] = TrueBaseAndOffset; + + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt)) + SROAArgValues[&SI] = SROAArg; + return true; + } + + return Base::visitSelectInst(SI); + } + + // Select condition is a constant. + Value *SelectedV = CondC->isAllOnesValue() + ? TrueVal + : (CondC->isNullValue()) ? FalseVal : nullptr; + if (!SelectedV) { + // Condition is a vector constant that is not all 1s or all 0s. If all + // operands are constants, ConstantExpr::getSelect() can handle the cases + // such as select vectors. + if (TrueC && FalseC) { + if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) { + SimplifiedValues[&SI] = C; + return true; + } + } + return Base::visitSelectInst(SI); + } + + // Condition is either all 1s or all 0s. SI can be simplified. + if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) { + SimplifiedValues[&SI] = SelectedC; + return true; + } + + if (!CheckSROA) + return true; + + std::pair<Value *, APInt> BaseAndOffset = + ConstantOffsetPtrs.lookup(SelectedV); + if (BaseAndOffset.first) { + ConstantOffsetPtrs[&SI] = BaseAndOffset; + + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt)) + SROAArgValues[&SI] = SROAArg; + } + + return true; +} + bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // We model unconditional switches as free, see the comments on handling // branches. @@ -1062,7 +1419,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { std::min((int64_t)CostUpperBound, (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - if (CostLowerBound > Threshold) { + if (CostLowerBound > Threshold && !ComputeFullInlineCost) { Cost = CostLowerBound; return false; } @@ -1211,21 +1568,39 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, else Cost += InlineConstants::InstrCost; + using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. 
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr || HasFrameEscape) + HasIndirectBr || HasFrameEscape) { + if (ORE) + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", + CandidateCS.getInstruction()) + << NV("Callee", &F) + << " has uninlinable pattern and cost is not fully computed"; + }); return false; + } // If the caller is a recursive function then we don't want to inline // functions which allocate a lot of stack space because it would increase // the caller stack usage dramatically. if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { + if (ORE) + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", + CandidateCS.getInstruction()) + << NV("Callee", &F) + << " is recursive and allocates too much stack space. Cost is " + "not fully computed"; + }); return false; + } // Check if we've past the maximum possible threshold so we don't spin in // huge basic blocks that will never inline. - if (Cost > Threshold) + if (Cost >= Threshold && !ComputeFullInlineCost) return false; } @@ -1270,6 +1645,44 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); } +/// \brief Find dead blocks due to deleted CFG edges during inlining. +/// +/// If we know the successor of the current block, \p CurrBB, has to be \p +/// NextBB, the other successors of \p CurrBB are dead if these successors have +/// no live incoming CFG edges. If one block is found to be dead, we can +/// continue growing the dead block list by checking the successors of the dead +/// blocks to see if all their incoming edges are dead or not. +void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { + auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { + // A CFG edge is dead if the predecessor is dead or the predessor has a + // known successor which is not the one under exam. + return (DeadBlocks.count(Pred) || + (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); + }; + + auto IsNewlyDead = [&](BasicBlock *BB) { + // If all the edges to a block are dead, the block is also dead. + return (!DeadBlocks.count(BB) && + llvm::all_of(predecessors(BB), + [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); + }; + + for (BasicBlock *Succ : successors(CurrBB)) { + if (Succ == NextBB || !IsNewlyDead(Succ)) + continue; + SmallVector<BasicBlock *, 4> NewDead; + NewDead.push_back(Succ); + while (!NewDead.empty()) { + BasicBlock *Dead = NewDead.pop_back_val(); + if (DeadBlocks.insert(Dead)) + // Continue growing the dead block lists. + for (BasicBlock *S : successors(Dead)) + if (IsNewlyDead(S)) + NewDead.push_back(S); + } + } +} + /// \brief Analyze a call site for potential inlining. /// /// Returns true if inlining this call is viable, and false if it is not @@ -1296,51 +1709,35 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // Update the threshold based on callsite properties updateThreshold(CS, F); - FiftyPercentVectorBonus = 3 * Threshold / 2; - TenPercentVectorBonus = 3 * Threshold / 4; - - // Track whether the post-inlining function would have more than one basic - // block. A single basic block is often intended for inlining. Balloon the - // threshold by 50% until we pass the single-BB phase. - bool SingleBB = true; - int SingleBBBonus = Threshold / 2; - // Speculatively apply all possible bonuses to Threshold. 
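An editor's aside on the findDeadBlocks()/KnownSuccessors machinery added a little earlier in this hunk: once a branch or switch condition folds to a constant, the block's single live successor is recorded, and any other successor whose incoming edges are now all dead joins DeadBlocks, transitively. A self-contained sketch with illustrative types:

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using CFG = std::map<std::string, std::vector<std::string>>;  // successor lists

    int main() {
      CFG Succs = {{"entry", {"then", "else"}},
                   {"then", {"merge"}},
                   {"else", {"merge"}},
                   {"merge", {}}};
      std::map<std::string, std::string> KnownSucc = {{"entry", "then"}};
      std::set<std::string> Dead;

      // Predecessor map derived from the successor lists.
      std::map<std::string, std::vector<std::string>> Preds;
      for (auto &P : Succs)
        for (auto &S : P.second)
          Preds[S].push_back(P.first);

      auto EdgeDead = [&](const std::string &P, const std::string &S) {
        return Dead.count(P) || (KnownSucc.count(P) && KnownSucc[P] != S);
      };
      auto NewlyDead = [&](const std::string &B) {
        if (Dead.count(B)) return false;
        for (auto &P : Preds[B])
          if (!EdgeDead(P, B)) return false;
        return true;
      };

      // entry's branch folded towards "then"; grow the dead set from the
      // other successor, the way findDeadBlocks does.
      std::vector<std::string> Work = {"else"};
      while (!Work.empty()) {
        std::string B = Work.back(); Work.pop_back();
        if (!NewlyDead(B)) continue;
        Dead.insert(B);
        for (auto &S : Succs[B])
          Work.push_back(S);
      }
      for (auto &B : Dead)
        std::printf("dead: %s\n", B.c_str());   // prints only "else"
      return 0;
    }

In the patch this runs from analyzeCall(), right after a folded conditional branch or switch selects its one successor.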
If cost exceeds // this Threshold any time, and cost cannot decrease, we can stop processing // the rest of the function body. - Threshold += (SingleBBBonus + FiftyPercentVectorBonus); + Threshold += (SingleBBBonus + VectorBonus); // Give out bonuses for the callsite, as the instructions setting them up // will be gone after inlining. Cost -= getCallsiteCost(CS, DL); - // If there is only one call of the function, and it has internal linkage, - // the cost of inlining it drops dramatically. - bool OnlyOneCallAndLocalLinkage = - F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); - if (OnlyOneCallAndLocalLinkage) - Cost -= InlineConstants::LastCallToStaticBonus; - // If this function uses the coldcc calling convention, prefer not to inline // it. if (F.getCallingConv() == CallingConv::Cold) Cost += InlineConstants::ColdccPenalty; // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) + if (Cost >= Threshold && !ComputeFullInlineCost) return false; if (F.empty()) return true; - Function *Caller = CS.getInstruction()->getParent()->getParent(); + Function *Caller = CS.getInstruction()->getFunction(); // Check if the caller function is recursive itself. for (User *U : Caller->users()) { CallSite Site(U); if (!Site) continue; Instruction *I = Site.getInstruction(); - if (I->getParent()->getParent() == Caller) { + if (I->getFunction() == Caller) { IsCallerRecursive = true; break; } @@ -1388,11 +1785,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); + bool SingleBB = true; // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - if (Cost > Threshold) + if (Cost >= Threshold && !ComputeFullInlineCost) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1422,7 +1820,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Value *Cond = BI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); + BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0); + BBWorklist.insert(NextBB); + KnownSuccessors[BB] = NextBB; + findDeadBlocks(BB, NextBB); continue; } } @@ -1430,7 +1831,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor()); + BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor(); + BBWorklist.insert(NextBB); + KnownSuccessors[BB] = NextBB; + findDeadBlocks(BB, NextBB); continue; } } @@ -1452,6 +1856,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } + bool OnlyOneCallAndLocalLinkage = + F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). @@ -1462,9 +1868,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // subtract the excess bonus, if any, from the Threshold before // comparing against Cost. 
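A worked example (editorial, assuming the stock default threshold of 225 and no other adjustments): updateThreshold() now sets SingleBBBonus = 225 * 50 / 100 = 112 and VectorBonus = 225 * 150 / 100 = 337, so analyzeCall() starts out comparing Cost against a speculative Threshold of 225 + 112 + 337 = 674. If the callee ends up with at most 10% vector instructions, the code below takes the whole 337 back; between 10% and 50%, only half of it (168) is withdrawn. The single-BB bonus is likewise withdrawn as soon as a second reachable block is seen, as the comment in updateThreshold() describes.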
if (NumVectorInstructions <= NumInstructions / 10) - Threshold -= FiftyPercentVectorBonus; + Threshold -= VectorBonus; else if (NumVectorInstructions <= NumInstructions / 2) - Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus); + Threshold -= VectorBonus/2; return Cost < std::max(1, Threshold); } @@ -1482,6 +1888,7 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump() { DEBUG_PRINT_STAT(NumInstructions); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(LoadEliminationCost); DEBUG_PRINT_STAT(ContainsNoDuplicateCall); DEBUG_PRINT_STAT(Cost); DEBUG_PRINT_STAT(Threshold); @@ -1534,9 +1941,9 @@ InlineCost llvm::getInlineCost( CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI); + GetAssumptionCache, GetBFI, PSI, ORE); } InlineCost llvm::getInlineCost( @@ -1544,7 +1951,7 @@ InlineCost llvm::getInlineCost( TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { // Cannot inline indirect calls. if (!Callee) @@ -1560,11 +1967,12 @@ InlineCost llvm::getInlineCost( // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). - if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI)) + Function *Caller = CS.getCaller(); + if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI)) return llvm::InlineCost::getNever(); // Don't inline this call if the caller has the optnone attribute. - if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone)) + if (Caller->hasFnAttribute(Attribute::OptimizeNone)) return llvm::InlineCost::getNever(); // Don't inline functions which can be interposed at link-time. Don't inline @@ -1576,9 +1984,9 @@ InlineCost llvm::getInlineCost( return llvm::InlineCost::getNever(); DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "...\n"); + << "... (caller:" << Caller->getName() << ")\n"); - CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS, + CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS, Params); bool ShouldInline = CA.analyzeCall(CS); @@ -1652,6 +2060,16 @@ InlineParams llvm::getInlineParams(int Threshold) { // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. Params.HotCallSiteThreshold = HotCallSiteThreshold; + // If the -locally-hot-callsite-threshold is explicitly specified, use it to + // populate LocallyHotCallSiteThreshold. Later, we populate + // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if + // we know that optimization level is O3 (in the getInlineParams variant that + // takes the opt and size levels). + // FIXME: Remove this check (and make the assignment unconditional) after + // addressing size regression issues at O2. + if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) + Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; + // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold. 
Params.ColdCallSiteThreshold = ColdCallSiteThreshold; @@ -1691,5 +2109,12 @@ static int computeThresholdFromOptLevels(unsigned OptLevel, } InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { - return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); + auto Params = + getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); + // At O3, use the value of -locally-hot-callsite-threshold option to populate + // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only + // when it is specified explicitly. + if (OptLevel > 2) + Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; + return Params; } diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index b4f3b87e1846..3ce1281743c3 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -23,10 +23,10 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" @@ -327,7 +327,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, // Check that the simplified value has the form "X op Y" where "op" is the // same as the original operation. Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV); - if (Simplified && Simplified->getOpcode() == Opcode) { + if (Simplified && Simplified->getOpcode() == unsigned(Opcode)) { // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS". // We already know that "op" is the same as for the simplified value. See // if the operands match too. If so, return the simplified value. @@ -791,90 +791,6 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } -/// Given operands for an FAdd, see if we can fold the result. If not, this -/// returns null. -static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) - return C; - - // fadd X, -0 ==> X - if (match(Op1, m_NegZero())) - return Op0; - - // fadd X, 0 ==> X, when we know X is not -0 - if (match(Op1, m_Zero()) && - (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) - return Op0; - - // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 - // where nnan and ninf have to occur at least once somewhere in this - // expression - Value *SubOp = nullptr; - if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) - SubOp = Op1; - else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) - SubOp = Op0; - if (SubOp) { - Instruction *FSub = cast<Instruction>(SubOp); - if ((FMF.noNaNs() || FSub->hasNoNaNs()) && - (FMF.noInfs() || FSub->hasNoInfs())) - return Constant::getNullValue(Op0->getType()); - } - - return nullptr; -} - -/// Given operands for an FSub, see if we can fold the result. If not, this -/// returns null. 
-static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) - return C; - - // fsub X, 0 ==> X - if (match(Op1, m_Zero())) - return Op0; - - // fsub X, -0 ==> X, when we know X is not -0 - if (match(Op1, m_NegZero()) && - (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) - return Op0; - - // fsub -0.0, (fsub -0.0, X) ==> X - Value *X; - if (match(Op0, m_NegZero()) && match(Op1, m_FSub(m_NegZero(), m_Value(X)))) - return X; - - // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored. - if (FMF.noSignedZeros() && match(Op0, m_AnyZero()) && - match(Op1, m_FSub(m_AnyZero(), m_Value(X)))) - return X; - - // fsub nnan x, x ==> 0.0 - if (FMF.noNaNs() && Op0 == Op1) - return Constant::getNullValue(Op0->getType()); - - return nullptr; -} - -/// Given the operands for an FMul, see if we can fold the result -static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) - return C; - - // fmul X, 1.0 ==> X - if (match(Op1, m_FPOne())) - return Op0; - - // fmul nnan nsz X, 0 ==> 0 - if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) - return Op1; - - return nullptr; -} - /// Given operands for a Mul, see if we can fold the result. /// If not, this returns null. static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -932,27 +848,12 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); -} - - -Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit); -} - -Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); -} - Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit); } /// Check for common or similar folds of integer division or integer remainder. +/// This applies to all 4 opcodes (sdiv/udiv/srem/urem). static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { Type *Ty = Op0->getType(); @@ -1003,9 +904,70 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { return nullptr; } -/// Given operands for an SDiv or UDiv, see if we can fold the result. -/// If not, this returns null. -static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, +/// Given a predicate and two operands, return true if the comparison is true. +/// This is a helper for div/rem simplification where we return some other value +/// when we can prove a relationship between the operands. +static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { + Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse); + Constant *C = dyn_cast_or_null<Constant>(V); + return (C && C->isAllOnesValue()); +} + +/// Return true if we can simplify X / Y to 0. Remainder can adapt that answer +/// to simplify X % Y to X. 
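/// For example, with a constant dividend of 3, the signed division folds to 0
/// whenever the divisor is known to satisfy Y <s -3 or Y >s 3; in the unsigned
/// case, X /u Y folds to 0 whenever X is known to be unsigned-less-than Y, and
/// the corresponding srem/urem then fold to X.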
+static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q, + unsigned MaxRecurse, bool IsSigned) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return false; + + if (IsSigned) { + // |X| / |Y| --> 0 + // + // We require that 1 operand is a simple constant. That could be extended to + // 2 variables if we computed the sign bit for each. + // + // Make sure that a constant is not the minimum signed value because taking + // the abs() of that is undefined. + Type *Ty = X->getType(); + const APInt *C; + if (match(X, m_APInt(C)) && !C->isMinSignedValue()) { + // Is the variable divisor magnitude always greater than the constant + // dividend magnitude? + // |Y| > |C| --> Y < -abs(C) or Y > abs(C) + Constant *PosDividendC = ConstantInt::get(Ty, C->abs()); + Constant *NegDividendC = ConstantInt::get(Ty, -C->abs()); + if (isICmpTrue(CmpInst::ICMP_SLT, Y, NegDividendC, Q, MaxRecurse) || + isICmpTrue(CmpInst::ICMP_SGT, Y, PosDividendC, Q, MaxRecurse)) + return true; + } + if (match(Y, m_APInt(C))) { + // Special-case: we can't take the abs() of a minimum signed value. If + // that's the divisor, then all we have to do is prove that the dividend + // is also not the minimum signed value. + if (C->isMinSignedValue()) + return isICmpTrue(CmpInst::ICMP_NE, X, Y, Q, MaxRecurse); + + // Is the variable dividend magnitude always less than the constant + // divisor magnitude? + // |X| < |C| --> X > -abs(C) and X < abs(C) + Constant *PosDivisorC = ConstantInt::get(Ty, C->abs()); + Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs()); + if (isICmpTrue(CmpInst::ICMP_SGT, X, NegDivisorC, Q, MaxRecurse) && + isICmpTrue(CmpInst::ICMP_SLT, X, PosDivisorC, Q, MaxRecurse)) + return true; + } + return false; + } + + // IsSigned == false. + // Is the dividend unsigned less than the divisor? + return isICmpTrue(ICmpInst::ICMP_ULT, X, Y, Q, MaxRecurse); +} + +/// These are simplifications common to SDiv and UDiv. +static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1013,7 +975,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = simplifyDivRem(Op0, Op1, true)) return V; - bool isSigned = Opcode == Instruction::SDiv; + bool IsSigned = Opcode == Instruction::SDiv; // (X * Y) / Y -> X if the multiplication does not overflow. Value *X = nullptr, *Y = nullptr; @@ -1021,8 +983,8 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0); // If the Mul knows it does not overflow, then we are good to go. - if ((isSigned && Mul->hasNoSignedWrap()) || - (!isSigned && Mul->hasNoUnsignedWrap())) + if ((IsSigned && Mul->hasNoSignedWrap()) || + (!IsSigned && Mul->hasNoUnsignedWrap())) return X; // If X has the form X = A / Y then X * Y cannot overflow. 
if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X)) @@ -1031,13 +993,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, } // (X rem Y) / Y -> 0 - if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) || - (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1))))) + if ((IsSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) || + (!IsSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1))))) return Constant::getNullValue(Op0->getType()); // (X /u C1) /u C2 -> 0 if C1 * C2 overflow ConstantInt *C1, *C2; - if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) && + if (!IsSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) && match(Op1, m_ConstantInt(C2))) { bool Overflow; (void)C1->getValue().umul_ov(C2->getValue(), Overflow); @@ -1057,96 +1019,14 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return nullptr; -} - -/// Given operands for an SDiv, see if we can fold the result. -/// If not, this returns null. -static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, - unsigned MaxRecurse) { - if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) - return V; - - return nullptr; -} - -Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); -} - -/// Given operands for a UDiv, see if we can fold the result. -/// If not, this returns null. -static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, - unsigned MaxRecurse) { - if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) - return V; - - // udiv %V, C -> 0 if %V < C - if (MaxRecurse) { - if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst( - ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) { - if (C->isAllOnesValue()) { - return Constant::getNullValue(Op0->getType()); - } - } - } - - return nullptr; -} - -Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { - return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); -} - -static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned) { - if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) - return C; - - // undef / X -> undef (the undef could be a snan). - if (match(Op0, m_Undef())) - return Op0; - - // X / undef -> undef - if (match(Op1, m_Undef())) - return Op1; - - // X / 1.0 -> X - if (match(Op1, m_FPOne())) - return Op0; - - // 0 / X -> 0 - // Requires that NaNs are off (X could be zero) and signed zeroes are - // ignored (X could be positive or negative, so the output sign is unknown). - if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) - return Op0; - - if (FMF.noNaNs()) { - // X / X -> 1.0 is legal when NaNs are ignored. - if (Op0 == Op1) - return ConstantFP::get(Op0->getType(), 1.0); - - // -X / X -> -1.0 and - // X / -X -> -1.0 are legal when NaNs are ignored. - // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored. 
- if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) && - BinaryOperator::getFNegArgument(Op0) == Op1) || - (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) && - BinaryOperator::getFNegArgument(Op1) == Op0)) - return ConstantFP::get(Op0->getType(), -1.0); - } + if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned)) + return Constant::getNullValue(Op0->getType()); return nullptr; } -Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q) { - return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit); -} - -/// Given operands for an SRem or URem, see if we can fold the result. -/// If not, this returns null. -static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, +/// These are simplifications common to SRem and URem. +static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1173,17 +1053,40 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; + // If X / Y == 0, then X % Y == X. + if (isDivZero(Op0, Op1, Q, MaxRecurse, Opcode == Instruction::SRem)) + return Op0; + return nullptr; } +/// Given operands for an SDiv, see if we can fold the result. +/// If not, this returns null. +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, + unsigned MaxRecurse) { + return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse); +} + +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); +} + +/// Given operands for a UDiv, see if we can fold the result. +/// If not, this returns null. +static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, + unsigned MaxRecurse) { + return simplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse); +} + +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); +} + /// Given operands for an SRem, see if we can fold the result. /// If not, this returns null. static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) - return V; - - return nullptr; + return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse); } Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { @@ -1194,53 +1097,13 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { /// If not, this returns null. 
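/// (With this refactoring, the old URem-specific check "urem %V, C -> %V if
/// %V < C" removed below is subsumed: simplifyRem calls isDivZero, which
/// already proves X u< Y and folds the remainder to X.)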
static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) - return V; - - // urem %V, C -> %V if %V < C - if (MaxRecurse) { - if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst( - ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) { - if (C->isAllOnesValue()) { - return Op0; - } - } - } - - return nullptr; + return simplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse); } Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); } -static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned) { - if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) - return C; - - // undef % X -> undef (the undef could be a snan). - if (match(Op0, m_Undef())) - return Op0; - - // X % undef -> undef - if (match(Op1, m_Undef())) - return Op1; - - // 0 % X -> 0 - // Requires that NaNs are off (X could be zero) and signed zeroes are - // ignored (X could be positive or negative, so the output sign is unknown). - if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) - return Op0; - - return nullptr; -} - -Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q) { - return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit); -} - /// Returns true if a shift by \c Amount always yields undef. static bool isUndefShift(Value *Amount) { Constant *C = dyn_cast<Constant>(Amount); @@ -1686,7 +1549,44 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) { +static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) { + Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); + Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); + if (LHS0->getType() != RHS0->getType()) + return nullptr; + + FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); + if ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) || + (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && !IsAnd)) { + // (fcmp ord NNAN, X) & (fcmp ord X, Y) --> fcmp ord X, Y + // (fcmp ord NNAN, X) & (fcmp ord Y, X) --> fcmp ord Y, X + // (fcmp ord X, NNAN) & (fcmp ord X, Y) --> fcmp ord X, Y + // (fcmp ord X, NNAN) & (fcmp ord Y, X) --> fcmp ord Y, X + // (fcmp uno NNAN, X) | (fcmp uno X, Y) --> fcmp uno X, Y + // (fcmp uno NNAN, X) | (fcmp uno Y, X) --> fcmp uno Y, X + // (fcmp uno X, NNAN) | (fcmp uno X, Y) --> fcmp uno X, Y + // (fcmp uno X, NNAN) | (fcmp uno Y, X) --> fcmp uno Y, X + if ((isKnownNeverNaN(LHS0) && (LHS1 == RHS0 || LHS1 == RHS1)) || + (isKnownNeverNaN(LHS1) && (LHS0 == RHS0 || LHS0 == RHS1))) + return RHS; + + // (fcmp ord X, Y) & (fcmp ord NNAN, X) --> fcmp ord X, Y + // (fcmp ord Y, X) & (fcmp ord NNAN, X) --> fcmp ord Y, X + // (fcmp ord X, Y) & (fcmp ord X, NNAN) --> fcmp ord X, Y + // (fcmp ord Y, X) & (fcmp ord X, NNAN) --> fcmp ord Y, X + // (fcmp uno X, Y) | (fcmp uno NNAN, X) --> fcmp uno X, Y + // (fcmp uno Y, X) | (fcmp uno NNAN, X) --> fcmp uno Y, X + // (fcmp uno X, Y) | (fcmp uno X, NNAN) --> fcmp uno X, Y + // (fcmp uno Y, X) | (fcmp uno X, NNAN) --> fcmp uno Y, X + if ((isKnownNeverNaN(RHS0) && (RHS1 == LHS0 || RHS1 == LHS1)) || + (isKnownNeverNaN(RHS1) && (RHS0 == LHS0 || RHS0 == LHS1))) + return LHS; + } + + return 
nullptr; +} + +static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) { // Look through casts of the 'and' operands to find compares. auto *Cast0 = dyn_cast<CastInst>(Op0); auto *Cast1 = dyn_cast<CastInst>(Op1); @@ -1696,13 +1596,18 @@ static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) { Op1 = Cast1->getOperand(0); } - auto *Cmp0 = dyn_cast<ICmpInst>(Op0); - auto *Cmp1 = dyn_cast<ICmpInst>(Op1); - if (!Cmp0 || !Cmp1) - return nullptr; + Value *V = nullptr; + auto *ICmp0 = dyn_cast<ICmpInst>(Op0); + auto *ICmp1 = dyn_cast<ICmpInst>(Op1); + if (ICmp0 && ICmp1) + V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1) : + simplifyOrOfICmps(ICmp0, ICmp1); + + auto *FCmp0 = dyn_cast<FCmpInst>(Op0); + auto *FCmp1 = dyn_cast<FCmpInst>(Op1); + if (FCmp0 && FCmp1) + V = simplifyAndOrOfFCmps(FCmp0, FCmp1, IsAnd); - Value *V = - IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1); if (!V) return nullptr; if (!Cast0) @@ -1781,7 +1686,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op1; } - if (Value *V = simplifyAndOrOfICmps(Op0, Op1, true)) + if (Value *V = simplifyAndOrOfCmps(Op0, Op1, true)) return V; // Try some generic simplifications for associative operations. @@ -1902,7 +1807,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) return Op0; - if (Value *V = simplifyAndOrOfICmps(Op0, Op1, false)) + if (Value *V = simplifyAndOrOfCmps(Op0, Op1, false)) return V; // Try some generic simplifications for associative operations. @@ -2062,13 +1967,14 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, static Constant * computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, CmpInst::Predicate Pred, - const Instruction *CxtI, Value *LHS, Value *RHS) { + AssumptionCache *AC, const Instruction *CxtI, + Value *LHS, Value *RHS) { // First, skip past any trivial no-ops. LHS = LHS->stripPointerCasts(); RHS = RHS->stripPointerCasts(); // A non-null pointer is not equal to a null pointer. - if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) && + if (llvm::isKnownNonZero(LHS, DL) && isa<ConstantPointerNull>(RHS) && (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); @@ -2223,9 +2129,11 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, // cannot be elided. We cannot fold malloc comparison to null. Also, the // dynamic allocation call could be either of the operands. Value *MI = nullptr; - if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonNullAt(RHS, CxtI, DT)) + if (isAllocLikeFn(LHS, TLI) && + llvm::isKnownNonZero(RHS, DL, 0, nullptr, CxtI, DT)) MI = LHS; - else if (isAllocLikeFn(RHS, TLI) && llvm::isKnownNonNullAt(LHS, CxtI, DT)) + else if (isAllocLikeFn(RHS, TLI) && + llvm::isKnownNonZero(LHS, DL, 0, nullptr, CxtI, DT)) MI = RHS; // FIXME: We should also fold the compare when the pointer escapes, but the // compare dominates the pointer escape @@ -3312,7 +3220,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. 
if (LHS->getType()->isPointerTy()) - if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS)) + if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, LHS, + RHS)) return C; if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS)) if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS)) @@ -3320,7 +3229,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Q.DL.getTypeSizeInBits(CLHS->getType()) && Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) == Q.DL.getTypeSizeInBits(CRHS->getType())) - if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, + if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, CLHS->getPointerOperand(), CRHS->getPointerOperand())) return C; @@ -3416,17 +3325,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(RetTy); } - // Handle fcmp with constant RHS - const ConstantFP *CFP = nullptr; - if (const auto *RHSC = dyn_cast<Constant>(RHS)) { - if (RHS->getType()->isVectorTy()) - CFP = dyn_cast_or_null<ConstantFP>(RHSC->getSplatValue()); - else - CFP = dyn_cast<ConstantFP>(RHSC); - } - if (CFP) { + // Handle fcmp with constant RHS. + const APFloat *C; + if (match(RHS, m_APFloat(C))) { // If the constant is a nan, see if we can fold the comparison based on it. - if (CFP->getValueAPF().isNaN()) { + if (C->isNaN()) { if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" return getFalse(RetTy); assert(FCmpInst::isUnordered(Pred) && @@ -3435,8 +3338,8 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(RetTy); } // Check whether the constant is an infinity. - if (CFP->getValueAPF().isInfinity()) { - if (CFP->getValueAPF().isNegative()) { + if (C->isInfinity()) { + if (C->isNegative()) { switch (Pred) { case FCmpInst::FCMP_OLT: // No value is ordered and less than negative infinity. @@ -3460,7 +3363,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } } - if (CFP->getValueAPF().isZero()) { + if (C->isZero()) { switch (Pred) { case FCmpInst::FCMP_UGE: if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) @@ -3474,6 +3377,28 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, default: break; } + } else if (C->isNegative()) { + assert(!C->isNaN() && "Unexpected NaN constant!"); + // TODO: We can catch more cases by using a range check rather than + // relying on CannotBeOrderedLessThanZero. + switch (Pred) { + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UNE: + // (X >= 0) implies (X > C) when (C < 0) + if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) + return getTrue(RetTy); + break; + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_OLE: + case FCmpInst::FCMP_OLT: + // (X >= 0) implies !(X < C) when (C < 0) + if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) + return getFalse(RetTy); + break; + default: + break; + } } } @@ -3620,32 +3545,16 @@ static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X, /// An alternative way to test if a bit is set or not uses sgt/slt instead of /// eq/ne. 
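/// For example, a select conditioned on (X s< 0) is treated as a test of
/// ((X & SignMask) != 0), and one conditioned on (X s> -1) as a test of
/// ((X & SignMask) == 0); decomposeBitTestICmp recovers the mask and the
/// eq/ne sense for these patterns.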
-static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal, - Value *FalseVal, - bool TrueWhenUnset) { - unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); - if (!BitWidth) - return nullptr; - - APInt MinSignedValue; +static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS, + ICmpInst::Predicate Pred, + Value *TrueVal, Value *FalseVal) { Value *X; - if (match(CmpLHS, m_Trunc(m_Value(X))) && (X == TrueVal || X == FalseVal)) { - // icmp slt (trunc X), 0 <--> icmp ne (and X, C), 0 - // icmp sgt (trunc X), -1 <--> icmp eq (and X, C), 0 - unsigned DestSize = CmpLHS->getType()->getScalarSizeInBits(); - MinSignedValue = APInt::getSignedMinValue(DestSize).zext(BitWidth); - } else { - // icmp slt X, 0 <--> icmp ne (and X, C), 0 - // icmp sgt X, -1 <--> icmp eq (and X, C), 0 - X = CmpLHS; - MinSignedValue = APInt::getSignedMinValue(BitWidth); - } - - if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, &MinSignedValue, - TrueWhenUnset)) - return V; + APInt Mask; + if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask)) + return nullptr; - return nullptr; + return simplifySelectBitTest(TrueVal, FalseVal, X, &Mask, + Pred == ICmpInst::ICMP_EQ); } /// Try to simplify a select instruction when its condition operand is an @@ -3658,8 +3567,6 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) return nullptr; - // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring - // decomposeBitTestICmp() might help. if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) { Value *X; const APInt *Y; @@ -3667,18 +3574,13 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y, Pred == ICmpInst::ICMP_EQ)) return V; - } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) { - // Comparing signed-less-than 0 checks if the sign bit is set. - if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal, - false)) - return V; - } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) { - // Comparing signed-greater-than -1 checks if the sign bit is not set. - if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal, - true)) - return V; } + // Check for other compares that behave like bit test. + if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred, + TrueVal, FalseVal)) + return V; + if (CondVal->hasOneUse()) { const APInt *C; if (match(CmpRHS, m_APInt(C))) { @@ -3735,6 +3637,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, // select true, X, Y -> X // select false, X, Y -> Y if (Constant *CB = dyn_cast<Constant>(CondVal)) { + if (Constant *CT = dyn_cast<Constant>(TrueVal)) + if (Constant *CF = dyn_cast<Constant>(FalseVal)) + return ConstantFoldSelectInstruction(CB, CT, CF); if (CB->isAllOnesValue()) return TrueVal; if (CB->isNullValue()) @@ -3921,6 +3826,28 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); } +Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, + const SimplifyQuery &Q) { + // Try to constant fold. + auto *VecC = dyn_cast<Constant>(Vec); + auto *ValC = dyn_cast<Constant>(Val); + auto *IdxC = dyn_cast<Constant>(Idx); + if (VecC && ValC && IdxC) + return ConstantFoldInsertElementInstruction(VecC, ValC, IdxC); + + // Fold into undef if index is out of bounds. 
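// For example, an insertelement into a <4 x i32> vector at constant index 7
// is out of bounds, so the whole instruction folds to undef.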
+ if (auto *CI = dyn_cast<ConstantInt>(Idx)) { + uint64_t NumElements = cast<VectorType>(Vec->getType())->getNumElements(); + + if (CI->uge(NumElements)) + return UndefValue::get(Vec->getType()); + } + + // TODO: We should also fold if index is itself an undef. + + return nullptr; +} + /// Given operands for an ExtractValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, @@ -3973,6 +3900,11 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue())) return Elt; + // An undef extract index can be arbitrarily chosen to be an out-of-range + // index value, which would result in the instruction being undef. + if (isa<UndefValue>(Idx)) + return UndefValue::get(Vec->getType()->getVectorElementType()); + return nullptr; } @@ -4186,6 +4118,179 @@ Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } +/// Given operands for an FAdd, see if we can fold the result. If not, this +/// returns null. +static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) + return C; + + // fadd X, -0 ==> X + if (match(Op1, m_NegZero())) + return Op0; + + // fadd X, 0 ==> X, when we know X is not -0 + if (match(Op1, m_Zero()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) + return Op0; + + // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 + // where nnan and ninf have to occur at least once somewhere in this + // expression + Value *SubOp = nullptr; + if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) + SubOp = Op1; + else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) + SubOp = Op0; + if (SubOp) { + Instruction *FSub = cast<Instruction>(SubOp); + if ((FMF.noNaNs() || FSub->hasNoNaNs()) && + (FMF.noInfs() || FSub->hasNoInfs())) + return Constant::getNullValue(Op0->getType()); + } + + return nullptr; +} + +/// Given operands for an FSub, see if we can fold the result. If not, this +/// returns null. +static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) + return C; + + // fsub X, 0 ==> X + if (match(Op1, m_Zero())) + return Op0; + + // fsub X, -0 ==> X, when we know X is not -0 + if (match(Op1, m_NegZero()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) + return Op0; + + // fsub -0.0, (fsub -0.0, X) ==> X + Value *X; + if (match(Op0, m_NegZero()) && match(Op1, m_FSub(m_NegZero(), m_Value(X)))) + return X; + + // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
+ if (FMF.noSignedZeros() && match(Op0, m_AnyZero()) && + match(Op1, m_FSub(m_AnyZero(), m_Value(X)))) + return X; + + // fsub nnan x, x ==> 0.0 + if (FMF.noNaNs() && Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + return nullptr; +} + +/// Given the operands for an FMul, see if we can fold the result +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) + return C; + + // fmul X, 1.0 ==> X + if (match(Op1, m_FPOne())) + return Op0; + + // fmul nnan nsz X, 0 ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) + return Op1; + + return nullptr; +} + +Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); +} + + +Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit); +} + +Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); +} + +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned) { + if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) + return C; + + // undef / X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X / undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // X / 1.0 -> X + if (match(Op1, m_FPOne())) + return Op0; + + // 0 / X -> 0 + // Requires that NaNs are off (X could be zero) and signed zeroes are + // ignored (X could be positive or negative, so the output sign is unknown). + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) + return Op0; + + if (FMF.noNaNs()) { + // X / X -> 1.0 is legal when NaNs are ignored. + if (Op0 == Op1) + return ConstantFP::get(Op0->getType(), 1.0); + + // -X / X -> -1.0 and + // X / -X -> -1.0 are legal when NaNs are ignored. + // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored. + if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) && + BinaryOperator::getFNegArgument(Op0) == Op1) || + (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) && + BinaryOperator::getFNegArgument(Op1) == Op0)) + return ConstantFP::get(Op0->getType(), -1.0); + } + + return nullptr; +} + +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit); +} + +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned) { + if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) + return C; + + // undef % X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // 0 % X -> 0 + // Requires that NaNs are off (X could be zero) and signed zeroes are + // ignored (X could be positive or negative, so the output sign is unknown). 
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) + return Op0; + + return nullptr; +} + +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit); +} + //=== Helper functions for higher up the class hierarchy. /// Given operands for a BinaryOperator, see if we can fold the result. @@ -4195,28 +4300,18 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, switch (Opcode) { case Instruction::Add: return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse); - case Instruction::FAdd: - return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::Sub: return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse); - case Instruction::FSub: - return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::Mul: return SimplifyMulInst(LHS, RHS, Q, MaxRecurse); - case Instruction::FMul: - return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); - case Instruction::FDiv: - return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); - case Instruction::FRem: - return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse); case Instruction::LShr: @@ -4229,6 +4324,16 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, return SimplifyOrInst(LHS, RHS, Q, MaxRecurse); case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FAdd: + return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::FSub: + return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::FMul: + return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::FDiv: + return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::FRem: + return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); default: llvm_unreachable("Unexpected opcode"); } @@ -4290,6 +4395,7 @@ static bool IsIdempotent(Intrinsic::ID ID) { case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::canonicalize: return true; } } @@ -4615,6 +4721,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, IV->getIndices(), Q); break; } + case Instruction::InsertElement: { + auto *IE = cast<InsertElementInst>(I); + Result = SimplifyInsertElementInst(IE->getOperand(0), IE->getOperand(1), + IE->getOperand(2), Q); + break; + } case Instruction::ExtractValue: { auto *EVI = cast<ExtractValueInst>(I); Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp index 6c10d73bcb44..6d5de22cb93f 100644 --- a/contrib/llvm/lib/Analysis/Interval.cpp +++ b/contrib/llvm/lib/Analysis/Interval.cpp @@ -16,7 +16,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> using namespace llvm; @@ -25,7 +24,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// // isLoop - Find out if there 
is a back edge in this interval... -// bool Interval::isLoop() const { // There is a loop in this interval iff one of the predecessors of the header // node lives in the interval. @@ -36,7 +34,6 @@ bool Interval::isLoop() const { return false; } - void Interval::print(raw_ostream &OS) const { OS << "-------------------------------------------------------------\n" << "Interval Contents:\n"; diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp index a4e56e0694bc..c777d91b67c6 100644 --- a/contrib/llvm/lib/Analysis/IntervalPartition.cpp +++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp @@ -12,10 +12,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/IntervalPartition.h" +#include "llvm/Analysis/Interval.h" #include "llvm/Analysis/IntervalIterator.h" +#include "llvm/Pass.h" +#include <cassert> +#include <utility> + using namespace llvm; char IntervalPartition::ID = 0; + INITIALIZE_PASS(IntervalPartition, "intervals", "Interval Partition Construction", true, true) @@ -40,7 +47,6 @@ void IntervalPartition::print(raw_ostream &O, const Module*) const { // addIntervalToPartition - Add an interval to the internal list of intervals, // and then add mappings from all of the basic blocks in the interval to the // interval itself (in the IntervalMap). -// void IntervalPartition::addIntervalToPartition(Interval *I) { Intervals.push_back(I); @@ -54,7 +60,6 @@ void IntervalPartition::addIntervalToPartition(Interval *I) { // the interval data structures. After interval generation is complete, // run through all of the intervals and propagate successor info as // predecessor info. -// void IntervalPartition::updatePredecessors(Interval *Int) { BasicBlock *Header = Int->getHeaderNode(); for (BasicBlock *Successor : Int->Successors) @@ -63,7 +68,6 @@ void IntervalPartition::updatePredecessors(Interval *Int) { // IntervalPartition ctor - Build the first level interval partition for the // specified function... -// bool IntervalPartition::runOnFunction(Function &F) { // Pass false to intervals_begin because we take ownership of it's memory function_interval_iterator I = intervals_begin(&F, false); @@ -84,11 +88,9 @@ bool IntervalPartition::runOnFunction(Function &F) { return false; } - // IntervalPartition ctor - Build a reduced interval partition from an // existing interval graph. This takes an additional boolean parameter to // distinguish it from a copy constructor. Always pass in false for now. 
-// IntervalPartition::IntervalPartition(IntervalPartition &IP, bool) : FunctionPass(ID) { assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!"); @@ -110,4 +112,3 @@ IntervalPartition::IntervalPartition(IntervalPartition &IP, bool) for (unsigned i = 0, e = Intervals.size(); i != e; ++i) updatePredecessors(Intervals[i]); } - diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp index d287f81985fd..54299d078be5 100644 --- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp @@ -8,15 +8,31 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <iterator> +#include <string> +#include <tuple> #include <utility> using namespace llvm; @@ -175,7 +191,7 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { LazyCallGraph::LazyCallGraph(LazyCallGraph &&G) : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)), EntryEdges(std::move(G.EntryEdges)), SCCBPA(std::move(G.SCCBPA)), - SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)), + SCCMap(std::move(G.SCCMap)), LibFunctions(std::move(G.LibFunctions)) { updateGraphPtrs(); } @@ -186,7 +202,6 @@ LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) { EntryEdges = std::move(G.EntryEdges); SCCBPA = std::move(G.SCCBPA); SCCMap = std::move(G.SCCMap); - LeafRefSCCs = std::move(G.LeafRefSCCs); LibFunctions = std::move(G.LibFunctions); updateGraphPtrs(); return *this; @@ -212,7 +227,7 @@ void LazyCallGraph::SCC::verify() { assert(N->LowLink == -1 && "Must set low link to -1 when adding a node to an SCC!"); for (Edge &E : **N) - assert(E.getNode() && "Can't have an unpopulated node!"); + assert(E.getNode().isPopulated() && "Can't have an unpopulated node!"); } } #endif @@ -313,38 +328,49 @@ void LazyCallGraph::RefSCC::verify() { "Edge between SCCs violates post-order relationship."); continue; } - assert(TargetSCC.getOuterRefSCC().Parents.count(this) && - "Edge to a RefSCC missing us in its parent set."); } } - - // Check that our parents are actually parents. - for (RefSCC *ParentRC : Parents) { - assert(ParentRC != this && "Cannot be our own parent!"); - auto HasConnectingEdge = [&] { - for (SCC &C : *ParentRC) - for (Node &N : C) - for (Edge &E : *N) - if (G->lookupRefSCC(E.getNode()) == this) - return true; - return false; - }; - assert(HasConnectingEdge() && "No edge connects the parent to us!"); - } } #endif -bool LazyCallGraph::RefSCC::isDescendantOf(const RefSCC &C) const { - // Walk up the parents of this SCC and verify that we eventually find C. 
- SmallVector<const RefSCC *, 4> AncestorWorklist; - AncestorWorklist.push_back(this); +bool LazyCallGraph::RefSCC::isParentOf(const RefSCC &RC) const { + if (&RC == this) + return false; + + // Search all edges to see if this is a parent. + for (SCC &C : *this) + for (Node &N : C) + for (Edge &E : *N) + if (G->lookupRefSCC(E.getNode()) == &RC) + return true; + + return false; +} + +bool LazyCallGraph::RefSCC::isAncestorOf(const RefSCC &RC) const { + if (&RC == this) + return false; + + // For each descendant of this RefSCC, see if one of its children is the + // argument. If not, add that descendant to the worklist and continue + // searching. + SmallVector<const RefSCC *, 4> Worklist; + SmallPtrSet<const RefSCC *, 4> Visited; + Worklist.push_back(this); + Visited.insert(this); do { - const RefSCC *AncestorC = AncestorWorklist.pop_back_val(); - if (AncestorC->isChildOf(C)) - return true; - for (const RefSCC *ParentC : AncestorC->Parents) - AncestorWorklist.push_back(ParentC); - } while (!AncestorWorklist.empty()); + const RefSCC &DescendantRC = *Worklist.pop_back_val(); + for (SCC &C : DescendantRC) + for (Node &N : C) + for (Edge &E : *N) { + auto *ChildRC = G->lookupRefSCC(E.getNode()); + if (ChildRC == &RC) + return true; + if (!ChildRC || !Visited.insert(ChildRC).second) + continue; + Worklist.push_back(ChildRC); + } + } while (!Worklist.empty()); return false; } @@ -907,17 +933,13 @@ void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN, assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); - RefSCC &TargetC = *G->lookupRefSCC(TargetN); - assert(&TargetC != this && "Target must not be in this RefSCC."); + assert(G->lookupRefSCC(TargetN) != this && + "Target must not be in this RefSCC."); #ifdef EXPENSIVE_CHECKS - assert(TargetC.isDescendantOf(*this) && + assert(G->lookupRefSCC(TargetN)->isDescendantOf(*this) && "Target must be a descendant of the Source."); #endif - // The only change required is to add this SCC to the parent set of the - // callee. - TargetC.Parents.insert(this); - #ifndef NDEBUG // Check that the RefSCC is still valid. verify(); @@ -957,22 +979,20 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { // RefSCCs (and their edges) are visited here. auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) { Set.insert(&SourceC); - SmallVector<RefSCC *, 4> Worklist; - Worklist.push_back(&SourceC); - do { - RefSCC &RC = *Worklist.pop_back_val(); - for (RefSCC &ParentRC : RC.parents()) { - // Skip any RefSCCs outside the range of source to target in the - // postorder sequence. - int ParentIdx = G->getRefSCCIndex(ParentRC); - assert(ParentIdx > SourceIdx && "Parent cannot precede source in postorder!"); - if (ParentIdx > TargetIdx) - continue; - if (Set.insert(&ParentRC).second) - // First edge connecting to this parent, add it to our worklist. - Worklist.push_back(&ParentRC); - } - } while (!Worklist.empty()); + auto IsConnected = [&](RefSCC &RC) { + for (SCC &C : RC) + for (Node &N : C) + for (Edge &E : *N) + if (Set.count(G->lookupRefSCC(E.getNode()))) + return true; + + return false; + }; + + for (RefSCC *C : make_range(G->PostOrderRefSCCs.begin() + SourceIdx + 1, + G->PostOrderRefSCCs.begin() + TargetIdx + 1)) + if (IsConnected(*C)) + Set.insert(C); }; // Use a normal worklist to find which SCCs the target connects to. 
We still @@ -1023,12 +1043,6 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { assert(RC != this && "We're merging into the target RefSCC, so it " "shouldn't be in the range."); - // Merge the parents which aren't part of the merge into the our parents. - for (RefSCC *ParentRC : RC->Parents) - if (!MergeSet.count(ParentRC)) - Parents.insert(ParentRC); - RC->Parents.clear(); - // Walk the inner SCCs to update their up-pointer and walk all the edges to // update any parent sets. // FIXME: We should try to find a way to avoid this (rather expensive) edge @@ -1036,16 +1050,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { for (SCC &InnerC : *RC) { InnerC.OuterRefSCC = this; SCCIndices[&InnerC] = SCCIndex++; - for (Node &N : InnerC) { + for (Node &N : InnerC) G->SCCMap[&N] = &InnerC; - for (Edge &E : *N) { - RefSCC &ChildRC = *G->lookupRefSCC(E.getNode()); - if (MergeSet.count(&ChildRC)) - continue; - ChildRC.Parents.erase(RC); - ChildRC.Parents.insert(this); - } - } } // Now merge in the SCCs. We can actually move here so try to reuse storage @@ -1087,12 +1093,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) { assert(G->lookupRefSCC(SourceN) == this && "The source must be a member of this RefSCC."); - - RefSCC &TargetRC = *G->lookupRefSCC(TargetN); - assert(&TargetRC != this && "The target must not be a member of this RefSCC"); - - assert(!is_contained(G->LeafRefSCCs, this) && - "Cannot have a leaf RefSCC source."); + assert(G->lookupRefSCC(TargetN) != this && + "The target must not be a member of this RefSCC"); #ifndef NDEBUG // In a debug build, verify the RefSCC is valid to start with and when this @@ -1105,122 +1107,72 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) { bool Removed = SourceN->removeEdgeInternal(TargetN); (void)Removed; assert(Removed && "Target not in the edge set for this caller?"); - - bool HasOtherEdgeToChildRC = false; - bool HasOtherChildRC = false; - for (SCC *InnerC : SCCs) { - for (Node &N : *InnerC) { - for (Edge &E : *N) { - RefSCC &OtherChildRC = *G->lookupRefSCC(E.getNode()); - if (&OtherChildRC == &TargetRC) { - HasOtherEdgeToChildRC = true; - break; - } - if (&OtherChildRC != this) - HasOtherChildRC = true; - } - if (HasOtherEdgeToChildRC) - break; - } - if (HasOtherEdgeToChildRC) - break; - } - // Because the SCCs form a DAG, deleting such an edge cannot change the set - // of SCCs in the graph. However, it may cut an edge of the SCC DAG, making - // the source SCC no longer connected to the target SCC. If so, we need to - // update the target SCC's map of its parents. - if (!HasOtherEdgeToChildRC) { - bool Removed = TargetRC.Parents.erase(this); - (void)Removed; - assert(Removed && - "Did not find the source SCC in the target SCC's parent list!"); - - // It may orphan an SCC if it is the last edge reaching it, but that does - // not violate any invariants of the graph. - if (TargetRC.Parents.empty()) - DEBUG(dbgs() << "LCG: Update removing " << SourceN.getFunction().getName() - << " -> " << TargetN.getFunction().getName() - << " edge orphaned the callee's SCC!\n"); - - // It may make the Source SCC a leaf SCC. 
- if (!HasOtherChildRC) - G->LeafRefSCCs.push_back(this); - } } SmallVector<LazyCallGraph::RefSCC *, 1> -LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { - assert(!(*SourceN)[TargetN].isCall() && - "Cannot remove a call edge, it must first be made a ref edge"); +LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, + ArrayRef<Node *> TargetNs) { + // We return a list of the resulting *new* RefSCCs in post-order. + SmallVector<RefSCC *, 1> Result; #ifndef NDEBUG - // In a debug build, verify the RefSCC is valid to start with and when this - // routine finishes. + // In a debug build, verify the RefSCC is valid to start with and that either + // we return an empty list of result RefSCCs and this RefSCC remains valid, + // or we return new RefSCCs and this RefSCC is dead. verify(); - auto VerifyOnExit = make_scope_exit([&]() { verify(); }); + auto VerifyOnExit = make_scope_exit([&]() { + // If we didn't replace our RefSCC with new ones, check that this one + // remains valid. + if (G) + verify(); + }); #endif - // First remove the actual edge. - bool Removed = SourceN->removeEdgeInternal(TargetN); - (void)Removed; - assert(Removed && "Target not in the edge set for this caller?"); + // First remove the actual edges. + for (Node *TargetN : TargetNs) { + assert(!(*SourceN)[*TargetN].isCall() && + "Cannot remove a call edge, it must first be made a ref edge"); - // We return a list of the resulting *new* RefSCCs in post-order. - SmallVector<RefSCC *, 1> Result; + bool Removed = SourceN->removeEdgeInternal(*TargetN); + (void)Removed; + assert(Removed && "Target not in the edge set for this caller?"); + } - // Direct recursion doesn't impact the SCC graph at all. - if (&SourceN == &TargetN) + // Direct self references don't impact the ref graph at all. + if (llvm::all_of(TargetNs, + [&](Node *TargetN) { return &SourceN == TargetN; })) return Result; - // If this ref edge is within an SCC then there are sufficient other edges to - // form a cycle without this edge so removing it is a no-op. + // If all targets are in the same SCC as the source, because no call edges + // were removed there is no RefSCC structure change. SCC &SourceC = *G->lookupSCC(SourceN); - SCC &TargetC = *G->lookupSCC(TargetN); - if (&SourceC == &TargetC) + if (llvm::all_of(TargetNs, [&](Node *TargetN) { + return G->lookupSCC(*TargetN) == &SourceC; + })) return Result; // We build somewhat synthetic new RefSCCs by providing a postorder mapping - // for each inner SCC. We also store these associated with *nodes* rather - // than SCCs because this saves a round-trip through the node->SCC map and in - // the common case, SCCs are small. We will verify that we always give the - // same number to every node in the SCC such that these are equivalent. - const int RootPostOrderNumber = 0; - int PostOrderNumber = RootPostOrderNumber + 1; - SmallDenseMap<Node *, int> PostOrderMapping; - - // Every node in the target SCC can already reach every node in this RefSCC - // (by definition). It is the only node we know will stay inside this RefSCC. - // Everything which transitively reaches Target will also remain in the - // RefSCC. We handle this by pre-marking that the nodes in the target SCC map - // back to the root post order number. - // - // This also enables us to take a very significant short-cut in the standard - // Tarjan walk to re-form RefSCCs below: whenever we build an edge that - // references the target node, we know that the target node eventually - // references all other nodes in our walk. 
As a consequence, we can detect - // and handle participants in that cycle without walking all the edges that - // form the connections, and instead by relying on the fundamental guarantee - // coming into this operation. - for (Node &N : TargetC) - PostOrderMapping[&N] = RootPostOrderNumber; + // for each inner SCC. We store these inside the low-link field of the nodes + // rather than associated with SCCs because this saves a round-trip through + // the node->SCC map and in the common case, SCCs are small. We will verify + // that we always give the same number to every node in the SCC such that + // these are equivalent. + int PostOrderNumber = 0; // Reset all the other nodes to prepare for a DFS over them, and add them to // our worklist. SmallVector<Node *, 8> Worklist; for (SCC *C : SCCs) { - if (C == &TargetC) - continue; - for (Node &N : *C) N.DFSNumber = N.LowLink = 0; Worklist.append(C->Nodes.begin(), C->Nodes.end()); } - auto MarkNodeForSCCNumber = [&PostOrderMapping](Node &N, int Number) { - N.DFSNumber = N.LowLink = -1; - PostOrderMapping[&N] = Number; - }; + // Track the number of nodes in this RefSCC so that we can quickly recognize + // an important special case of the edge removal not breaking the cycle of + // this RefSCC. + const int NumRefSCCNodes = Worklist.size(); SmallVector<std::pair<Node *, EdgeSequence::iterator>, 4> DFSStack; SmallVector<Node *, 4> PendingRefSCCStack; @@ -1267,31 +1219,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { continue; } if (ChildN.DFSNumber == -1) { - // Check if this edge's target node connects to the deleted edge's - // target node. If so, we know that every node connected will end up - // in this RefSCC, so collapse the entire current stack into the root - // slot in our SCC numbering. See above for the motivation of - // optimizing the target connected nodes in this way. - auto PostOrderI = PostOrderMapping.find(&ChildN); - if (PostOrderI != PostOrderMapping.end() && - PostOrderI->second == RootPostOrderNumber) { - MarkNodeForSCCNumber(*N, RootPostOrderNumber); - while (!PendingRefSCCStack.empty()) - MarkNodeForSCCNumber(*PendingRefSCCStack.pop_back_val(), - RootPostOrderNumber); - while (!DFSStack.empty()) - MarkNodeForSCCNumber(*DFSStack.pop_back_val().first, - RootPostOrderNumber); - // Ensure we break all the way out of the enclosing loop. - N = nullptr; - break; - } - // If this child isn't currently in this RefSCC, no need to process - // it. However, we do need to remove this RefSCC from its RefSCC's - // parent set. - RefSCC &ChildRC = *G->lookupRefSCC(ChildN); - ChildRC.Parents.erase(this); + // it. ++I; continue; } @@ -1304,9 +1233,6 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { N->LowLink = ChildN.LowLink; ++I; } - if (!N) - // We short-circuited this node. - break; // We've finished processing N and its descendents, put it on our pending // stack to eventually get merged into a RefSCC. @@ -1321,146 +1247,98 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { } // Otherwise, form a new RefSCC from the top of the pending node stack. + int RefSCCNumber = PostOrderNumber++; int RootDFSNumber = N->DFSNumber; + // Find the range of the node stack by walking down until we pass the - // root DFS number. - auto RefSCCNodes = make_range( - PendingRefSCCStack.rbegin(), - find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) { - return N->DFSNumber < RootDFSNumber; - })); + // root DFS number. 
Update the DFS numbers and low link numbers in the + // process to avoid re-walking this list where possible. + auto StackRI = find_if(reverse(PendingRefSCCStack), [&](Node *N) { + if (N->DFSNumber < RootDFSNumber) + // We've found the bottom. + return true; - // Mark the postorder number for these nodes and clear them off the - // stack. We'll use the postorder number to pull them into RefSCCs at the - // end. FIXME: Fuse with the loop above. - int RefSCCNumber = PostOrderNumber++; - for (Node *N : RefSCCNodes) - MarkNodeForSCCNumber(*N, RefSCCNumber); + // Update this node and keep scanning. + N->DFSNumber = -1; + // Save the post-order number in the lowlink field so that we can use + // it to map SCCs into new RefSCCs after we finish the DFS. + N->LowLink = RefSCCNumber; + return false; + }); + auto RefSCCNodes = make_range(StackRI.base(), PendingRefSCCStack.end()); + + // If we find a cycle containing all nodes originally in this RefSCC then + // the removal hasn't changed the structure at all. This is an important + // special case and we can directly exit the entire routine more + // efficiently as soon as we discover it. + if (std::distance(RefSCCNodes.begin(), RefSCCNodes.end()) == + NumRefSCCNodes) { + // Clear out the low link field as we won't need it. + for (Node *N : RefSCCNodes) + N->LowLink = -1; + // Return the empty result immediately. + return Result; + } - PendingRefSCCStack.erase(RefSCCNodes.end().base(), - PendingRefSCCStack.end()); + // We've already marked the nodes internally with the RefSCC number so + // just clear them off the stack and continue. + PendingRefSCCStack.erase(RefSCCNodes.begin(), PendingRefSCCStack.end()); } while (!DFSStack.empty()); assert(DFSStack.empty() && "Didn't flush the entire DFS stack!"); assert(PendingRefSCCStack.empty() && "Didn't flush all pending nodes!"); } while (!Worklist.empty()); - // We now have a post-order numbering for RefSCCs and a mapping from each - // node in this RefSCC to its final RefSCC. We create each new RefSCC node - // (re-using this RefSCC node for the root) and build a radix-sort style map - // from postorder number to the RefSCC. We then append SCCs to each of these - // RefSCCs in the order they occured in the original SCCs container. - for (int i = 1; i < PostOrderNumber; ++i) + assert(PostOrderNumber > 1 && + "Should never finish the DFS when the existing RefSCC remains valid!"); + + // Otherwise we create a collection of new RefSCC nodes and build + // a radix-sort style map from postorder number to these new RefSCCs. We then + // append SCCs to each of these RefSCCs in the order they occured in the + // original SCCs container. + for (int i = 0; i < PostOrderNumber; ++i) Result.push_back(G->createRefSCC(*G)); // Insert the resulting postorder sequence into the global graph postorder - // sequence before the current RefSCC in that sequence. The idea being that - // this RefSCC is the target of the reference edge removed, and thus has - // a direct or indirect edge to every other RefSCC formed and so must be at - // the end of any postorder traversal. + // sequence before the current RefSCC in that sequence, and then remove the + // current one. // // FIXME: It'd be nice to change the APIs so that we returned an iterator // range over the global postorder sequence and generally use that sequence // rather than building a separate result vector here. 
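The comments above describe the replacement strategy: each new RefSCC gets a post-order number stashed in the nodes' low-link fields, and the new RefSCCs are then spliced into the graph's global post-order sequence in place of the RefSCC being torn down, with every index at or after the insertion point renumbered. A minimal, std-only model of that splice step, using hypothetical container names rather than the LazyCallGraph members:

    #include <unordered_map>
    #include <vector>

    struct RefSCC {};

    // Replace Old at its current position in the post-order sequence with the
    // run of newly formed RefSCCs, then rebuild the index map from that
    // position onward so lookups stay consistent.
    void spliceIntoPostOrder(std::vector<RefSCC *> &PostOrder,
                             std::unordered_map<RefSCC *, int> &Index,
                             RefSCC *Old, const std::vector<RefSCC *> &New) {
      int Idx = Index.at(Old);
      Index.erase(Old);
      PostOrder.erase(PostOrder.begin() + Idx);
      PostOrder.insert(PostOrder.begin() + Idx, New.begin(), New.end());
      for (int i = Idx, e = static_cast<int>(PostOrder.size()); i != e; ++i)
        Index[PostOrder[i]] = i;
    }

The hunk below performs the same steps on PostOrderRefSCCs and RefSCCIndices.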
- if (!Result.empty()) { - int Idx = G->getRefSCCIndex(*this); - G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx, - Result.begin(), Result.end()); - for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size())) - G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i; - assert(G->PostOrderRefSCCs[G->getRefSCCIndex(*this)] == this && - "Failed to update this RefSCC's index after insertion!"); - } + int Idx = G->getRefSCCIndex(*this); + G->PostOrderRefSCCs.erase(G->PostOrderRefSCCs.begin() + Idx); + G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx, Result.begin(), + Result.end()); + for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size())) + G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i; for (SCC *C : SCCs) { - auto PostOrderI = PostOrderMapping.find(&*C->begin()); - assert(PostOrderI != PostOrderMapping.end() && - "Cannot have missing mappings for nodes!"); - int SCCNumber = PostOrderI->second; -#ifndef NDEBUG - for (Node &N : *C) - assert(PostOrderMapping.find(&N)->second == SCCNumber && + // We store the SCC number in the node's low-link field above. + int SCCNumber = C->begin()->LowLink; + // Clear out all of the SCC's node's low-link fields now that we're done + // using them as side-storage. + for (Node &N : *C) { + assert(N.LowLink == SCCNumber && "Cannot have different numbers for nodes in the same SCC!"); -#endif - if (SCCNumber == 0) - // The root node is handled separately by removing the SCCs. - continue; + N.LowLink = -1; + } - RefSCC &RC = *Result[SCCNumber - 1]; + RefSCC &RC = *Result[SCCNumber]; int SCCIndex = RC.SCCs.size(); RC.SCCs.push_back(C); RC.SCCIndices[C] = SCCIndex; C->OuterRefSCC = &RC; } - // FIXME: We re-walk the edges in each RefSCC to establish whether it is - // a leaf and connect it to the rest of the graph's parents lists. This is - // really wasteful. We should instead do this during the DFS to avoid yet - // another edge walk. - for (RefSCC *RC : Result) - G->connectRefSCC(*RC); - - // Now erase all but the root's SCCs. - SCCs.erase(remove_if(SCCs, - [&](SCC *C) { - return PostOrderMapping.lookup(&*C->begin()) != - RootPostOrderNumber; - }), - SCCs.end()); + // Now that we've moved things into the new RefSCCs, clear out our current + // one. + G = nullptr; + SCCs.clear(); SCCIndices.clear(); - for (int i = 0, Size = SCCs.size(); i < Size; ++i) - SCCIndices[SCCs[i]] = i; #ifndef NDEBUG - // Now we need to reconnect the current (root) SCC to the graph. We do this - // manually because we can special case our leaf handling and detect errors. - bool IsLeaf = true; -#endif - for (SCC *C : SCCs) - for (Node &N : *C) { - for (Edge &E : *N) { - RefSCC &ChildRC = *G->lookupRefSCC(E.getNode()); - if (&ChildRC == this) - continue; - ChildRC.Parents.insert(this); -#ifndef NDEBUG - IsLeaf = false; -#endif - } - } -#ifndef NDEBUG - if (!Result.empty()) - assert(!IsLeaf && "This SCC cannot be a leaf as we have split out new " - "SCCs by removing this edge."); - if (none_of(G->LeafRefSCCs, [&](RefSCC *C) { return C == this; })) - assert(!IsLeaf && "This SCC cannot be a leaf as it already had child " - "SCCs before we removed this edge."); -#endif - // And connect both this RefSCC and all the new ones to the correct parents. - // The easiest way to do this is just to re-analyze the old parent set. 
- SmallVector<RefSCC *, 4> OldParents(Parents.begin(), Parents.end()); - Parents.clear(); - for (RefSCC *ParentRC : OldParents) - for (SCC &ParentC : *ParentRC) - for (Node &ParentN : ParentC) - for (Edge &E : *ParentN) { - RefSCC &RC = *G->lookupRefSCC(E.getNode()); - if (&RC != ParentRC) - RC.Parents.insert(ParentRC); - } - - // If this SCC stopped being a leaf through this edge removal, remove it from - // the leaf SCC list. Note that this DTRT in the case where this was never - // a leaf. - // FIXME: As LeafRefSCCs could be very large, we might want to not walk the - // entire list if this RefSCC wasn't a leaf before the edge removal. - if (!Result.empty()) - G->LeafRefSCCs.erase( - std::remove(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this), - G->LeafRefSCCs.end()); - -#ifndef NDEBUG - // Verify all of the new RefSCCs. + // Verify the new RefSCCs we've built. for (RefSCC *RC : Result) RC->verify(); #endif @@ -1477,18 +1355,13 @@ void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN, // after this edge insertion. assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); RefSCC &TargetRC = *G->lookupRefSCC(TargetN); - if (&TargetRC == this) { - + if (&TargetRC == this) return; - } #ifdef EXPENSIVE_CHECKS assert(TargetRC.isDescendantOf(*this) && "Target must be a descendant of the Source."); #endif - // The only change required is to add this RefSCC to the parent set of the - // target. This is a set and so idempotent if the edge already existed. - TargetRC.Parents.insert(this); } void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN, @@ -1646,24 +1519,6 @@ void LazyCallGraph::removeDeadFunction(Function &F) { assert(C.size() == 1 && "Dead functions must be in a singular SCC"); assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC"); - // Clean up any remaining reference edges. Note that we walk an unordered set - // here but are just removing and so the order doesn't matter. - for (RefSCC &ParentRC : RC.parents()) - for (SCC &ParentC : ParentRC) - for (Node &ParentN : ParentC) - if (ParentN) - ParentN->removeEdgeInternal(N); - - // Now remove this RefSCC from any parents sets and the leaf list. - for (Edge &E : *N) - if (RefSCC *TargetRC = lookupRefSCC(E.getNode())) - TargetRC->Parents.erase(&RC); - // FIXME: This is a linear operation which could become hot and benefit from - // an index map. - auto LRI = find(LeafRefSCCs, &RC); - if (LRI != LeafRefSCCs.end()) - LeafRefSCCs.erase(LRI); - auto RCIndexI = RefSCCIndices.find(&RC); int RCIndex = RCIndexI->second; PostOrderRefSCCs.erase(PostOrderRefSCCs.begin() + RCIndex); @@ -1674,8 +1529,11 @@ void LazyCallGraph::removeDeadFunction(Function &F) { // Finally clear out all the data structures from the node down through the // components. N.clear(); + N.G = nullptr; + N.F = nullptr; C.clear(); RC.clear(); + RC.G = nullptr; // Nothing to delete as all the objects are allocated in stable bump pointer // allocators. @@ -1686,32 +1544,13 @@ LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) { } void LazyCallGraph::updateGraphPtrs() { - // Process all nodes updating the graph pointers. - { - SmallVector<Node *, 16> Worklist; - for (Edge &E : EntryEdges) - Worklist.push_back(&E.getNode()); - - while (!Worklist.empty()) { - Node &N = *Worklist.pop_back_val(); - N.G = this; - if (N) - for (Edge &E : *N) - Worklist.push_back(&E.getNode()); - } - } + // Walk the node map to update their graph pointers. 
While this iterates in + // an unstable order, the order has no effect so it remains correct. + for (auto &FunctionNodePair : NodeMap) + FunctionNodePair.second->G = this; - // Process all SCCs updating the graph pointers. - { - SmallVector<RefSCC *, 16> Worklist(LeafRefSCCs.begin(), LeafRefSCCs.end()); - - while (!Worklist.empty()) { - RefSCC &C = *Worklist.pop_back_val(); - C.G = this; - for (RefSCC &ParentC : C.parents()) - Worklist.push_back(&ParentC); - } - } + for (auto *RC : PostOrderRefSCCs) + RC->G = this; } template <typename RootsT, typename GetBeginT, typename GetEndT, @@ -1719,7 +1558,7 @@ template <typename RootsT, typename GetBeginT, typename GetEndT, void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin, GetEndT &&GetEnd, GetNodeT &&GetNode, FormSCCCallbackT &&FormSCC) { - typedef decltype(GetBegin(std::declval<Node &>())) EdgeItT; + using EdgeItT = decltype(GetBegin(std::declval<Node &>())); SmallVector<std::pair<Node *, EdgeItT>, 16> DFSStack; SmallVector<Node *, 16> PendingSCCStack; @@ -1871,7 +1710,6 @@ void LazyCallGraph::buildRefSCCs() { [this](node_stack_range Nodes) { RefSCC *NewRC = createRefSCC(*this); buildSCCs(*NewRC, Nodes); - connectRefSCC(*NewRC); // Push the new node into the postorder list and remember its position // in the index map. @@ -1886,28 +1724,6 @@ void LazyCallGraph::buildRefSCCs() { }); } -// FIXME: We should move callers of this to embed the parent linking and leaf -// tracking into their DFS in order to remove a full walk of all edges. -void LazyCallGraph::connectRefSCC(RefSCC &RC) { - // Walk all edges in the RefSCC (this remains linear as we only do this once - // when we build the RefSCC) to connect it to the parent sets of its - // children. - bool IsLeaf = true; - for (SCC &C : RC) - for (Node &N : C) - for (Edge &E : *N) { - RefSCC &ChildRC = *lookupRefSCC(E.getNode()); - if (&ChildRC == &RC) - continue; - ChildRC.Parents.insert(&RC); - IsLeaf = false; - } - - // For the SCCs where we find no child SCCs, add them to the leaf list. - if (IsLeaf) - LeafRefSCCs.push_back(&RC); -} - AnalysisKey LazyCallGraphAnalysis::Key; LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index 102081e721ac..d7da669f6e79 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -17,8 +17,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/ValueLattice.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/CFG.h" #include "llvm/IR/ConstantRange.h" @@ -35,7 +37,6 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" #include <map> -#include <stack> using namespace llvm; using namespace PatternMatch; @@ -59,225 +60,10 @@ namespace llvm { AnalysisKey LazyValueAnalysis::Key; -//===----------------------------------------------------------------------===// -// LVILatticeVal -//===----------------------------------------------------------------------===// - -/// This is the information tracked by LazyValueInfo for each value. -/// -/// FIXME: This is basically just for bringup, this can be made a lot more rich -/// in the future. 
-/// -namespace { -class LVILatticeVal { - enum LatticeValueTy { - /// This Value has no known value yet. As a result, this implies the - /// producing instruction is dead. Caution: We use this as the starting - /// state in our local meet rules. In this usage, it's taken to mean - /// "nothing known yet". - undefined, - - /// This Value has a specific constant value. (For constant integers, - /// constantrange is used instead. Integer typed constantexprs can appear - /// as constant.) - constant, - - /// This Value is known to not have the specified value. (For constant - /// integers, constantrange is used instead. As above, integer typed - /// constantexprs can appear here.) - notconstant, - - /// The Value falls within this range. (Used only for integer typed values.) - constantrange, - - /// We can not precisely model the dynamic values this value might take. - overdefined - }; - - /// Val: This stores the current lattice value along with the Constant* for - /// the constant if this is a 'constant' or 'notconstant' value. - LatticeValueTy Tag; - Constant *Val; - ConstantRange Range; - -public: - LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {} - - static LVILatticeVal get(Constant *C) { - LVILatticeVal Res; - if (!isa<UndefValue>(C)) - Res.markConstant(C); - return Res; - } - static LVILatticeVal getNot(Constant *C) { - LVILatticeVal Res; - if (!isa<UndefValue>(C)) - Res.markNotConstant(C); - return Res; - } - static LVILatticeVal getRange(ConstantRange CR) { - LVILatticeVal Res; - Res.markConstantRange(std::move(CR)); - return Res; - } - static LVILatticeVal getOverdefined() { - LVILatticeVal Res; - Res.markOverdefined(); - return Res; - } - - bool isUndefined() const { return Tag == undefined; } - bool isConstant() const { return Tag == constant; } - bool isNotConstant() const { return Tag == notconstant; } - bool isConstantRange() const { return Tag == constantrange; } - bool isOverdefined() const { return Tag == overdefined; } - - Constant *getConstant() const { - assert(isConstant() && "Cannot get the constant of a non-constant!"); - return Val; - } - - Constant *getNotConstant() const { - assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); - return Val; - } - - const ConstantRange &getConstantRange() const { - assert(isConstantRange() && - "Cannot get the constant-range of a non-constant-range!"); - return Range; - } - -private: - void markOverdefined() { - if (isOverdefined()) - return; - Tag = overdefined; - } - - void markConstant(Constant *V) { - assert(V && "Marking constant with NULL"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - markConstantRange(ConstantRange(CI->getValue())); - return; - } - if (isa<UndefValue>(V)) - return; - - assert((!isConstant() || getConstant() == V) && - "Marking constant with different value"); - assert(isUndefined()); - Tag = constant; - Val = V; - } - - void markNotConstant(Constant *V) { - assert(V && "Marking constant with NULL"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); - return; - } - if (isa<UndefValue>(V)) - return; - - assert((!isConstant() || getConstant() != V) && - "Marking constant !constant with same value"); - assert((!isNotConstant() || getNotConstant() == V) && - "Marking !constant with different value"); - assert(isUndefined() || isConstant()); - Tag = notconstant; - Val = V; - } - - void markConstantRange(ConstantRange NewR) { - if (isConstantRange()) { - if (NewR.isEmptySet()) - 
markOverdefined(); - else { - Range = std::move(NewR); - } - return; - } - - assert(isUndefined()); - if (NewR.isEmptySet()) - markOverdefined(); - else { - Tag = constantrange; - Range = std::move(NewR); - } - } - -public: - - /// Merge the specified lattice value into this one, updating this - /// one and returning true if anything changed. - void mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { - if (RHS.isUndefined() || isOverdefined()) - return; - if (RHS.isOverdefined()) { - markOverdefined(); - return; - } - - if (isUndefined()) { - *this = RHS; - return; - } - - if (isConstant()) { - if (RHS.isConstant() && Val == RHS.Val) - return; - markOverdefined(); - return; - } - - if (isNotConstant()) { - if (RHS.isNotConstant() && Val == RHS.Val) - return; - markOverdefined(); - return; - } - - assert(isConstantRange() && "New LVILattice type?"); - if (!RHS.isConstantRange()) { - // We can get here if we've encountered a constantexpr of integer type - // and merge it with a constantrange. - markOverdefined(); - return; - } - ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); - if (NewR.isFullSet()) - markOverdefined(); - else - markConstantRange(std::move(NewR)); - } -}; - -} // end anonymous namespace. - -namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) - LLVM_ATTRIBUTE_USED; -raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { - if (Val.isUndefined()) - return OS << "undefined"; - if (Val.isOverdefined()) - return OS << "overdefined"; - - if (Val.isNotConstant()) - return OS << "notconstant<" << *Val.getNotConstant() << '>'; - if (Val.isConstantRange()) - return OS << "constantrange<" << Val.getConstantRange().getLower() << ", " - << Val.getConstantRange().getUpper() << '>'; - return OS << "constant<" << *Val.getConstant() << '>'; -} -} - /// Returns true if this lattice value represents at most one possible value. /// This is as precise as any lattice value can get while still representing /// reachable code. -static bool hasSingleValue(const LVILatticeVal &Val) { +static bool hasSingleValue(const ValueLatticeElement &Val) { if (Val.isConstantRange() && Val.getConstantRange().isSingleElement()) // Integer constants are single element ranges @@ -302,7 +88,8 @@ static bool hasSingleValue(const LVILatticeVal &Val) { /// contradictory. If this happens, we return some valid lattice value so as /// not confuse the rest of LVI. Ideally, we'd always return Undefined, but /// we do not make this guarantee. TODO: This would be a useful enhancement. -static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) { +static ValueLatticeElement intersect(const ValueLatticeElement &A, + const ValueLatticeElement &B) { // Undefined is the strongest state. It means the value is known to be along // an unreachable path. if (A.isUndefined()) @@ -334,7 +121,7 @@ static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) { // Note: An empty range is implicitly converted to overdefined internally. // TODO: We could instead use Undefined here since we've proven a conflict // and thus know this path must be unreachable. 
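To make the fallback described above concrete, here is a simplified, self-contained model of intersecting two ranges (plain half-open intervals rather than ConstantRange or the lattice types): a contradictory intersection comes back empty, and the caller treats that as overdefined instead of undefined.

    #include <algorithm>
    #include <cstdint>
    #include <optional>

    // Half-open interval [Lo, Hi) standing in for a constant range.
    struct Interval { int64_t Lo, Hi; };

    // An empty result models the contradictory case, which the code above maps
    // to the overdefined lattice state rather than undefined.
    std::optional<Interval> intersectIntervals(Interval A, Interval B) {
      Interval R{std::max(A.Lo, B.Lo), std::min(A.Hi, B.Hi)};
      if (R.Lo >= R.Hi)
        return std::nullopt; // empty set: caller treats this as overdefined
      return R;
    }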
- return LVILatticeVal::getRange(std::move(Range)); + return ValueLatticeElement::getRange(std::move(Range)); } //===----------------------------------------------------------------------===// @@ -372,7 +159,7 @@ namespace { struct ValueCacheEntryTy { ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {} LVIValueHandle Handle; - SmallDenseMap<PoisoningVH<BasicBlock>, LVILatticeVal, 4> BlockVals; + SmallDenseMap<PoisoningVH<BasicBlock>, ValueLatticeElement, 4> BlockVals; }; /// This tracks, on a per-block basis, the set of values that are @@ -390,7 +177,8 @@ namespace { public: - void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { + void insertResult(Value *Val, BasicBlock *BB, + const ValueLatticeElement &Result) { SeenBlocks.insert(BB); // Insert over-defined values into their own cache to reduce memory @@ -428,16 +216,16 @@ namespace { return I->second->BlockVals.count(BB); } - LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) const { + ValueLatticeElement getCachedValueInfo(Value *V, BasicBlock *BB) const { if (isOverdefined(V, BB)) - return LVILatticeVal::getOverdefined(); + return ValueLatticeElement::getOverdefined(); auto I = ValueCache.find_as(V); if (I == ValueCache.end()) - return LVILatticeVal(); + return ValueLatticeElement(); auto BBI = I->second->BlockVals.find(BB); if (BBI == I->second->BlockVals.end()) - return LVILatticeVal(); + return ValueLatticeElement(); return BBI->second; } @@ -614,26 +402,29 @@ namespace { const DataLayout &DL; ///< A mandatory DataLayout DominatorTree *DT; ///< An optional DT pointer. - LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); + ValueLatticeElement getBlockValue(Value *Val, BasicBlock *BB); bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, - LVILatticeVal &Result, Instruction *CxtI = nullptr); + ValueLatticeElement &Result, Instruction *CxtI = nullptr); bool hasBlockValue(Value *Val, BasicBlock *BB); // These methods process one work item and may add more. A false value // returned means that the work item was not completely processed and must // be revisited after going through the new items. bool solveBlockValue(Value *Val, BasicBlock *BB); - bool solveBlockValueImpl(LVILatticeVal &Res, Value *Val, BasicBlock *BB); - bool solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *Val, BasicBlock *BB); - bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB); - bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S, + bool solveBlockValueImpl(ValueLatticeElement &Res, Value *Val, + BasicBlock *BB); + bool solveBlockValueNonLocal(ValueLatticeElement &BBLV, Value *Val, + BasicBlock *BB); + bool solveBlockValuePHINode(ValueLatticeElement &BBLV, PHINode *PN, + BasicBlock *BB); + bool solveBlockValueSelect(ValueLatticeElement &BBLV, SelectInst *S, BasicBlock *BB); - bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI, + bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI, BasicBlock *BB); - bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI, + bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, - LVILatticeVal &BBLV, + ValueLatticeElement &BBLV, Instruction *BBI); void solve(); @@ -641,18 +432,19 @@ namespace { public: /// This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. 
- LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB, - Instruction *CxtI = nullptr); + ValueLatticeElement getValueInBlock(Value *V, BasicBlock *BB, + Instruction *CxtI = nullptr); /// This is the query interface to determine the lattice /// value for the specified Value* at the specified instruction (generally /// from an assume intrinsic). - LVILatticeVal getValueAt(Value *V, Instruction *CxtI); + ValueLatticeElement getValueAt(Value *V, Instruction *CxtI); /// This is the query interface to determine the lattice /// value for the specified Value* that is true on the specified edge. - LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB, - Instruction *CxtI = nullptr); + ValueLatticeElement getValueOnEdge(Value *V, BasicBlock *FromBB, + BasicBlock *ToBB, + Instruction *CxtI = nullptr); /// Complete flush all previously computed values void clear() { @@ -703,7 +495,7 @@ void LazyValueInfoImpl::solve() { while (!StartingStack.empty()) { std::pair<BasicBlock *, Value *> &e = StartingStack.back(); TheCache.insertResult(e.second, e.first, - LVILatticeVal::getOverdefined()); + ValueLatticeElement::getOverdefined()); StartingStack.pop_back(); } BlockValueSet.clear(); @@ -739,15 +531,16 @@ bool LazyValueInfoImpl::hasBlockValue(Value *Val, BasicBlock *BB) { return TheCache.hasCachedValueInfo(Val, BB); } -LVILatticeVal LazyValueInfoImpl::getBlockValue(Value *Val, BasicBlock *BB) { +ValueLatticeElement LazyValueInfoImpl::getBlockValue(Value *Val, + BasicBlock *BB) { // If already a constant, there is nothing to compute. if (Constant *VC = dyn_cast<Constant>(Val)) - return LVILatticeVal::get(VC); + return ValueLatticeElement::get(VC); return TheCache.getCachedValueInfo(Val, BB); } -static LVILatticeVal getFromRangeMetadata(Instruction *BBI) { +static ValueLatticeElement getFromRangeMetadata(Instruction *BBI) { switch (BBI->getOpcode()) { default: break; case Instruction::Load: @@ -755,12 +548,13 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) { case Instruction::Invoke: if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range)) if (isa<IntegerType>(BBI->getType())) { - return LVILatticeVal::getRange(getConstantRangeFromMetadata(*Ranges)); + return ValueLatticeElement::getRange( + getConstantRangeFromMetadata(*Ranges)); } break; }; // Nothing known - will be intersected with other facts - return LVILatticeVal::getOverdefined(); + return ValueLatticeElement::getOverdefined(); } bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) { @@ -780,7 +574,7 @@ bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) { // Hold off inserting this value into the Cache in case we have to return // false and come back later. - LVILatticeVal Res; + ValueLatticeElement Res; if (!solveBlockValueImpl(Res, Val, BB)) // Work pushed, will revisit return false; @@ -789,7 +583,7 @@ bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) { return true; } -bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, +bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, Value *Val, BasicBlock *BB) { Instruction *BBI = dyn_cast<Instruction>(Val); @@ -807,13 +601,13 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, // definition. We could easily extend this to look through geps, bitcasts, // and the like to prove non-nullness, but it's not clear that's worth it // compile time wise. The context-insensitive value walk done inside - // isKnownNonNull gets most of the profitable cases at much less expense. 
+ // isKnownNonZero gets most of the profitable cases at much less expense. // This does mean that we have a sensativity to where the defining // instruction is placed, even if it could legally be hoisted much higher. // That is unfortunate. PointerType *PT = dyn_cast<PointerType>(BBI->getType()); - if (PT && isKnownNonNull(BBI)) { - Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT)); + if (PT && isKnownNonZero(BBI, DL)) { + Res = ValueLatticeElement::getNot(ConstantPointerNull::get(PT)); return true; } if (BBI->getType()->isIntegerTy()) { @@ -880,9 +674,9 @@ static bool isObjectDereferencedInBlock(Value *Val, BasicBlock *BB) { return false; } -bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, +bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, Value *Val, BasicBlock *BB) { - LVILatticeVal Result; // Start Undefined. + ValueLatticeElement Result; // Start Undefined. // If this is the entry block, we must be asking about an argument. The // value is overdefined. @@ -891,11 +685,11 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, // Before giving up, see if we can prove the pointer non-null local to // this particular block. if (Val->getType()->isPointerTy() && - (isKnownNonNull(Val) || isObjectDereferencedInBlock(Val, BB))) { + (isKnownNonZero(Val, DL) || isObjectDereferencedInBlock(Val, BB))) { PointerType *PTy = cast<PointerType>(Val->getType()); - Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); } else { - Result = LVILatticeVal::getOverdefined(); + Result = ValueLatticeElement::getOverdefined(); } BBLV = Result; return true; @@ -911,7 +705,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, // canonicalizing to make this true rather than relying on this happy // accident. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - LVILatticeVal EdgeResult; + ValueLatticeElement EdgeResult; if (!getEdgeValue(Val, *PI, BB, EdgeResult)) // Explore that input, then return here return false; @@ -928,7 +722,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, if (Val->getType()->isPointerTy() && isObjectDereferencedInBlock(Val, BB)) { PointerType *PTy = cast<PointerType>(Val->getType()); - Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + Result = ValueLatticeElement::getNot(ConstantPointerNull::get(PTy)); } BBLV = Result; @@ -942,9 +736,9 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, return true; } -bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV, - PHINode *PN, BasicBlock *BB) { - LVILatticeVal Result; // Start Undefined. +bool LazyValueInfoImpl::solveBlockValuePHINode(ValueLatticeElement &BBLV, + PHINode *PN, BasicBlock *BB) { + ValueLatticeElement Result; // Start Undefined. // Loop over all of our predecessors, merging what we know from them into // result. See the comment about the chosen traversal order in @@ -952,7 +746,7 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PhiBB = PN->getIncomingBlock(i); Value *PhiVal = PN->getIncomingValue(i); - LVILatticeVal EdgeResult; + ValueLatticeElement EdgeResult; // Note that we can provide PN as the context value to getEdgeValue, even // though the results will be cached, because PN is the value being used as // the cache key in the caller. 
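The loop above follows a simple merge discipline: each incoming edge contributes a lattice value that is merged into a running result, and the walk stops early once the result degrades to overdefined. A self-contained toy model of that discipline (a constants-only lattice, not the real ValueLatticeElement):

    #include <optional>
    #include <vector>

    // Toy lattice: undefined (no constant, not overdefined), a single known
    // constant, or overdefined.
    struct Lattice {
      bool Overdefined = false;
      std::optional<int> Constant; // meaningful only if !Overdefined

      void mergeIn(const Lattice &RHS) {
        if (Overdefined || (!RHS.Overdefined && !RHS.Constant))
          return; // already overdefined, or RHS is undefined
        if (RHS.Overdefined || (Constant && RHS.Constant != Constant)) {
          Overdefined = true;
          Constant.reset();
          return;
        }
        if (!Constant)
          Constant = RHS.Constant; // undefined picks up RHS's constant
      }
    };

    Lattice mergeIncoming(const std::vector<Lattice> &EdgeResults) {
      Lattice Result; // starts undefined
      for (const Lattice &E : EdgeResults) {
        Result.mergeIn(E);
        if (Result.Overdefined)
          break; // no further edge can improve the answer
      }
      return Result;
    }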
@@ -979,13 +773,13 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV, return true; } -static LVILatticeVal getValueFromCondition(Value *Val, Value *Cond, - bool isTrueDest = true); +static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, + bool isTrueDest = true); // If we can determine a constraint on the value given conditions assumed by // the program, intersect those constraints with BBLV void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( - Value *Val, LVILatticeVal &BBLV, Instruction *BBI) { + Value *Val, ValueLatticeElement &BBLV, Instruction *BBI) { BBI = BBI ? BBI : dyn_cast<Instruction>(Val); if (!BBI) return; @@ -1014,35 +808,35 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( } } -bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, - SelectInst *SI, BasicBlock *BB) { +bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV, + SelectInst *SI, BasicBlock *BB) { // Recurse on our inputs if needed if (!hasBlockValue(SI->getTrueValue(), BB)) { if (pushBlockValue(std::make_pair(BB, SI->getTrueValue()))) return false; - BBLV = LVILatticeVal::getOverdefined(); + BBLV = ValueLatticeElement::getOverdefined(); return true; } - LVILatticeVal TrueVal = getBlockValue(SI->getTrueValue(), BB); + ValueLatticeElement TrueVal = getBlockValue(SI->getTrueValue(), BB); // If we hit overdefined, don't ask more queries. We want to avoid poisoning // extra slots in the table if we can. if (TrueVal.isOverdefined()) { - BBLV = LVILatticeVal::getOverdefined(); + BBLV = ValueLatticeElement::getOverdefined(); return true; } if (!hasBlockValue(SI->getFalseValue(), BB)) { if (pushBlockValue(std::make_pair(BB, SI->getFalseValue()))) return false; - BBLV = LVILatticeVal::getOverdefined(); + BBLV = ValueLatticeElement::getOverdefined(); return true; } - LVILatticeVal FalseVal = getBlockValue(SI->getFalseValue(), BB); + ValueLatticeElement FalseVal = getBlockValue(SI->getFalseValue(), BB); // If we hit overdefined, don't ask more queries. We want to avoid poisoning // extra slots in the table if we can. if (FalseVal.isOverdefined()) { - BBLV = LVILatticeVal::getOverdefined(); + BBLV = ValueLatticeElement::getOverdefined(); return true; } @@ -1070,7 +864,7 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, return TrueCR.umax(FalseCR); }; }(); - BBLV = LVILatticeVal::getRange(ResultCR); + BBLV = ValueLatticeElement::getRange(ResultCR); return true; } @@ -1113,7 +907,7 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, m_ConstantInt(CIAdded)))) { auto ResNot = addConstants(CIBase, CIAdded); FalseVal = intersect(FalseVal, - LVILatticeVal::getNot(ResNot)); + ValueLatticeElement::getNot(ResNot)); } break; case ICmpInst::ICMP_NE: @@ -1121,27 +915,27 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, m_ConstantInt(CIAdded)))) { auto ResNot = addConstants(CIBase, CIAdded); TrueVal = intersect(TrueVal, - LVILatticeVal::getNot(ResNot)); + ValueLatticeElement::getNot(ResNot)); } break; }; } } - LVILatticeVal Result; // Start Undefined. + ValueLatticeElement Result; // Start Undefined. 
Result.mergeIn(TrueVal, DL); Result.mergeIn(FalseVal, DL); BBLV = Result; return true; } -bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, +bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI, BasicBlock *BB) { if (!CI->getOperand(0)->getType()->isSized()) { // Without knowing how wide the input is, we can't analyze it in any useful // way. - BBLV = LVILatticeVal::getOverdefined(); + BBLV = ValueLatticeElement::getOverdefined(); return true; } @@ -1158,7 +952,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, // Unhandled instructions are overdefined. DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown cast).\n"); - BBLV = LVILatticeVal::getOverdefined(); + BBLV = ValueLatticeElement::getOverdefined(); return true; } @@ -1174,7 +968,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, DL.getTypeSizeInBits(CI->getOperand(0)->getType()); ConstantRange LHSRange = ConstantRange(OperandBitWidth); if (hasBlockValue(CI->getOperand(0), BB)) { - LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB); + ValueLatticeElement LHSVal = getBlockValue(CI->getOperand(0), BB); intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal, CI); if (LHSVal.isConstantRange()) @@ -1186,14 +980,14 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(), - ResultBitWidth)); + BBLV = ValueLatticeElement::getRange(LHSRange.castOp(CI->getOpcode(), + ResultBitWidth)); return true; } -bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, - BinaryOperator *BO, - BasicBlock *BB) { +bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV, + BinaryOperator *BO, + BasicBlock *BB) { assert(BO->getOperand(0)->getType()->isSized() && "all operands to binary operators are sized"); @@ -1208,6 +1002,7 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, case Instruction::UDiv: case Instruction::Shl: case Instruction::LShr: + case Instruction::AShr: case Instruction::And: case Instruction::Or: // continue into the code below @@ -1216,7 +1011,7 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, // Unhandled instructions are overdefined. DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown binary operator).\n"); - BBLV = LVILatticeVal::getOverdefined(); + BBLV = ValueLatticeElement::getOverdefined(); return true; }; @@ -1232,7 +1027,7 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, DL.getTypeSizeInBits(BO->getOperand(0)->getType()); ConstantRange LHSRange = ConstantRange(OperandBitWidth); if (hasBlockValue(BO->getOperand(0), BB)) { - LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB); + ValueLatticeElement LHSVal = getBlockValue(BO->getOperand(0), BB); intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal, BO); if (LHSVal.isConstantRange()) @@ -1246,12 +1041,12 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. 
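The binaryOp call just below leans on ConstantRange to propagate the operator over the operand ranges. A small example of that facility, reusing the same call shape as the hunk (header paths may vary by tree):

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Instruction.h"

    // For example, adding the 8-bit ranges [0,4) and [10,12) yields [10,15).
    llvm::ConstantRange addRanges(const llvm::ConstantRange &L,
                                  const llvm::ConstantRange &R) {
      return L.binaryOp(llvm::Instruction::Add, R);
    }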
Instruction::BinaryOps BinOp = BO->getOpcode(); - BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange)); + BBLV = ValueLatticeElement::getRange(LHSRange.binaryOp(BinOp, RHSRange)); return true; } -static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI, - bool isTrueDest) { +static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, + bool isTrueDest) { Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); CmpInst::Predicate Predicate = ICI->getPredicate(); @@ -1261,14 +1056,14 @@ static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI, // We know that V has the RHS constant if this is a true SETEQ or // false SETNE. if (isTrueDest == (Predicate == ICmpInst::ICMP_EQ)) - return LVILatticeVal::get(cast<Constant>(RHS)); + return ValueLatticeElement::get(cast<Constant>(RHS)); else - return LVILatticeVal::getNot(cast<Constant>(RHS)); + return ValueLatticeElement::getNot(cast<Constant>(RHS)); } } if (!Val->getType()->isIntegerTy()) - return LVILatticeVal::getOverdefined(); + return ValueLatticeElement::getOverdefined(); // Use ConstantRange::makeAllowedICmpRegion in order to determine the possible // range of Val guaranteed by the condition. Recognize comparisons in the from @@ -1307,19 +1102,19 @@ static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI, if (Offset) // Apply the offset from above. TrueValues = TrueValues.subtract(Offset->getValue()); - return LVILatticeVal::getRange(std::move(TrueValues)); + return ValueLatticeElement::getRange(std::move(TrueValues)); } - return LVILatticeVal::getOverdefined(); + return ValueLatticeElement::getOverdefined(); } -static LVILatticeVal +static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, - DenseMap<Value*, LVILatticeVal> &Visited); + DenseMap<Value*, ValueLatticeElement> &Visited); -static LVILatticeVal +static ValueLatticeElement getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, - DenseMap<Value*, LVILatticeVal> &Visited) { + DenseMap<Value*, ValueLatticeElement> &Visited) { if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond)) return getValueFromICmpCondition(Val, ICI, isTrueDest); @@ -1330,16 +1125,16 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond); if (!BO || (isTrueDest && BO->getOpcode() != BinaryOperator::And) || (!isTrueDest && BO->getOpcode() != BinaryOperator::Or)) - return LVILatticeVal::getOverdefined(); + return ValueLatticeElement::getOverdefined(); auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited); auto LHS = getValueFromCondition(Val, BO->getOperand(1), isTrueDest, Visited); return intersect(RHS, LHS); } -static LVILatticeVal +static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, - DenseMap<Value*, LVILatticeVal> &Visited) { + DenseMap<Value*, ValueLatticeElement> &Visited) { auto I = Visited.find(Cond); if (I != Visited.end()) return I->second; @@ -1349,17 +1144,63 @@ getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, return Result; } -LVILatticeVal getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest) { +ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, + bool isTrueDest) { assert(Cond && "precondition"); - DenseMap<Value*, LVILatticeVal> Visited; + DenseMap<Value*, ValueLatticeElement> Visited; return getValueFromCondition(Val, Cond, isTrueDest, Visited); } +// Return true if Usr has Op as an operand, otherwise 
false. +static bool usesOperand(User *Usr, Value *Op) { + return find(Usr->operands(), Op) != Usr->op_end(); +} + +// Return true if the instruction type of Val is supported by +// constantFoldUser(). Currently CastInst and BinaryOperator only. Call this +// before calling constantFoldUser() to find out if it's even worth attempting +// to call it. +static bool isOperationFoldable(User *Usr) { + return isa<CastInst>(Usr) || isa<BinaryOperator>(Usr); +} + +// Check if Usr can be simplified to an integer constant when the value of one +// of its operands Op is an integer constant OpConstVal. If so, return it as an +// lattice value range with a single element or otherwise return an overdefined +// lattice value. +static ValueLatticeElement constantFoldUser(User *Usr, Value *Op, + const APInt &OpConstVal, + const DataLayout &DL) { + assert(isOperationFoldable(Usr) && "Precondition"); + Constant* OpConst = Constant::getIntegerValue(Op->getType(), OpConstVal); + // Check if Usr can be simplified to a constant. + if (auto *CI = dyn_cast<CastInst>(Usr)) { + assert(CI->getOperand(0) == Op && "Operand 0 isn't Op"); + if (auto *C = dyn_cast_or_null<ConstantInt>( + SimplifyCastInst(CI->getOpcode(), OpConst, + CI->getDestTy(), DL))) { + return ValueLatticeElement::getRange(ConstantRange(C->getValue())); + } + } else if (auto *BO = dyn_cast<BinaryOperator>(Usr)) { + bool Op0Match = BO->getOperand(0) == Op; + bool Op1Match = BO->getOperand(1) == Op; + assert((Op0Match || Op1Match) && + "Operand 0 nor Operand 1 isn't a match"); + Value *LHS = Op0Match ? OpConst : BO->getOperand(0); + Value *RHS = Op1Match ? OpConst : BO->getOperand(1); + if (auto *C = dyn_cast_or_null<ConstantInt>( + SimplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) { + return ValueLatticeElement::getRange(ConstantRange(C->getValue())); + } + } + return ValueLatticeElement::getOverdefined(); +} + /// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if /// Val is not constrained on the edge. Result is unspecified if return value /// is false. static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo, LVILatticeVal &Result) { + BasicBlock *BBTo, ValueLatticeElement &Result) { // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { @@ -1370,18 +1211,59 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, bool isTrueDest = BI->getSuccessor(0) == BBTo; assert(BI->getSuccessor(!isTrueDest) == BBTo && "BBTo isn't a successor of BBFrom"); + Value *Condition = BI->getCondition(); // If V is the condition of the branch itself, then we know exactly what // it is. - if (BI->getCondition() == Val) { - Result = LVILatticeVal::get(ConstantInt::get( + if (Condition == Val) { + Result = ValueLatticeElement::get(ConstantInt::get( Type::getInt1Ty(Val->getContext()), isTrueDest)); return true; } // If the condition of the branch is an equality comparison, we may be // able to infer the value. - Result = getValueFromCondition(Val, BI->getCondition(), isTrueDest); + Result = getValueFromCondition(Val, Condition, isTrueDest); + if (!Result.isOverdefined()) + return true; + + if (User *Usr = dyn_cast<User>(Val)) { + assert(Result.isOverdefined() && "Result isn't overdefined"); + // Check with isOperationFoldable() first to avoid linearly iterating + // over the operands unnecessarily which can be expensive for + // instructions with many operands. 
+ if (isa<IntegerType>(Usr->getType()) && isOperationFoldable(Usr)) { + const DataLayout &DL = BBTo->getModule()->getDataLayout(); + if (usesOperand(Usr, Condition)) { + // If Val has Condition as an operand and Val can be folded into a + // constant with either Condition == true or Condition == false, + // propagate the constant. + // eg. + // ; %Val is true on the edge to %then. + // %Val = and i1 %Condition, true. + // br %Condition, label %then, label %else + APInt ConditionVal(1, isTrueDest ? 1 : 0); + Result = constantFoldUser(Usr, Condition, ConditionVal, DL); + } else { + // If one of Val's operand has an inferred value, we may be able to + // infer the value of Val. + // eg. + // ; %Val is 94 on the edge to %then. + // %Val = add i8 %Op, 1 + // %Condition = icmp eq i8 %Op, 93 + // br i1 %Condition, label %then, label %else + for (unsigned i = 0; i < Usr->getNumOperands(); ++i) { + Value *Op = Usr->getOperand(i); + ValueLatticeElement OpLatticeVal = + getValueFromCondition(Op, Condition, isTrueDest); + if (Optional<APInt> OpConst = OpLatticeVal.asConstantInteger()) { + Result = constantFoldUser(Usr, Op, OpConst.getValue(), DL); + break; + } + } + } + } + } if (!Result.isOverdefined()) return true; } @@ -1390,24 +1272,50 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // If the edge was formed by a switch on the value, then we may know exactly // what it is. if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) { - if (SI->getCondition() != Val) + Value *Condition = SI->getCondition(); + if (!isa<IntegerType>(Val->getType())) return false; + bool ValUsesConditionAndMayBeFoldable = false; + if (Condition != Val) { + // Check if Val has Condition as an operand. + if (User *Usr = dyn_cast<User>(Val)) + ValUsesConditionAndMayBeFoldable = isOperationFoldable(Usr) && + usesOperand(Usr, Condition); + if (!ValUsesConditionAndMayBeFoldable) + return false; + } + assert((Condition == Val || ValUsesConditionAndMayBeFoldable) && + "Condition != Val nor Val doesn't use Condition"); bool DefaultCase = SI->getDefaultDest() == BBTo; unsigned BitWidth = Val->getType()->getIntegerBitWidth(); ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/); for (auto Case : SI->cases()) { - ConstantRange EdgeVal(Case.getCaseValue()->getValue()); + APInt CaseValue = Case.getCaseValue()->getValue(); + ConstantRange EdgeVal(CaseValue); + if (ValUsesConditionAndMayBeFoldable) { + User *Usr = cast<User>(Val); + const DataLayout &DL = BBTo->getModule()->getDataLayout(); + ValueLatticeElement EdgeLatticeVal = + constantFoldUser(Usr, Condition, CaseValue, DL); + if (EdgeLatticeVal.isOverdefined()) + return false; + EdgeVal = EdgeLatticeVal.getConstantRange(); + } if (DefaultCase) { // It is possible that the default destination is the destination of - // some cases. There is no need to perform difference for those cases. - if (Case.getCaseSuccessor() != BBTo) + // some cases. We cannot perform difference for those cases. + // We know Condition != CaseValue in BBTo. In some cases we can use + // this to infer Val == f(Condition) is != f(CaseValue). For now, we + // only do this when f is identity (i.e. Val == Condition), but we + // should be able to do this for any injective f. 
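To make the range bookkeeping in this loop concrete: an ordinary case edge accumulates the union of the case values routed to it, while the default edge starts from the full set and has case values subtracted. A simplified sketch with the same ConstantRange calls; it takes the relevant case constants as input and ignores the constant-folding and shared-successor subtleties handled above:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/ConstantRange.h"

    llvm::ConstantRange switchEdgeValues(unsigned BitWidth,
                                         llvm::ArrayRef<llvm::APInt> CaseValues,
                                         bool IsDefaultEdge) {
      // Default edge: start full and remove every case value.
      // Case edge: start empty and add the values routed to it.
      llvm::ConstantRange Edges(BitWidth, /*isFullSet=*/IsDefaultEdge);
      for (const llvm::APInt &CV : CaseValues) {
        llvm::ConstantRange R(CV);
        Edges = IsDefaultEdge ? Edges.difference(R) : Edges.unionWith(R);
      }
      return Edges;
    }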
+ if (Case.getCaseSuccessor() != BBTo && Condition == Val) EdgesVals = EdgesVals.difference(EdgeVal); } else if (Case.getCaseSuccessor() == BBTo) EdgesVals = EdgesVals.unionWith(EdgeVal); } - Result = LVILatticeVal::getRange(std::move(EdgesVals)); + Result = ValueLatticeElement::getRange(std::move(EdgesVals)); return true; } return false; @@ -1416,19 +1324,20 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, /// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at /// the basic block if the edge does not constrain Val. bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo, LVILatticeVal &Result, + BasicBlock *BBTo, + ValueLatticeElement &Result, Instruction *CxtI) { // If already a constant, there is nothing to compute. if (Constant *VC = dyn_cast<Constant>(Val)) { - Result = LVILatticeVal::get(VC); + Result = ValueLatticeElement::get(VC); return true; } - LVILatticeVal LocalResult; + ValueLatticeElement LocalResult; if (!getEdgeValueLocal(Val, BBFrom, BBTo, LocalResult)) // If we couldn't constrain the value on the edge, LocalResult doesn't // provide any information. - LocalResult = LVILatticeVal::getOverdefined(); + LocalResult = ValueLatticeElement::getOverdefined(); if (hasSingleValue(LocalResult)) { // Can't get any more precise here @@ -1445,7 +1354,7 @@ bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, } // Try to intersect ranges of the BB and the constraint on the edge. - LVILatticeVal InBlock = getBlockValue(Val, BBFrom); + ValueLatticeElement InBlock = getBlockValue(Val, BBFrom); intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator()); // We can use the context instruction (generically the ultimate instruction @@ -1462,8 +1371,8 @@ bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, return true; } -LVILatticeVal LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, - Instruction *CxtI) { +ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, + Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); @@ -1472,21 +1381,21 @@ LVILatticeVal LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, pushBlockValue(std::make_pair(BB, V)); solve(); } - LVILatticeVal Result = getBlockValue(V, BB); + ValueLatticeElement Result = getBlockValue(V, BB); intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } -LVILatticeVal LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) { +ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting value " << *V << " at '" << CxtI->getName() << "'\n"); if (auto *C = dyn_cast<Constant>(V)) - return LVILatticeVal::get(C); + return ValueLatticeElement::get(C); - LVILatticeVal Result = LVILatticeVal::getOverdefined(); + ValueLatticeElement Result = ValueLatticeElement::getOverdefined(); if (auto *I = dyn_cast<Instruction>(V)) Result = getFromRangeMetadata(I); intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); @@ -1495,13 +1404,13 @@ LVILatticeVal LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) { return Result; } -LVILatticeVal LazyValueInfoImpl:: +ValueLatticeElement LazyValueInfoImpl:: getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" << FromBB->getName() << "' to '" << ToBB->getName() 
<< "'\n"); - LVILatticeVal Result; + ValueLatticeElement Result; if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) { solve(); bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result, CxtI); @@ -1581,7 +1490,8 @@ bool LazyValueInfo::invalidate(Function &F, const PreservedAnalyses &PA, void LazyValueInfoWrapperPass::releaseMemory() { Info.releaseMemory(); } -LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { +LazyValueInfo LazyValueAnalysis::run(Function &F, + FunctionAnalysisManager &FAM) { auto &AC = FAM.getResult<AssumptionAnalysis>(F); auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); @@ -1610,7 +1520,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, return nullptr; const DataLayout &DL = BB->getModule()->getDataLayout(); - LVILatticeVal Result = + ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isConstant()) @@ -1628,7 +1538,7 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB, assert(V->getType()->isIntegerTy()); unsigned Width = V->getType()->getIntegerBitWidth(); const DataLayout &DL = BB->getModule()->getDataLayout(); - LVILatticeVal Result = + ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isUndefined()) return ConstantRange(Width, /*isFullSet=*/false); @@ -1647,7 +1557,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { const DataLayout &DL = FromBB->getModule()->getDataLayout(); - LVILatticeVal Result = + ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isConstant()) @@ -1666,7 +1576,7 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, Instruction *CxtI) { unsigned Width = V->getType()->getIntegerBitWidth(); const DataLayout &DL = FromBB->getModule()->getDataLayout(); - LVILatticeVal Result = + ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isUndefined()) @@ -1680,11 +1590,9 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, return ConstantRange(Width, /*isFullSet=*/true); } -static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, - const LVILatticeVal &Val, - const DataLayout &DL, - TargetLibraryInfo *TLI) { - +static LazyValueInfo::Tristate +getPredicateResult(unsigned Pred, Constant *C, const ValueLatticeElement &Val, + const DataLayout &DL, TargetLibraryInfo *TLI) { // If we know the value is a constant, evaluate the conditional. Constant *Res = nullptr; if (Val.isConstant()) { @@ -1754,7 +1662,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { const DataLayout &DL = FromBB->getModule()->getDataLayout(); - LVILatticeVal Result = + ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); @@ -1764,18 +1672,18 @@ LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI) { // Is or is not NonNull are common predicates being queried. If - // isKnownNonNull can tell us the result of the predicate, we can + // isKnownNonZero can tell us the result of the predicate, we can // return it quickly. But this is only a fastpath, and falling // through would still be correct. 
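Condensed into a standalone helper, the fastpath described above looks roughly like this (a sketch of the idea, not the function's actual structure):

    #include "llvm/Analysis/LazyValueInfo.h"
    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/InstrTypes.h"

    // A pointer proven nonzero settles ==/!= against null without running the
    // full LVI query; anything else stays Unknown.
    llvm::LazyValueInfo::Tristate
    nullCompareFastPath(llvm::CmpInst::Predicate Pred, llvm::Value *V,
                        const llvm::DataLayout &DL) {
      using LVI = llvm::LazyValueInfo;
      if (!V->getType()->isPointerTy() ||
          !llvm::isKnownNonZero(V->stripPointerCasts(), DL))
        return LVI::Unknown;
      if (Pred == llvm::CmpInst::ICMP_EQ)
        return LVI::False; // a nonzero pointer is never equal to null
      if (Pred == llvm::CmpInst::ICMP_NE)
        return LVI::True;
      return LVI::Unknown;
    }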
+ const DataLayout &DL = CxtI->getModule()->getDataLayout(); if (V->getType()->isPointerTy() && C->isNullValue() && - isKnownNonNull(V->stripPointerCasts())) { + isKnownNonZero(V->stripPointerCasts(), DL)) { if (Pred == ICmpInst::ICMP_EQ) return LazyValueInfo::False; else if (Pred == ICmpInst::ICMP_NE) return LazyValueInfo::True; } - const DataLayout &DL = CxtI->getModule()->getDataLayout(); - LVILatticeVal Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI); + ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI); Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI); if (Ret != Unknown) return Ret; @@ -1889,7 +1797,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( // Find if there are latticevalues defined for arguments of the function. auto *F = BB->getParent(); for (auto &Arg : F->args()) { - LVILatticeVal Result = LVIImpl->getValueInBlock( + ValueLatticeElement Result = LVIImpl->getValueInBlock( const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB)); if (Result.isUndefined()) continue; @@ -1914,7 +1822,7 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( auto printResult = [&](const BasicBlock *BB) { if (!BlocksContainingLVI.insert(BB).second) return; - LVILatticeVal Result = LVIImpl->getValueInBlock( + ValueLatticeElement Result = LVIImpl->getValueInBlock( const_cast<Instruction *>(I), const_cast<BasicBlock *>(BB)); OS << "; LatticeVal for: '" << *I << "' in BB: '"; BB->printAsOperand(OS, false); diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index ada600a69b87..7b792ed0a2e2 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -285,15 +285,24 @@ void Lint::visitCallSite(CallSite CS) { } } - if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) - for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) { - Value *Obj = findValue(*AI, /*OffsetOk=*/true); - Assert(!isa<AllocaInst>(Obj), - "Undefined behavior: Call with \"tail\" keyword references " - "alloca", - &I); + if (CS.isCall()) { + const CallInst *CI = cast<CallInst>(CS.getInstruction()); + if (CI->isTailCall()) { + const AttributeList &PAL = CI->getAttributes(); + unsigned ArgNo = 0; + for (Value *Arg : CS.args()) { + // Skip ByVal arguments since they will be memcpy'd to the callee's + // stack anyway. 
+ if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal)) + continue; + Value *Obj = findValue(Arg, /*OffsetOk=*/true); + Assert(!isa<AllocaInst>(Obj), + "Undefined behavior: Call with \"tail\" keyword references " + "alloca", + &I); + } } + } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) @@ -683,7 +692,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (Instruction::isCast(CE->getOpcode())) { if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), CE->getOperand(0)->getType(), CE->getType(), - DL->getIntPtrType(V->getType()))) + *DL)) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { ArrayRef<unsigned> Indices = CE->getIndices(); diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index 591b0fc481d2..834727c9224d 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -72,7 +72,7 @@ static bool isDereferenceableAndAlignedPointer( V->getPointerDereferenceableBytes(DL, CheckForNonNull)); if (KnownDerefBytes.getBoolValue()) { if (KnownDerefBytes.uge(Size)) - if (!CheckForNonNull || isKnownNonNullAt(V, CtxI, DT)) + if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) return isAligned(V, Align, DL); } @@ -414,7 +414,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, // If we have alias analysis and it says the store won't modify the loaded // value, ignore the store. - if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0) + if (AA && !isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize))) continue; // Otherwise the store that may or may not alias the pointer, bail out. @@ -426,8 +426,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, if (Inst->mayWriteToMemory()) { // If alias analysis claims that it really won't modify the load, // ignore it. - if (AA && - (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0) + if (AA && !isModSet(AA->getModRefInfo(Inst, StrippedPtr, AccessSize))) continue; // May modify the pointer, bail out. diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 4ba12583ff83..ed8e5e8cc489 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -29,7 +29,7 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -522,6 +522,21 @@ public: Accesses.insert(MemAccessInfo(Ptr, true)); } + /// \brief Check if we can emit a run-time no-alias check for \p Access. + /// + /// Returns true if we can emit a run-time no alias check for \p Access. + /// If we can check this access, this also adds it to a dependence set and + /// adds a run-time to check for it to \p RtCheck. If \p Assume is true, + /// we will attempt to use additional run-time checks in order to get + /// the bounds of the pointer. 
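In the Loads.cpp hunk above, the old bit test against MRI_Mod becomes the isModSet predicate. A compile-checked sketch of that idiom; the surrounding function is only a vehicle for the call.

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Illustrative only: "the store's effect on Loc has no Mod bit", which is
  // exactly what the old (x & MRI_Mod) == 0 test meant.
  static bool storeCannotModify(AAResults &AA, StoreInst *SI,
                                const MemoryLocation &Loc) {
    return !isModSet(AA.getModRefInfo(SI, Loc));
  }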
+ bool createCheckForAccess(RuntimePointerChecking &RtCheck, + MemAccessInfo Access, + const ValueToValueMap &Strides, + DenseMap<Value *, unsigned> &DepSetId, + Loop *TheLoop, unsigned &RunningDepId, + unsigned ASId, bool ShouldCheckStride, + bool Assume); + /// \brief Check whether we can check the pointers at runtime for /// non-intersection. /// @@ -597,9 +612,11 @@ private: } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. +/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr +/// by adding run-time checks (overflow checks) if necessary. static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, - Loop *L) { + Loop *L, bool Assume) { const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); // The bounds for loop-invariant pointer is trivial. @@ -607,6 +624,10 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE, return true; const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); + + if (!AR && Assume) + AR = PSE.getAsAddRec(Ptr); + if (!AR) return false; @@ -621,9 +642,53 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, return true; int64_t Stride = getPtrStride(PSE, Ptr, L, Strides); - return Stride == 1; + if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) + return true; + + return false; } +bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, + MemAccessInfo Access, + const ValueToValueMap &StridesMap, + DenseMap<Value *, unsigned> &DepSetId, + Loop *TheLoop, unsigned &RunningDepId, + unsigned ASId, bool ShouldCheckWrap, + bool Assume) { + Value *Ptr = Access.getPointer(); + + if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume)) + return false; + + // When we run after a failing dependency check we have to make sure + // we don't have wrapping pointers. + if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) { + auto *Expr = PSE.getSCEV(Ptr); + if (!Assume || !isa<SCEVAddRecExpr>(Expr)) + return false; + PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); + } + + // The id of the dependence set. + unsigned DepId; + + if (isDependencyCheckNeeded()) { + Value *Leader = DepCands.getLeaderValue(Access).getPointer(); + unsigned &LeaderId = DepSetId[Leader]; + if (!LeaderId) + LeaderId = RunningDepId++; + DepId = LeaderId; + } else + // Each access has its own dependence set. + DepId = RunningDepId++; + + bool IsWrite = Access.getInt(); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); + DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + + return true; + } + bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, @@ -643,12 +708,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, for (auto &AS : AST) { int NumReadPtrChecks = 0; int NumWritePtrChecks = 0; + bool CanDoAliasSetRT = true; // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. 
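The new Assume path relies on PredicatedScalarEvolution manufacturing an add-recurrence by taking on predicates. A rough, illustrative shape of that interaction, using only the PSE calls visible in this hunk; the helper name and exact policy are assumptions.

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  using namespace llvm;

  // Sketch: if plain SCEV cannot prove Ptr is an AddRec, let PSE assume
  // predicates (including a no-wrap predicate) to recover one.
  static const SCEVAddRecExpr *getAddRecForBounds(PredicatedScalarEvolution &PSE,
                                                  Value *Ptr) {
    if (auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr)))
      return AR;                                 // provable without predicates
    if (const SCEVAddRecExpr *AR = PSE.getAsAddRec(Ptr)) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      return AR;
    }
    return nullptr;
  }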
unsigned RunningDepId = 1; DenseMap<Value *, unsigned> DepSetId; + SmallVector<MemAccessInfo, 4> Retries; + for (auto A : AS) { Value *Ptr = A.getValue(); bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); @@ -659,29 +727,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, else ++NumReadPtrChecks; - if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && - // When we run after a failing dependency check we have to make sure - // we don't have wrapping pointers. - (!ShouldCheckWrap || isNoWrap(PSE, StridesMap, Ptr, TheLoop))) { - // The id of the dependence set. - unsigned DepId; - - if (IsDepCheckNeeded) { - Value *Leader = DepCands.getLeaderValue(Access).getPointer(); - unsigned &LeaderId = DepSetId[Leader]; - if (!LeaderId) - LeaderId = RunningDepId++; - DepId = LeaderId; - } else - // Each access has its own dependence set. - DepId = RunningDepId++; - - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); - - DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); - } else { + if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, + RunningDepId, ASId, ShouldCheckWrap, false)) { DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); - CanDoRT = false; + Retries.push_back(Access); + CanDoAliasSetRT = false; } } @@ -693,10 +743,29 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer // for which we couldn't find the bounds but we don't actually need to emit // any checks so it does not matter. - if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2)) - NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 && - NumWritePtrChecks >= 1)); + bool NeedsAliasSetRTCheck = false; + if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) + NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 || + (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1)); + + // We need to perform run-time alias checks, but some pointers had bounds + // that couldn't be checked. + if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) { + // Reset the CanDoSetRt flag and retry all accesses that have failed. + // We know that we need these checks, so we can now be more aggressive + // and add further checks if required (overflow checks). + CanDoAliasSetRT = true; + for (auto Access : Retries) + if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, + TheLoop, RunningDepId, ASId, + ShouldCheckWrap, /*Assume=*/true)) { + CanDoAliasSetRT = false; + break; + } + } + CanDoRT &= CanDoAliasSetRT; + NeedRTCheck |= NeedsAliasSetRTCheck; ++ASId; } @@ -1038,6 +1107,77 @@ static unsigned getAddressSpaceOperand(Value *I) { return -1; } +// TODO:This API can be improved by using the permutation of given width as the +// accesses are entered into the map. +bool llvm::sortLoadAccesses(ArrayRef<Value *> VL, const DataLayout &DL, + ScalarEvolution &SE, + SmallVectorImpl<Value *> &Sorted, + SmallVectorImpl<unsigned> *Mask) { + SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs; + OffValPairs.reserve(VL.size()); + Sorted.reserve(VL.size()); + + // Walk over the pointers, and map each of them to an offset relative to + // first pointer in the array. 
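For context only, not part of this change: the runtime checks gathered in canCheckPtrAtRT above are later expanded by the loop versioner into pairwise range-overlap guards over the pointer groups' [Start, End) bounds, conceptually of this shape.

  #include <cstdint>

  // Purely illustrative: two accessed ranges cannot alias if one ends before
  // the other begins.
  static bool rangesCannotAlias(uintptr_t StartA, uintptr_t EndA,
                                uintptr_t StartB, uintptr_t EndB) {
    return EndA <= StartB || EndB <= StartA;
  }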
+ Value *Ptr0 = getPointerOperand(VL[0]); + const SCEV *Scev0 = SE.getSCEV(Ptr0); + Value *Obj0 = GetUnderlyingObject(Ptr0, DL); + PointerType *PtrTy = dyn_cast<PointerType>(Ptr0->getType()); + uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); + + for (auto *Val : VL) { + // The only kind of access we care about here is load. + if (!isa<LoadInst>(Val)) + return false; + + Value *Ptr = getPointerOperand(Val); + assert(Ptr && "Expected value to have a pointer operand."); + // If a pointer refers to a different underlying object, bail - the + // pointers are by definition incomparable. + Value *CurrObj = GetUnderlyingObject(Ptr, DL); + if (CurrObj != Obj0) + return false; + + const SCEVConstant *Diff = + dyn_cast<SCEVConstant>(SE.getMinusSCEV(SE.getSCEV(Ptr), Scev0)); + // The pointers may not have a constant offset from each other, or SCEV + // may just not be smart enough to figure out they do. Regardless, + // there's nothing we can do. + if (!Diff || static_cast<unsigned>(Diff->getAPInt().abs().getSExtValue()) > + (VL.size() - 1) * Size) + return false; + + OffValPairs.emplace_back(Diff->getAPInt().getSExtValue(), Val); + } + SmallVector<unsigned, 4> UseOrder(VL.size()); + for (unsigned i = 0; i < VL.size(); i++) { + UseOrder[i] = i; + } + + // Sort the memory accesses and keep the order of their uses in UseOrder. + std::sort(UseOrder.begin(), UseOrder.end(), + [&OffValPairs](unsigned Left, unsigned Right) { + return OffValPairs[Left].first < OffValPairs[Right].first; + }); + + for (unsigned i = 0; i < VL.size(); i++) + Sorted.emplace_back(OffValPairs[UseOrder[i]].second); + + // Sort UseOrder to compute the Mask. + if (Mask) { + Mask->reserve(VL.size()); + for (unsigned i = 0; i < VL.size(); i++) + Mask->emplace_back(i); + std::sort(Mask->begin(), Mask->end(), + [&UseOrder](unsigned Left, unsigned Right) { + return UseOrder[Left] < UseOrder[Right]; + }); + } + + return true; +} + + /// Returns true if the memory operations \p A and \p B are consecutive. bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType) { @@ -1471,10 +1611,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, couldPreventStoreLoadForward(Distance, TypeByteSize)) return Dependence::BackwardVectorizableButPreventsForwarding; + uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride); DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() - << " with max VF = " - << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n'); - + << " with max VF = " << MaxVF << '\n'); + uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; + MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits); return Dependence::BackwardVectorizable; } @@ -2066,8 +2207,51 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { if (!Stride) return; - DEBUG(dbgs() << "LAA: Found a strided access that we can version"); + DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " + "versioning:"); DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + + // Avoid adding the "Stride == 1" predicate when we know that + // Stride >= Trip-Count. Such a predicate will effectively optimize a single + // or zero iteration loop, as Trip-Count <= Stride == 1. + // + // TODO: We are currently not making a very informed decision on when it is + // beneficial to apply stride versioning. 
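(Stepping back briefly to the llvm::sortLoadAccesses entry point introduced above; a hypothetical caller, with VL, DL and SE assumed to be in scope, might look as follows. The stride-versioning discussion continues below.)

  // Hypothetical use: VL holds loads off one underlying object; on success
  // Sorted is ordered by constant offset and Mask records the permutation
  // between the original and sorted orders.
  SmallVector<Value *, 8> Sorted;
  SmallVector<unsigned, 8> Mask;
  if (sortLoadAccesses(VL, DL, SE, Sorted, &Mask)) {
    // e.g. feed Sorted/Mask to a client that wants consecutive loads
  }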
It might make more sense that the + // users of this analysis (such as the vectorizer) will trigger it, based on + // their specific cost considerations; For example, in cases where stride + // versioning does not help resolving memory accesses/dependences, the + // vectorizer should evaluate the cost of the runtime test, and the benefit + // of various possible stride specializations, considering the alternatives + // of using gather/scatters (if available). + + const SCEV *StrideExpr = PSE->getSCEV(Stride); + const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); + + // Match the types so we can compare the stride and the BETakenCount. + // The Stride can be positive/negative, so we sign extend Stride; + // The backdgeTakenCount is non-negative, so we zero extend BETakenCount. + const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); + uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); + uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType()); + const SCEV *CastedStride = StrideExpr; + const SCEV *CastedBECount = BETakenCount; + ScalarEvolution *SE = PSE->getSE(); + if (BETypeSize >= StrideTypeSize) + CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType()); + else + CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); + const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); + // Since TripCount == BackEdgeTakenCount + 1, checking: + // "Stride >= TripCount" is equivalent to checking: + // Stride - BETakenCount > 0 + if (SE->isKnownPositive(StrideMinusBETaken)) { + DEBUG(dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " + "Stride==1 predicate will imply that the loop executes " + "at most once.\n"); + return; + } + DEBUG(dbgs() << "LAA: Found a strided access that we can version."); + SymbolicStrides[Ptr] = Stride; StrideSet.insert(Stride); } diff --git a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp index e4a0f90b2f71..ea7a62d179c4 100644 --- a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp +++ b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp @@ -11,15 +11,21 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" using namespace llvm; +namespace llvm { +/// Enables memory ssa as a dependency for loop passes in legacy pass manager. +cl::opt<bool> EnableMSSALoopDependency( + "enable-mssa-loop-dependency", cl::Hidden, cl::init(false), + cl::desc("Enable MemorySSA dependency for loop pass manager")); + // Explicit template instantiations and specialization defininitions for core // template typedefs. -namespace llvm { template class AllAnalysesOn<Loop>; template class AnalysisManager<Loop, LoopStandardAnalysisResults &>; template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>; @@ -45,19 +51,25 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate( // loop analyses declare any dependencies on these and use the more general // invalidation logic below to act on that. 
auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>(); + bool invalidateMemorySSAAnalysis = false; + if (EnableMSSALoopDependency) + invalidateMemorySSAAnalysis = Inv.invalidate<MemorySSAAnalysis>(F, PA); if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || Inv.invalidate<AAManager>(F, PA) || Inv.invalidate<AssumptionAnalysis>(F, PA) || Inv.invalidate<DominatorTreeAnalysis>(F, PA) || Inv.invalidate<LoopAnalysis>(F, PA) || - Inv.invalidate<ScalarEvolutionAnalysis>(F, PA)) { + Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || + invalidateMemorySSAAnalysis) { // Note that the LoopInfo may be stale at this point, however the loop // objects themselves remain the only viable keys that could be in the // analysis manager's cache. So we just walk the keys and forcibly clear // those results. Note that the order doesn't matter here as this will just // directly destroy the results without calling methods on them. - for (Loop *L : PreOrderLoops) - InnerAM->clear(*L); + for (Loop *L : PreOrderLoops) { + // NB! `L` may not be in a good enough state to run Loop::getName. + InnerAM->clear(*L, "<possibly invalidated loop>"); + } // We also need to null out the inner AM so that when the object gets // destroyed as invalid we don't try to clear the inner AM again. At that @@ -135,7 +147,9 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() { PA.preserve<LoopAnalysis>(); PA.preserve<LoopAnalysisManagerFunctionProxy>(); PA.preserve<ScalarEvolutionAnalysis>(); - // TODO: What we really want to do here is preserve an AA category, but that + // FIXME: Uncomment this when all loop passes preserve MemorySSA + // PA.preserve<MemorySSAAnalysis>(); + // FIXME: What we really want to do here is preserve an AA category, but that // concept doesn't exist yet. PA.preserve<AAManager>(); PA.preserve<BasicAA>(); diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 697b58622bb4..9e54d60779a0 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" @@ -44,9 +45,9 @@ bool llvm::VerifyLoopInfo = true; #else bool llvm::VerifyLoopInfo = false; #endif -static cl::opt<bool,true> -VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), - cl::desc("Verify loop info (time consuming)")); +static cl::opt<bool, true> + VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), + cl::Hidden, cl::desc("Verify loop info (time consuming)")); //===----------------------------------------------------------------------===// // Loop implementation @@ -55,7 +56,7 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), bool Loop::isLoopInvariant(const Value *V) const { if (const Instruction *I = dyn_cast<Instruction>(V)) return !contains(I); - return true; // All non-instructions are loop invariant + return true; // All non-instructions are loop invariant } bool Loop::hasLoopInvariantOperands(const Instruction *I) const { @@ -66,7 +67,7 @@ bool Loop::makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt) const { if (Instruction *I = dyn_cast<Instruction>(V)) return makeLoopInvariant(I, Changed, InsertPt); - return true; // All non-instructions are loop-invariant. + return true; // All non-instructions are loop-invariant. 
} bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, @@ -112,12 +113,13 @@ PHINode *Loop::getCanonicalInductionVariable() const { BasicBlock *Incoming = nullptr, *Backedge = nullptr; pred_iterator PI = pred_begin(H); - assert(PI != pred_end(H) && - "Loop must have at least one backedge!"); + assert(PI != pred_end(H) && "Loop must have at least one backedge!"); Backedge = *PI++; - if (PI == pred_end(H)) return nullptr; // dead loop + if (PI == pred_end(H)) + return nullptr; // dead loop Incoming = *PI++; - if (PI != pred_end(H)) return nullptr; // multiple backedges? + if (PI != pred_end(H)) + return nullptr; // multiple backedges? if (contains(Incoming)) { if (contains(Backedge)) @@ -130,12 +132,11 @@ PHINode *Loop::getCanonicalInductionVariable() const { for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); if (ConstantInt *CI = - dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming))) + dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming))) if (CI->isZero()) if (Instruction *Inc = - dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge))) - if (Inc->getOpcode() == Instruction::Add && - Inc->getOperand(0) == PN) + dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge))) + if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN) if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1))) if (CI->isOne()) return PN; @@ -255,7 +256,8 @@ void Loop::setLoopID(MDNode *LoopID) const { return; } - assert(!getLoopLatch() && "The loop should have no single latch at this point"); + assert(!getLoopLatch() && + "The loop should have no single latch at this point"); BasicBlock *H = getHeader(); for (BasicBlock *BB : this->blocks()) { TerminatorInst *TI = BB->getTerminator(); @@ -266,11 +268,44 @@ void Loop::setLoopID(MDNode *LoopID) const { } } +void Loop::setLoopAlreadyUnrolled() { + MDNode *LoopID = getLoopID(); + // First remove any existing loop unrolling metadata. + SmallVector<Metadata *, 4> MDs; + // Reserve first location for self reference to the LoopID metadata node. + MDs.push_back(nullptr); + + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + bool IsUnrollMetadata = false; + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (MD) { + const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); + } + if (!IsUnrollMetadata) + MDs.push_back(LoopID->getOperand(i)); + } + } + + // Add unroll(disable) metadata to disable future unrolling. + LLVMContext &Context = getHeader()->getContext(); + SmallVector<Metadata *, 1> DisableOperands; + DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + setLoopID(NewLoopID); +} + bool Loop::isAnnotatedParallel() const { MDNode *DesiredLoopIdMetadata = getLoopID(); if (!DesiredLoopIdMetadata) - return false; + return false; // The loop branch contains the parallel loop metadata. 
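The new Loop::setLoopAlreadyUnrolled() hook above packages the metadata rewrite in a single call; an assumed call site in an unrolling client:

  // Assumed usage: after (fully) unrolling L, stop later unroll passes from
  // touching it again. Existing llvm.loop.unroll.* operands are dropped and
  // llvm.loop.unroll.disable is appended to the loop ID node.
  L->setLoopAlreadyUnrolled();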
In order to ensure // that any parallel-loop-unaware optimization pass hasn't added loop-carried @@ -307,9 +342,7 @@ bool Loop::isAnnotatedParallel() const { return true; } -DebugLoc Loop::getStartLoc() const { - return getLocRange().getStart(); -} +DebugLoc Loop::getStartLoc() const { return getLocRange().getStart(); } Loop::LocRange Loop::getLocRange() const { // If we have a debug location in the loop ID, then use it. @@ -357,8 +390,8 @@ bool Loop::hasDedicatedExits() const { return true; } -void -Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { +void Loop::getUniqueExitBlocks( + SmallVectorImpl<BasicBlock *> &ExitBlocks) const { assert(hasDedicatedExits() && "getUniqueExitBlocks assumes the loop has canonical form exits!"); @@ -408,12 +441,10 @@ BasicBlock *Loop::getUniqueExitBlock() const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void Loop::dump() const { - print(dbgs()); -} +LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); } LLVM_DUMP_METHOD void Loop::dumpVerbose() const { - print(dbgs(), /*Depth=*/ 0, /*Verbose=*/ true); + print(dbgs(), /*Depth=*/0, /*Verbose=*/true); } #endif @@ -434,15 +465,15 @@ class UnloopUpdater { // loops within these subloops will not change parents. However, an immediate // subloop's new parent will be the nearest loop reachable from either its own // exits *or* any of its nested loop's exits. - DenseMap<Loop*, Loop*> SubloopParents; + DenseMap<Loop *, Loop *> SubloopParents; // Flag the presence of an irreducible backedge whose destination is a block // directly contained by the original unloop. bool FoundIB; public: - UnloopUpdater(Loop *UL, LoopInfo *LInfo) : - Unloop(*UL), LI(LInfo), DFS(UL), FoundIB(false) {} + UnloopUpdater(Loop *UL, LoopInfo *LInfo) + : Unloop(*UL), LI(LInfo), DFS(UL), FoundIB(false) {} void updateBlockParents(); @@ -472,8 +503,7 @@ void UnloopUpdater::updateBlockParents() { assert((NL != &Unloop && (!NL || NL->contains(&Unloop))) && "uninitialized successor"); LI->changeLoopFor(POI, NL); - } - else { + } else { // Or the current block is part of a subloop, in which case its parent // is unchanged. assert((FoundIB || Unloop.contains(L)) && "uninitialized successor"); @@ -490,7 +520,8 @@ void UnloopUpdater::updateBlockParents() { // from successors to predecessors as before. Changed = false; for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(), - POE = DFS.endPostorder(); POI != POE; ++POI) { + POE = DFS.endPostorder(); + POI != POE; ++POI) { Loop *L = LI->getLoopFor(*POI); Loop *NL = getNearestLoop(*POI, L); @@ -508,8 +539,8 @@ void UnloopUpdater::updateBlockParents() { void UnloopUpdater::removeBlocksFromAncestors() { // Remove all unloop's blocks (including those in nested subloops) from // ancestors below the new parent loop. 
- for (Loop::block_iterator BI = Unloop.block_begin(), - BE = Unloop.block_end(); BI != BE; ++BI) { + for (Loop::block_iterator BI = Unloop.block_begin(), BE = Unloop.block_end(); + BI != BE; ++BI) { Loop *OuterParent = LI->getLoopFor(*BI); if (Unloop.contains(OuterParent)) { while (OuterParent->getParentLoop() != &Unloop) @@ -609,9 +640,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { return NearLoop; } -LoopInfo::LoopInfo(const DomTreeBase<BasicBlock> &DomTree) { - analyze(DomTree); -} +LoopInfo::LoopInfo(const DomTreeBase<BasicBlock> &DomTree) { analyze(DomTree); } bool LoopInfo::invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &) { @@ -622,10 +651,10 @@ bool LoopInfo::invalidate(Function &F, const PreservedAnalyses &PA, PAC.preservedSet<CFGAnalyses>()); } -void LoopInfo::markAsRemoved(Loop *Unloop) { - assert(!Unloop->isInvalid() && "Loop has already been removed"); - Unloop->invalidate(); - RemovedLoops.push_back(Unloop); +void LoopInfo::erase(Loop *Unloop) { + assert(!Unloop->isInvalid() && "Loop has already been erased!"); + + auto InvalidateOnExit = make_scope_exit([&]() { destroy(Unloop); }); // First handle the special case of no parent loop to simplify the algorithm. if (!Unloop->getParentLoop()) { @@ -702,12 +731,43 @@ PreservedAnalyses LoopPrinterPass::run(Function &F, } void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) { + + if (forcePrintModuleIR()) { + // handling -print-module-scope + OS << Banner << " (loop: "; + L.getHeader()->printAsOperand(OS, false); + OS << ")\n"; + + // printing whole module + OS << *L.getHeader()->getModule(); + return; + } + OS << Banner; + + auto *PreHeader = L.getLoopPreheader(); + if (PreHeader) { + OS << "\n; Preheader:"; + PreHeader->print(OS); + OS << "\n; Loop:"; + } + for (auto *Block : L.blocks()) if (Block) Block->print(OS); else OS << "Printing <null> block"; + + SmallVector<BasicBlock *, 8> ExitBlocks; + L.getExitBlocks(ExitBlocks); + if (!ExitBlocks.empty()) { + OS << "\n; Exit blocks"; + for (auto *Block : ExitBlocks) + if (Block) + Block->print(OS); + else + OS << "Printing <null> block"; + } } //===----------------------------------------------------------------------===// @@ -766,5 +826,7 @@ PreservedAnalyses LoopVerifierPass::run(Function &F, void LoopBlocksDFS::perform(LoopInfo *LI) { LoopBlocksTraversal Traversal(*this, LI); for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(), - POE = Traversal.end(); POI != POE; ++POI) ; + POE = Traversal.end(); + POI != POE; ++POI) + ; } diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index e988f6444a58..9af717bafdca 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -46,8 +46,7 @@ public: } bool runOnLoop(Loop *L, LPPassManager &) override { - auto BBI = find_if(L->blocks().begin(), L->blocks().end(), - [](BasicBlock *BB) { return BB; }); + auto BBI = llvm::find_if(L->blocks(), [](BasicBlock *BB) { return BB; }); if (BBI != L->blocks().end() && isFunctionInPrintList((*BBI)->getParent()->getName())) { printLoop(*L, OS, Banner); @@ -140,6 +139,13 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { Info.setPreservesAll(); } +void LPPassManager::markLoopAsDeleted(Loop &L) { + assert((&L == CurrentLoop || CurrentLoop->contains(&L)) && + "Must not delete loop outside the current loop tree!"); + if (&L == CurrentLoop) + CurrentLoopDeleted = true; +} + /// run - Execute all of the passes 
scheduled for execution. Keep track of /// whether any of the passes modifies the function, and if so, return true. bool LPPassManager::runOnFunction(Function &F) { @@ -176,7 +182,7 @@ bool LPPassManager::runOnFunction(Function &F) { // Walk Loops while (!LQ.empty()) { - bool LoopWasDeleted = false; + CurrentLoopDeleted = false; CurrentLoop = LQ.back(); // Run all passes on the current Loop. @@ -195,15 +201,14 @@ bool LPPassManager::runOnFunction(Function &F) { Changed |= P->runOnLoop(CurrentLoop, *this); } - LoopWasDeleted = CurrentLoop->isInvalid(); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, - LoopWasDeleted ? "<deleted>" - : CurrentLoop->getHeader()->getName()); + CurrentLoopDeleted ? "<deleted loop>" + : CurrentLoop->getName()); dumpPreservedSet(P); - if (LoopWasDeleted) { + if (CurrentLoopDeleted) { // Notify passes that the loop is being deleted. deleteSimpleAnalysisLoop(CurrentLoop); } else { @@ -231,11 +236,12 @@ bool LPPassManager::runOnFunction(Function &F) { removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); - removeDeadPasses(P, LoopWasDeleted ? "<deleted>" - : CurrentLoop->getHeader()->getName(), + removeDeadPasses(P, + CurrentLoopDeleted ? "<deleted>" + : CurrentLoop->getHeader()->getName(), ON_LOOP_MSG); - if (LoopWasDeleted) + if (CurrentLoopDeleted) // Do not run other passes on this loop. break; } @@ -243,7 +249,7 @@ bool LPPassManager::runOnFunction(Function &F) { // If the loop was deleted, release all the loop passes. This frees up // some memory, and avoids trouble with the pass manager trying to call // verifyAnalysis on them. - if (LoopWasDeleted) { + if (CurrentLoopDeleted) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); freePass(P, "<deleted>", ON_LOOP_MSG); @@ -361,4 +367,3 @@ bool LoopPass::skipLoop(const Loop *L) const { char LCSSAVerificationPass::ID = 0; INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier", false, false) - diff --git a/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp index 7bdf3408a581..0da90dae3d9a 100644 --- a/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/contrib/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopUnrollAnalyzer.h" -#include "llvm/IR/Dominators.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp index 4231a78352ce..4a136c5a0c6d 100644 --- a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -7,9 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 7327c07499be..24fedfed772c 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -1,4 +1,4 @@ -//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===// +//===- MemoryBuiltins.cpp - Identify calls to memory builtins -------------===// // // The LLVM Compiler Infrastructure // @@ -13,20 +13,39 @@ 
//===----------------------------------------------------------------------===// #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "memory-builtins" @@ -187,7 +206,6 @@ static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { return CS && CS.hasRetAttr(Attribute::NoAlias); } - /// \brief Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). @@ -323,7 +341,6 @@ Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL, return computeArraySize(CI, DL, TLI, LookThroughSExt); } - /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. const CallInst *llvm::extractCallocCall(const Value *I, @@ -331,7 +348,6 @@ const CallInst *llvm::extractCallocCall(const Value *I, return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr; } - /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast<CallInst>(I); @@ -387,8 +403,6 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { return CI; } - - //===----------------------------------------------------------------------===// // Utility functions to compute size of objects. 
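As a reminder of what these size utilities are used for (not part of this diff), the common entry point is getObjectSize(); a hedged sketch, with the helper name assumed:

  #include "llvm/Analysis/MemoryBuiltins.h"
  #include <cstdint>
  using namespace llvm;

  // Illustrative query: bound the size of the object Ptr points into,
  // rounding up to its alignment as the visitor's align() helper does.
  static bool knownObjectSize(const Value *Ptr, const DataLayout &DL,
                              const TargetLibraryInfo *TLI, uint64_t &Size) {
    ObjectSizeOpts Opts;
    Opts.RoundToAlign = true;
    return getObjectSize(Ptr, Size, DL, TLI, Opts);
  }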
// @@ -452,7 +466,6 @@ STATISTIC(ObjectVisitorArgument, STATISTIC(ObjectVisitorLoad, "Number of load instructions with unsolved size and offset"); - APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { if (Options.RoundToAlign && Align) return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align)); diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 263cf42ebe27..a6c590126c2f 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -15,28 +15,40 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -45,7 +57,9 @@ #include "llvm/Support/MathExtras.h" #include <algorithm> #include <cassert> +#include <cstdint> #include <iterator> +#include <utility> using namespace llvm; @@ -105,38 +119,38 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (LI->isUnordered()) { Loc = MemoryLocation::get(LI); - return MRI_Ref; + return ModRefInfo::Ref; } if (LI->getOrdering() == AtomicOrdering::Monotonic) { Loc = MemoryLocation::get(LI); - return MRI_ModRef; + return ModRefInfo::ModRef; } Loc = MemoryLocation(); - return MRI_ModRef; + return ModRefInfo::ModRef; } if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->isUnordered()) { Loc = MemoryLocation::get(SI); - return MRI_Mod; + return ModRefInfo::Mod; } if (SI->getOrdering() == AtomicOrdering::Monotonic) { Loc = MemoryLocation::get(SI); - return MRI_ModRef; + return ModRefInfo::ModRef; } Loc = MemoryLocation(); - return MRI_ModRef; + return ModRefInfo::ModRef; } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { Loc = MemoryLocation::get(V); - return MRI_ModRef; + return ModRefInfo::ModRef; } if (const CallInst *CI = isFreeCall(Inst, &TLI)) { // calls to free() deallocate the entire structure Loc = MemoryLocation(CI->getArgOperand(0)); - return MRI_Mod; + return ModRefInfo::Mod; } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -152,7 +166,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation 
&Loc, cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return MRI_Mod; + return ModRefInfo::Mod; case Intrinsic::invariant_end: II->getAAMetadata(AAInfo); Loc = MemoryLocation( @@ -160,7 +174,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return MRI_Mod; + return ModRefInfo::Mod; default: break; } @@ -168,10 +182,10 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, // Otherwise, just do the coarse-grained thing that always works. if (Inst->mayWriteToMemory()) - return MRI_ModRef; + return ModRefInfo::ModRef; if (Inst->mayReadFromMemory()) - return MRI_Ref; - return MRI_NoModRef; + return ModRefInfo::Ref; + return ModRefInfo::NoModRef; } /// Private helper for finding the local dependencies of a call site. @@ -182,48 +196,46 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom( // Walk backwards through the block, looking for dependencies. while (ScanIt != BB->begin()) { + Instruction *Inst = &*--ScanIt; + // Debug intrinsics don't cause dependences and should not affect Limit + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); - Instruction *Inst = &*--ScanIt; - // If this inst is a memory op, get the pointer it accessed MemoryLocation Loc; ModRefInfo MR = GetLocation(Inst, Loc, TLI); if (Loc.Ptr) { // A simple instruction. - if (AA.getModRefInfo(CS, Loc) != MRI_NoModRef) + if (isModOrRefSet(AA.getModRefInfo(CS, Loc))) return MemDepResult::getClobber(Inst); continue; } if (auto InstCS = CallSite(Inst)) { - // Debug intrinsics don't cause dependences. - if (isa<DbgInfoIntrinsic>(Inst)) - continue; // If these two calls do not interfere, look past it. - switch (AA.getModRefInfo(CS, InstCS)) { - case MRI_NoModRef: + if (isNoModRef(AA.getModRefInfo(CS, InstCS))) { // If the two calls are the same, return InstCS as a Def, so that // CS can be found redundant and eliminated. - if (isReadOnlyCall && !(MR & MRI_Mod) && + if (isReadOnlyCall && !isModSet(MR) && CS.getInstruction()->isIdenticalToWhenDefined(Inst)) return MemDepResult::getDef(Inst); // Otherwise if the two calls don't interact (e.g. InstCS is readnone) // keep scanning. continue; - default: + } else return MemDepResult::getClobber(Inst); - } } // If we could not obtain a pointer for the instruction and the instruction // touches memory then assume that this is a dependency. - if (MR != MRI_NoModRef) + if (isModOrRefSet(MR)) return MemDepResult::getClobber(Inst); } @@ -294,8 +306,10 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize( return 0; if (LIOffs + NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeAddress)) + (LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress) || + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeHWAddress))) // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. 
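The recurring mechanical change in this file, and in the hunks above, is the move from bit tests on the old MRI_* values to the ModRefInfo enum class plus its predicate helpers. A compile-checked cheat sheet; the function exists only to hold the comparisons side by side.

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  static void modRefIdioms(AAResults &AA, Instruction *I,
                           const MemoryLocation &Loc) {
    ModRefInfo MR = AA.getModRefInfo(I, Loc);
    if (isNoModRef(MR))     { /* was: MR == MRI_NoModRef */ }
    if (isModOrRefSet(MR))  { /* was: MR != MRI_NoModRef */ }
    if (isModSet(MR))       { /* was: MR & MRI_Mod       */ }
    if (isRefSet(MR))       { /* was: MR & MRI_Ref       */ }
    if (isModAndRefSet(MR)) { /* was: MR == MRI_ModRef   */ }
  }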
@@ -322,7 +336,6 @@ static bool isVolatile(Instruction *Inst) { MemDepResult MemoryDependenceResults::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { - MemDepResult InvariantGroupDependency = MemDepResult::getUnknown(); if (QueryInst != nullptr) { if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { @@ -350,7 +363,6 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom( MemDepResult MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB) { - auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); if (!InvariantGroupMD) return MemDepResult::getUnknown(); @@ -380,7 +392,6 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, return Best; }; - // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case // we will see all the instructions. This should be fixed in MSSA. while (!LoadOperandsQueue.empty()) { @@ -541,7 +552,6 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // it does not alias with when this atomic load indicates that another // thread may be accessing the location. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - // While volatile access cannot be eliminated, they do not have to clobber // non-aliasing locations, as normal accesses, for example, can be safely // reordered with volatile accesses. @@ -632,7 +642,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. - if (AA.getModRefInfo(SI, MemLoc) == MRI_NoModRef) + if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc))) continue; // Ok, this store might clobber the query pointer. Check to see if it is @@ -678,15 +688,15 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. - if (MR == MRI_ModRef) + if (isModAndRefSet(MR)) MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB); switch (MR) { - case MRI_NoModRef: + case ModRefInfo::NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; - case MRI_Mod: + case ModRefInfo::Mod: return MemDepResult::getClobber(Inst); - case MRI_Ref: + case ModRefInfo::Ref: // If the call is known to never store to the pointer, and if this is a // load query, we can safely ignore it (scan past it). if (isLoad) @@ -739,7 +749,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) { ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI); if (MemLoc.Ptr) { // If we can do a pointer scan, make it happen. - bool isLoad = !(MR & MRI_Mod); + bool isLoad = !isModSet(MR); if (auto *II = dyn_cast<IntrinsicInst>(QueryInst)) isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; @@ -1508,7 +1518,6 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) { } // If we have a cached local dependence query for this instruction, remove it. - // LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); if (LocalDepEntry != LocalDeps.end()) { // Remove us from DepInst's reverse set now that the local dep info is gone. 
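For orientation, a sketch of the local query these helpers ultimately serve; the caller-side names are assumptions.

  // MD is a MemoryDependenceResults&, LI a load in the current block.
  MemDepResult Dep = MD.getDependency(LI);
  if (Dep.isDef() || Dep.isClobber())
    if (Instruction *DepInst = Dep.getInst())
      (void)DepInst; // the store/call this load must stay ordered after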
@@ -1531,7 +1540,6 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) { } // Loop over all of the things that depend on the instruction we're removing. - // SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd; // If we find RemInst as a clobber or Def in any of the maps for other values, @@ -1726,7 +1734,7 @@ MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) { initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry()); } -MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {} +MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default; void MemoryDependenceWrapperPass::releaseMemory() { MemDep.reset(); diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp index 86de474c7aa9..8fe190e8bcf8 100644 --- a/contrib/llvm/lib/Analysis/MemorySSA.cpp +++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp @@ -1,48 +1,63 @@ -//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===// +//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===----------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // // This file implements the MemorySSA class. // -//===----------------------------------------------------------------===// +//===----------------------------------------------------------------------===// + #include "llvm/Analysis/MemorySSA.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/PHITransAddr.h" #include "llvm/IR/AssemblyAnnotationWriter.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Use.h" +#include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <cassert> +#include <iterator> +#include <memory> +#include <utility> 
-#define DEBUG_TYPE "memoryssa" using namespace llvm; + +#define DEBUG_TYPE "memoryssa" + INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) @@ -66,30 +81,34 @@ static cl::opt<bool> cl::desc("Verify MemorySSA in legacy printer pass.")); namespace llvm { + /// \brief An assembly annotator class to print Memory SSA information in /// comments. class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { friend class MemorySSA; + const MemorySSA *MSSA; public: MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) { + void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) override { if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) OS << "; " << *MA << "\n"; } - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { + void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) override { if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) OS << "; " << *MA << "\n"; } }; -} + +} // end namespace llvm namespace { + /// Our current alias analysis API differentiates heavily between calls and /// non-calls, and functions called on one usually assert on the other. /// This class encapsulates the distinction to simplify other code that wants @@ -97,7 +116,9 @@ namespace { /// For example, this class is used as a densemap key in the use optimizer. class MemoryLocOrCall { public: - MemoryLocOrCall() : IsCall(false) {} + bool IsCall = false; + + MemoryLocOrCall() = default; MemoryLocOrCall(MemoryUseOrDef *MUD) : MemoryLocOrCall(MUD->getMemoryInst()) {} MemoryLocOrCall(const MemoryUseOrDef *MUD) @@ -116,14 +137,13 @@ public: } } - explicit MemoryLocOrCall(const MemoryLocation &Loc) - : IsCall(false), Loc(Loc) {} + explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {} - bool IsCall; ImmutableCallSite getCS() const { assert(IsCall); return CS; } + MemoryLocation getLoc() const { assert(!IsCall); return Loc; @@ -144,16 +164,20 @@ private: MemoryLocation Loc; }; }; -} + +} // end anonymous namespace namespace llvm { + template <> struct DenseMapInfo<MemoryLocOrCall> { static inline MemoryLocOrCall getEmptyKey() { return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); } + static inline MemoryLocOrCall getTombstoneKey() { return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); } + static unsigned getHashValue(const MemoryLocOrCall &MLOC) { if (MLOC.IsCall) return hash_combine(MLOC.IsCall, @@ -162,6 +186,7 @@ template <> struct DenseMapInfo<MemoryLocOrCall> { return hash_combine( MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); } + static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { return LHS == RHS; } @@ -169,6 +194,8 @@ template <> struct DenseMapInfo<MemoryLocOrCall> { enum class Reorderability { Always, IfNoAlias, Never }; +} // end namespace llvm + /// This does one-way checks to see if Use could theoretically be hoisted above /// MayClobber. This will not check the other way around. 
/// @@ -235,7 +262,7 @@ static bool instructionClobbersQuery(MemoryDef *MD, if (UseCS) { ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); - return I != MRI_NoModRef; + return isModOrRefSet(I); } if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) { @@ -251,7 +278,7 @@ static bool instructionClobbersQuery(MemoryDef *MD, } } - return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod; + return isModSet(AA.getModRefInfo(DefInst, UseLoc)); } static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, @@ -271,22 +298,21 @@ bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, AliasAnalysis &AA) { return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA); } -} namespace { + struct UpwardsMemoryQuery { // True if our original query started off as a call - bool IsCall; + bool IsCall = false; // The pointer location we started the query with. This will be empty if // IsCall is true. MemoryLocation StartingLoc; // This is the instruction we were querying about. - const Instruction *Inst; + const Instruction *Inst = nullptr; // The MemoryAccess we actually got called with, used to test local domination - const MemoryAccess *OriginalAccess; + const MemoryAccess *OriginalAccess = nullptr; - UpwardsMemoryQuery() - : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {} + UpwardsMemoryQuery() = default; UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access) : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) { @@ -295,6 +321,8 @@ struct UpwardsMemoryQuery { } }; +} // end anonymous namespace + static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, AliasAnalysis &AA) { Instruction *Inst = MD->getMemoryInst(); @@ -394,6 +422,8 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt, "ClobberAt never acted as a clobber"); } +namespace { + /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up /// in one class. class ClobberWalker { @@ -569,7 +599,7 @@ class ClobberWalker { struct generic_def_path_iterator : public iterator_facade_base<generic_def_path_iterator<T, Walker>, std::forward_iterator_tag, T *> { - generic_def_path_iterator() : W(nullptr), N(None) {} + generic_def_path_iterator() = default; generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} T &operator*() const { return curNode(); } @@ -588,8 +618,8 @@ class ClobberWalker { private: T &curNode() const { return W->Paths[*N]; } - Walker *W; - Optional<ListIndex> N; + Walker *W = nullptr; + Optional<ListIndex> N = None; }; using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; @@ -664,7 +694,7 @@ class ClobberWalker { }; MemoryPhi *Current = Phi; - while (1) { + while (true) { assert(!MSSA.isLiveOnEntryDef(Current) && "liveOnEntry wasn't treated as a clobber?"); @@ -842,30 +872,33 @@ struct RenamePassData { RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, MemoryAccess *M) : DTN(D), ChildIt(It), IncomingVal(M) {} + void swap(RenamePassData &RHS) { std::swap(DTN, RHS.DTN); std::swap(ChildIt, RHS.ChildIt); std::swap(IncomingVal, RHS.IncomingVal); } }; -} // anonymous namespace + +} // end anonymous namespace namespace llvm { + /// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no /// longer does caching on its own, /// but the name has been retained for the moment. 
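An assumed client of the walker documented above; only the MemorySSA calls are real API, the surrounding names are made up.

  // Find the nearest access that may clobber LoadI; liveOnEntry means no
  // access inside the function can affect it.
  MemorySSAWalker *Walker = MSSA.getWalker();
  MemoryAccess *Clobber =
      Walker->getClobberingMemoryAccess(MSSA.getMemoryAccess(LoadI));
  if (MSSA.isLiveOnEntryDef(Clobber)) {
    // safe to treat the load as reading its value from function entry
  }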
class MemorySSA::CachingWalker final : public MemorySSAWalker { ClobberWalker Walker; - bool AutoResetWalker; + bool AutoResetWalker = true; MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); - void verifyRemoved(MemoryAccess *); public: CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); - ~CachingWalker() override; + ~CachingWalker() override = default; using MemorySSAWalker::getClobberingMemoryAccess; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, const MemoryLocation &) override; @@ -884,6 +917,8 @@ public: } }; +} // end namespace llvm + void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, bool RenameAllUses) { // Pass through values to our successors @@ -1032,17 +1067,20 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = make_unique<AccessList>(); + Res.first->second = llvm::make_unique<AccessList>(); return Res.first->second.get(); } + MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = make_unique<DefsList>(); + Res.first->second = llvm::make_unique<DefsList>(); return Res.first->second.get(); } +namespace llvm { + /// This class is a batch walker of all MemoryUse's in the program, and points /// their defining access at the thing that actually clobbers them. Because it /// is a batch walker that touches everything, it does not operate like the @@ -1077,15 +1115,19 @@ private: unsigned long LastKill; bool LastKillValid; }; + void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &, SmallVectorImpl<MemoryAccess *> &, DenseMap<MemoryLocOrCall, MemlocStackInfo> &); + MemorySSA *MSSA; MemorySSAWalker *Walker; AliasAnalysis *AA; DominatorTree *DT; }; +} // end namespace llvm + /// Optimize the uses in a given block This is basically the SSA renaming /// algorithm, with one caveat: We are able to use a single stack for all /// MemoryUses. This is because the set of *possible* reaching MemoryDefs is @@ -1281,8 +1323,9 @@ void MemorySSA::buildMemorySSA() { // semantics do *not* imply that something with no immediate uses can simply // be removed. BasicBlock &StartingPoint = F.getEntryBlock(); - LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr, - &StartingPoint, NextID++); + LiveOnEntryDef = + llvm::make_unique<MemoryDef>(F.getContext(), nullptr, nullptr, + &StartingPoint, NextID++); DenseMap<const BasicBlock *, unsigned int> BBNumbers; unsigned NextBBNum = 0; @@ -1343,7 +1386,7 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { if (Walker) return Walker.get(); - Walker = make_unique<CachingWalker>(this, AA, DT); + Walker = llvm::make_unique<CachingWalker>(this, AA, DT); return Walker.get(); } @@ -1462,6 +1505,7 @@ static inline bool isOrdered(const Instruction *I) { } return false; } + /// \brief Helper function to create new memory accesses MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { // The assume intrinsic has a control dependency which we model by claiming @@ -1473,7 +1517,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { return nullptr; // Find out what affect this instruction has on memory. 
- ModRefInfo ModRef = AA->getModRefInfo(I); + ModRefInfo ModRef = AA->getModRefInfo(I, None); // The isOrdered check is used to ensure that volatiles end up as defs // (atomics end up as ModRef right now anyway). Until we separate the // ordering chain from the memory chain, this enables people to see at least @@ -1482,8 +1526,8 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { // Separate memory aliasing and ordering into two different chains so that we // can precisely represent both "what memory will this read/write/is clobbered // by" and "what instructions can I move this past". - bool Def = bool(ModRef & MRI_Mod) || isOrdered(I); - bool Use = bool(ModRef & MRI_Ref); + bool Def = isModSet(ModRef) || isOrdered(I); + bool Use = isRefSet(ModRef); // It's possible for an instruction to not modify memory at all. During // construction, we ignore them. @@ -1675,7 +1719,6 @@ void MemorySSA::verifyDomination(Function &F) const { /// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use /// appears in the use list of \p Def. - void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { #ifndef NDEBUG // The live on entry use may cause us to get a NULL def here @@ -1739,7 +1782,6 @@ void MemorySSA::renumberBlock(const BasicBlock *B) const { /// \returns True if \p Dominator dominates \p Dominatee. bool MemorySSA::locallyDominates(const MemoryAccess *Dominator, const MemoryAccess *Dominatee) const { - const BasicBlock *DominatorBlock = Dominator->getBlock(); assert((DominatorBlock == Dominatee->getBlock()) && @@ -1887,7 +1929,7 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); - return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT)); + return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT)); } PreservedAnalyses MemorySSAPrinterPass::run(Function &F, @@ -1936,9 +1978,7 @@ MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A, DominatorTree *D) - : MemorySSAWalker(M), Walker(*M, *A, *D), AutoResetWalker(true) {} - -MemorySSA::CachingWalker::~CachingWalker() {} + : MemorySSAWalker(M), Walker(*M, *A, *D) {} void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) @@ -2059,7 +2099,6 @@ MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( return Use->getDefiningAccess(); return StartingAccess; } -} // namespace llvm void MemoryPhi::deleteMe(DerivedUser *Self) { delete static_cast<MemoryPhi *>(Self); diff --git a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp index 1ff84471c094..f5d89f699a5a 100644 --- a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -13,13 +13,11 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -85,12 +83,11 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { unsigned i = 0; for (auto *Pred : predecessors(BB)) 
Phi->addIncoming(PhiOps[i++], Pred); + InsertedPHIs.push_back(Phi); } - Result = Phi; } - if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Result)) - InsertedPHIs.push_back(MP); + // Set ourselves up for the next variable by resetting visited state. VisitedBlocks.erase(BB); return Result; diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index e12cdf9182c7..1e321f17d59f 100644 --- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index e9e354ebb88f..d54fb700200d 100644 --- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -13,23 +13,47 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Triple.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TypeMetadataUtils.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" #include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "module-summary-analysis" @@ -174,7 +198,7 @@ static void addIntrinsicToSummary( static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, const Function &F, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, bool HasLocalsInUsed, + ProfileSummaryInfo *PSI, bool HasLocalsInUsedOrAsm, DenseSet<GlobalValue::GUID> &CantBePromoted) { // Summary not currently supported for anonymous functions, they should // have been named. 
@@ -191,9 +215,13 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls; ICallPromotionAnalysis ICallAnalysis; + SmallPtrSet<const User *, 8> Visited; + + // Add personality function, prefix data and prologue data to function's ref + // list. + findRefEdges(Index, &F, RefEdges, Visited); bool HasInlineAsmMaybeReferencingInternal = false; - SmallPtrSet<const User *, 8> Visited; for (const BasicBlock &BB : F) for (const Instruction &I : BB) { if (isa<DbgInfoIntrinsic>(I)) @@ -210,11 +238,16 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // a local value from inline assembly to ensure we don't export a // reference (which would require renaming and promotion of the // referenced value). - if (HasLocalsInUsed && CI && CI->isInlineAsm()) + if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm()) HasInlineAsmMaybeReferencingInternal = true; auto *CalledValue = CS.getCalledValue(); auto *CalledFunction = CS.getCalledFunction(); + if (CalledValue && !CalledFunction) { + CalledValue = CalledValue->stripPointerCastsNoFollowAliases(); + // Stripping pointer casts can reveal a called function. + CalledFunction = dyn_cast<Function>(CalledValue); + } // Check if this is an alias to a function. If so, get the // called aliasee for the checks below. if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { @@ -248,7 +281,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, if (CI && CI->isInlineAsm()) continue; // Skip direct calls. - if (!CS.getCalledValue() || isa<Constant>(CS.getCalledValue())) + if (!CalledValue || isa<Constant>(CalledValue)) continue; uint32_t NumVals, NumCandidates; @@ -275,11 +308,17 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // FIXME: refactor this to use the same code that inliner is using. 
F.isVarArg(); GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, - /* Live = */ false); + /* Live = */ false, F.isDSOLocal()); + FunctionSummary::FFlags FunFlags{ + F.hasFnAttribute(Attribute::ReadNone), + F.hasFnAttribute(Attribute::ReadOnly), + F.hasFnAttribute(Attribute::NoRecurse), + F.returnDoesNotAlias(), + }; auto FuncSummary = llvm::make_unique<FunctionSummary>( - Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(), - TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), - TypeCheckedLoadVCalls.takeVector(), + Flags, NumInsts, FunFlags, RefEdges.takeVector(), + CallGraphEdges.takeVector(), TypeTests.takeVector(), + TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), TypeTestAssumeConstVCalls.takeVector(), TypeCheckedLoadConstVCalls.takeVector()); if (NonRenamableLocal) @@ -295,7 +334,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, findRefEdges(Index, &V, RefEdges, Visited); bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal, - /* Live = */ false); + /* Live = */ false, V.isDSOLocal()); auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector()); if (NonRenamableLocal) @@ -308,8 +347,8 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, DenseSet<GlobalValue::GUID> &CantBePromoted) { bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, - /* Live = */ false); - auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{}); + /* Live = */ false, A.isDSOLocal()); + auto AS = llvm::make_unique<AliasSummary>(Flags); auto *Aliasee = A.getBaseObject(); auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee); assert(AliaseeSummary && "Alias expects aliasee summary to be parsed"); @@ -352,6 +391,59 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( } } + bool HasLocalInlineAsmSymbol = false; + if (!M.getModuleInlineAsm().empty()) { + // Collect the local values defined by module level asm, and set up + // summaries for these symbols so that they can be marked as NoRename, + // to prevent export of any use of them in regular IR that would require + // renaming within the module level asm. Note we don't need to create a + // summary for weak or global defs, as they don't need to be flagged as + // NoRename, and defs in module level asm can't be imported anyway. + // Also, any values used but not defined within module level asm should + // be listed on the llvm.used or llvm.compiler.used global and marked as + // referenced from there. + ModuleSymbolTable::CollectAsmSymbols( + M, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) { + // Symbols not marked as Weak or Global are local definitions. + if (Flags & (object::BasicSymbolRef::SF_Weak | + object::BasicSymbolRef::SF_Global)) + return; + HasLocalInlineAsmSymbol = true; + GlobalValue *GV = M.getNamedValue(Name); + if (!GV) + return; + assert(GV->isDeclaration() && "Def in module asm already has definition"); + GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage, + /* NotEligibleToImport = */ true, + /* Live = */ true, + /* Local */ GV->isDSOLocal()); + CantBePromoted.insert(GlobalValue::getGUID(Name)); + // Create the appropriate summary type. 
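// For example, a local symbol defined only in module-level asm that resolves to a
// Function declaration gets a FunctionSummary with empty call/ref lists, while any
// other named value gets a GlobalVarSummary; both are flagged NotEligibleToImport
// and Live, and their GUIDs are added to CantBePromoted.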
+ if (Function *F = dyn_cast<Function>(GV)) { + std::unique_ptr<FunctionSummary> Summary = + llvm::make_unique<FunctionSummary>( + GVFlags, 0, + FunctionSummary::FFlags{ + F->hasFnAttribute(Attribute::ReadNone), + F->hasFnAttribute(Attribute::ReadOnly), + F->hasFnAttribute(Attribute::NoRecurse), + F->returnDoesNotAlias()}, + ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{}, + ArrayRef<GlobalValue::GUID>{}, + ArrayRef<FunctionSummary::VFuncId>{}, + ArrayRef<FunctionSummary::VFuncId>{}, + ArrayRef<FunctionSummary::ConstVCall>{}, + ArrayRef<FunctionSummary::ConstVCall>{}); + Index.addGlobalValueSummary(Name, std::move(Summary)); + } else { + std::unique_ptr<GlobalVarSummary> Summary = + llvm::make_unique<GlobalVarSummary>(GVFlags, + ArrayRef<ValueInfo>{}); + Index.addGlobalValueSummary(Name, std::move(Summary)); + } + }); + } + // Compute summaries for all functions defined in module, and save in the // index. for (auto &F : M) { @@ -369,7 +461,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( BFI = BFIPtr.get(); } - computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty(), + computeFunctionSummary(Index, M, F, BFI, PSI, + !LocalsUsed.empty() || HasLocalInlineAsmSymbol, CantBePromoted); } @@ -401,52 +494,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( setLiveRoot(Index, "llvm.global_dtors"); setLiveRoot(Index, "llvm.global.annotations"); - if (!M.getModuleInlineAsm().empty()) { - // Collect the local values defined by module level asm, and set up - // summaries for these symbols so that they can be marked as NoRename, - // to prevent export of any use of them in regular IR that would require - // renaming within the module level asm. Note we don't need to create a - // summary for weak or global defs, as they don't need to be flagged as - // NoRename, and defs in module level asm can't be imported anyway. - // Also, any values used but not defined within module level asm should - // be listed on the llvm.used or llvm.compiler.used global and marked as - // referenced from there. - ModuleSymbolTable::CollectAsmSymbols( - M, [&M, &Index, &CantBePromoted](StringRef Name, - object::BasicSymbolRef::Flags Flags) { - // Symbols not marked as Weak or Global are local definitions. - if (Flags & (object::BasicSymbolRef::SF_Weak | - object::BasicSymbolRef::SF_Global)) - return; - GlobalValue *GV = M.getNamedValue(Name); - if (!GV) - return; - assert(GV->isDeclaration() && "Def in module asm already has definition"); - GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage, - /* NotEligibleToImport = */ true, - /* Live = */ true); - CantBePromoted.insert(GlobalValue::getGUID(Name)); - // Create the appropriate summary type. 
- if (isa<Function>(GV)) { - std::unique_ptr<FunctionSummary> Summary = - llvm::make_unique<FunctionSummary>( - GVFlags, 0, ArrayRef<ValueInfo>{}, - ArrayRef<FunctionSummary::EdgeTy>{}, - ArrayRef<GlobalValue::GUID>{}, - ArrayRef<FunctionSummary::VFuncId>{}, - ArrayRef<FunctionSummary::VFuncId>{}, - ArrayRef<FunctionSummary::ConstVCall>{}, - ArrayRef<FunctionSummary::ConstVCall>{}); - Index.addGlobalValueSummary(Name, std::move(Summary)); - } else { - std::unique_ptr<GlobalVarSummary> Summary = - llvm::make_unique<GlobalVarSummary>(GVFlags, - ArrayRef<ValueInfo>{}); - Index.addGlobalValueSummary(Name, std::move(Summary)); - } - }); - } - bool IsThinLTO = true; if (auto *MD = mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO"))) @@ -503,6 +550,7 @@ ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) { } char ModuleSummaryIndexWrapperPass::ID = 0; + INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp index ed03406ca8c6..096ea661ecb6 100644 --- a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -123,7 +123,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS, // These functions don't access any memory visible to the compiler. // Note that this doesn't include objc_retainBlock, because it updates // pointers when it copies block data. - return MRI_NoModRef; + return ModRefInfo::NoModRef; default: break; } diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp index e3e74aa249da..55335f3a7cb0 100644 --- a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp +++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -21,8 +21,6 @@ using namespace llvm::objcarc; /// \brief A handy option to enable/disable all ARC Optimizations. bool llvm::objcarc::EnableARCOpts; -static cl::opt<bool, true> -EnableARCOptimizations("enable-objc-arc-opts", - cl::desc("enable/disable all ARC Optimizations"), - cl::location(EnableARCOpts), - cl::init(true)); +static cl::opt<bool, true> EnableARCOptimizations( + "enable-objc-arc-opts", cl::desc("enable/disable all ARC Optimizations"), + cl::location(EnableARCOpts), cl::init(true), cl::Hidden); diff --git a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp b/contrib/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp index eb259fd7a384..8ece0a2a3ed3 100644 --- a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp +++ b/contrib/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -1,4 +1,4 @@ -//===- OptimizationDiagnosticInfo.cpp - Optimization Diagnostic -*- C++ -*-===// +//===- OptimizationRemarkEmitter.cpp - Optimization Diagnostic --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,11 +12,10 @@ // used to compute the "hotness" of the diagnostic message. 
//===----------------------------------------------------------------------===// -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" @@ -64,86 +63,6 @@ Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) { return BFI->getBlockProfileCount(cast<BasicBlock>(V)); } -namespace llvm { -namespace yaml { - -void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping( - IO &io, DiagnosticInfoOptimizationBase *&OptDiag) { - assert(io.outputting() && "input not yet implemented"); - - if (io.mapTag("!Passed", - (OptDiag->getKind() == DK_OptimizationRemark || - OptDiag->getKind() == DK_MachineOptimizationRemark))) - ; - else if (io.mapTag( - "!Missed", - (OptDiag->getKind() == DK_OptimizationRemarkMissed || - OptDiag->getKind() == DK_MachineOptimizationRemarkMissed))) - ; - else if (io.mapTag( - "!Analysis", - (OptDiag->getKind() == DK_OptimizationRemarkAnalysis || - OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis))) - ; - else if (io.mapTag("!AnalysisFPCommute", - OptDiag->getKind() == - DK_OptimizationRemarkAnalysisFPCommute)) - ; - else if (io.mapTag("!AnalysisAliasing", - OptDiag->getKind() == - DK_OptimizationRemarkAnalysisAliasing)) - ; - else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure)) - ; - else - llvm_unreachable("Unknown remark type"); - - // These are read-only for now. - DiagnosticLocation DL = OptDiag->getLocation(); - StringRef FN = - GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName()); - - StringRef PassName(OptDiag->PassName); - io.mapRequired("Pass", PassName); - io.mapRequired("Name", OptDiag->RemarkName); - if (!io.outputting() || DL.isValid()) - io.mapOptional("DebugLoc", DL); - io.mapRequired("Function", FN); - io.mapOptional("Hotness", OptDiag->Hotness); - io.mapOptional("Args", OptDiag->Args); -} - -template <> struct MappingTraits<DiagnosticLocation> { - static void mapping(IO &io, DiagnosticLocation &DL) { - assert(io.outputting() && "input not yet implemented"); - - StringRef File = DL.getFilename(); - unsigned Line = DL.getLine(); - unsigned Col = DL.getColumn(); - - io.mapRequired("File", File); - io.mapRequired("Line", Line); - io.mapRequired("Column", Col); - } - - static const bool flow = true; -}; - -// Implement this as a mapping for now to get proper quotation for the value. -template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> { - static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) { - assert(io.outputting() && "input not yet implemented"); - io.mapRequired(A.Key.data(), A.Val); - if (A.Loc.isValid()) - io.mapOptional("DebugLoc", A.Loc); - } -}; - -} // end namespace yaml -} // end namespace llvm - -LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument) - void OptimizationRemarkEmitter::computeHotness( DiagnosticInfoIROptimization &OptDiag) { const Value *V = OptDiag.getCodeRegion(); @@ -155,23 +74,14 @@ void OptimizationRemarkEmitter::emit( DiagnosticInfoOptimizationBase &OptDiagBase) { auto &OptDiag = cast<DiagnosticInfoIROptimization>(OptDiagBase); computeHotness(OptDiag); - // If a diagnostic has a hotness value, then only emit it if its hotness - // meets the threshold. 
- if (OptDiag.getHotness() && - *OptDiag.getHotness() < - F->getContext().getDiagnosticsHotnessThreshold()) { + + // Only emit it if its hotness meets the threshold. + if (OptDiag.getHotness().getValueOr(0) < + F->getContext().getDiagnosticsHotnessThreshold()) { return; } - yaml::Output *Out = F->getContext().getDiagnosticsOutputFile(); - if (Out) { - auto *P = const_cast<DiagnosticInfoOptimizationBase *>(&OptDiagBase); - *Out << P; - } - // FIXME: now that IsVerbose is part of DI, filtering for this will be moved - // from here to clang. - if (!OptDiag.isVerbose() || shouldEmitVerbose()) - F->getContext().diagnose(OptDiag); + F->getContext().diagnose(OptDiag); } OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass() diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp index 1caf151546d9..2282401085d4 100644 --- a/contrib/llvm/lib/Analysis/PostDominators.cpp +++ b/contrib/llvm/lib/Analysis/PostDominators.cpp @@ -12,13 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PostDominators.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/GenericDomTreeConstruction.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" + using namespace llvm; #define DEBUG_TYPE "postdomtree" @@ -28,6 +26,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// char PostDominatorTreeWrapperPass::ID = 0; + INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree", "Post-Dominator Tree Construction", true, true) diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 12b86daa602b..671744f93fb8 100644 --- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -30,7 +30,7 @@ using namespace llvm; // considered cold). static cl::opt<int> ProfileSummaryCutoffHot( - "profile-summary-cutoff-hot", cl::Hidden, cl::init(999000), cl::ZeroOrMore, + "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore, cl::desc("A count is hot if it exceeds the minimum count to" " reach this percentile of total counts.")); @@ -39,9 +39,21 @@ static cl::opt<int> ProfileSummaryCutoffCold( cl::desc("A count is cold if it is below the minimum count" " to reach this percentile of total counts.")); -// Find the minimum count to reach a desired percentile of counts. -static uint64_t getMinCountForPercentile(SummaryEntryVector &DS, - uint64_t Percentile) { +static cl::opt<bool> ProfileSampleAccurate( + "profile-sample-accurate", cl::Hidden, cl::init(false), + cl::desc("If the sample profile is accurate, we will mark all un-sampled " + "callsite as cold. Otherwise, treat un-sampled callsites as if " + "we have no profile.")); +static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( + "profile-summary-huge-working-set-size-threshold", cl::Hidden, + cl::init(15000), cl::ZeroOrMore, + cl::desc("The code working set size is considered huge if the number of" + " blocks required to reach the -profile-summary-cutoff-hot" + " percentile exceeds this count.")); + +// Find the summary entry for a desired percentile of counts. 
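// For example, with the new default -profile-summary-cutoff-hot of 990000 (99.0% of
// total counts), this returns the first detailed-summary entry whose Cutoff is at
// least 990000; its MinCount becomes the hot-count threshold, and its NumCounts is
// compared against -profile-summary-huge-working-set-size-threshold (default 15000)
// to decide hasHugeWorkingSetSize().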
+static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS, + uint64_t Percentile) { auto Compare = [](const ProfileSummaryEntry &Entry, uint64_t Percentile) { return Entry.Cutoff < Percentile; }; @@ -50,7 +62,7 @@ static uint64_t getMinCountForPercentile(SummaryEntryVector &DS, // detailed summary. if (It == DS.end()) report_fatal_error("Desired percentile exceeds the maximum cutoff"); - return It->MinCount; + return *It; } // The profile summary metadata may be attached either by the frontend or by @@ -78,10 +90,12 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst, if (hasSampleProfile()) { // In sample PGO mode, check if there is a profile metadata on the // instruction. If it is present, determine hotness solely based on that, - // since the sampled entry count may not be accurate. + // since the sampled entry count may not be accurate. If there is no + // annotated on the instruction, return None. uint64_t TotalCount; if (Inst->extractProfTotalWeight(TotalCount)) return TotalCount; + return None; } if (BFI) return BFI->getBlockProfileCount(Inst->getParent()); @@ -161,10 +175,20 @@ void ProfileSummaryInfo::computeThresholds() { if (!computeSummary()) return; auto &DetailedSummary = Summary->getDetailedSummary(); - HotCountThreshold = - getMinCountForPercentile(DetailedSummary, ProfileSummaryCutoffHot); - ColdCountThreshold = - getMinCountForPercentile(DetailedSummary, ProfileSummaryCutoffCold); + auto &HotEntry = + getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot); + HotCountThreshold = HotEntry.MinCount; + auto &ColdEntry = + getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffCold); + ColdCountThreshold = ColdEntry.MinCount; + HasHugeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; +} + +bool ProfileSummaryInfo::hasHugeWorkingSetSize() { + if (!HasHugeWorkingSetSize) + computeThresholds(); + return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); } bool ProfileSummaryInfo::isHotCount(uint64_t C) { @@ -199,7 +223,16 @@ bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI) { auto C = getProfileCount(CS.getInstruction(), BFI); - return C && isColdCount(*C); + if (C) + return isColdCount(*C); + + // In SamplePGO, if the caller has been sampled, and there is no profile + // annotatedon the callsite, we consider the callsite as cold. + // If there is no profile for the caller, and we know the profile is + // accurate, we consider the callsite as cold. + return (hasSampleProfile() && + (CS.getCaller()->getEntryCount() || ProfileSampleAccurate || + CS.getCaller()->hasFnAttribute("profile-sample-accurate"))); } INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", diff --git a/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp index 68c7535ea594..1fdaf4d55b59 100644 --- a/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp +++ b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp @@ -6,12 +6,16 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// Implementation of the pointer use visitors. 
-/// +// //===----------------------------------------------------------------------===// #include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include <algorithm> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp index b38e6225c840..c5d71b25e022 100644 --- a/contrib/llvm/lib/Analysis/RegionPass.cpp +++ b/contrib/llvm/lib/Analysis/RegionPass.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionPass.h" -#include "llvm/Analysis/RegionIterator.h" #include "llvm/IR/OptBisect.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" @@ -208,7 +207,7 @@ public: return false; } - StringRef getPassName() const override { return "Print Region IR"; }
+ StringRef getPassName() const override { return "Print Region IR"; } }; char PrintRegionPass::ID = 0; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 9539fd7c7559..0b8604187121 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -59,12 +59,23 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -72,28 +83,55 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <cassert> +#include <climits> +#include <cstddef> +#include <cstdint> +#include <cstdlib> +#include <map> +#include <memory> +#include <tuple> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "scalar-evolution" @@ -115,11 +153,11 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, cl::init(100)); // FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. 
+static cl::opt<bool> VerifySCEV( + "verify-scev", cl::Hidden, + cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); static cl::opt<bool> -VerifySCEV("verify-scev", - cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); -static cl::opt<bool> - VerifySCEVMap("verify-scev-maps", + VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " "ExprValueMap (slow)")); @@ -415,9 +453,6 @@ void SCEVUnknown::deleted() { } void SCEVUnknown::allUsesReplacedWith(Value *New) { - // Clear this SCEVUnknown from various maps. - SE->forgetMemoizedResults(this); - // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); @@ -514,10 +549,10 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { /// Since we do not continue running this routine on expression trees once we /// have seen unequal values, there is no need to track them in the cache. static int -CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache, +CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue, const LoopInfo *const LI, Value *LV, Value *RV, unsigned Depth) { - if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV})) + if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV)) return 0; // Order pointer values after integer values. This helps SCEVExpander form @@ -577,14 +612,14 @@ CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache, for (unsigned Idx : seq(0u, LNumOps)) { int Result = - CompareValueComplexity(EqCache, LI, LInst->getOperand(Idx), + CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx), RInst->getOperand(Idx), Depth + 1); if (Result != 0) return Result; } } - EqCache.insert({LV, RV}); + EqCacheValue.unionSets(LV, RV); return 0; } @@ -592,7 +627,8 @@ CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache, // than RHS, respectively. A three-way result allows recursive comparisons to be // more efficient. static int CompareSCEVComplexity( - SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV, + EquivalenceClasses<const SCEV *> &EqCacheSCEV, + EquivalenceClasses<const Value *> &EqCacheValue, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, DominatorTree &DT, unsigned Depth = 0) { // Fast-path: SCEVs are uniqued so we can do a quick equality check. @@ -604,7 +640,7 @@ static int CompareSCEVComplexity( if (LType != RType) return (int)LType - (int)RType; - if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS})) + if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.isEquivalent(LHS, RHS)) return 0; // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, @@ -614,11 +650,10 @@ static int CompareSCEVComplexity( const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); - SmallSet<std::pair<Value *, Value *>, 8> EqCache; - int X = CompareValueComplexity(EqCache, LI, LU->getValue(), RU->getValue(), - Depth + 1); + int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(), + RU->getValue(), Depth + 1); if (X == 0) - EqCacheSCEV.insert({LHS, RHS}); + EqCacheSCEV.unionSets(LHS, RHS); return X; } @@ -659,14 +694,19 @@ static int CompareSCEVComplexity( if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; + // Compare NoWrap flags. 
+ if (LA->getNoWrapFlags() != RA->getNoWrapFlags()) + return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags(); + // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { - int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i), - RA->getOperand(i), DT, Depth + 1); + int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, + LA->getOperand(i), RA->getOperand(i), DT, + Depth + 1); if (X != 0) return X; } - EqCacheSCEV.insert({LHS, RHS}); + EqCacheSCEV.unionSets(LHS, RHS); return 0; } @@ -682,15 +722,18 @@ static int CompareSCEVComplexity( if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; + // Compare NoWrap flags. + if (LC->getNoWrapFlags() != RC->getNoWrapFlags()) + return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags(); + for (unsigned i = 0; i != LNumOps; ++i) { - if (i >= RNumOps) - return 1; - int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i), - RC->getOperand(i), DT, Depth + 1); + int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, + LC->getOperand(i), RC->getOperand(i), DT, + Depth + 1); if (X != 0) return X; } - EqCacheSCEV.insert({LHS, RHS}); + EqCacheSCEV.unionSets(LHS, RHS); return 0; } @@ -699,14 +742,14 @@ static int CompareSCEVComplexity( const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); // Lexicographically compare udiv expressions. - int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(), - DT, Depth + 1); + int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(), + RC->getLHS(), DT, Depth + 1); if (X != 0) return X; - X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT, - Depth + 1); + X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(), + RC->getRHS(), DT, Depth + 1); if (X == 0) - EqCacheSCEV.insert({LHS, RHS}); + EqCacheSCEV.unionSets(LHS, RHS); return X; } @@ -717,10 +760,11 @@ static int CompareSCEVComplexity( const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); // Compare cast expressions by operand. - int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(), - RC->getOperand(), DT, Depth + 1); + int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, + LC->getOperand(), RC->getOperand(), DT, + Depth + 1); if (X == 0) - EqCacheSCEV.insert({LHS, RHS}); + EqCacheSCEV.unionSets(LHS, RHS); return X; } @@ -739,26 +783,26 @@ static int CompareSCEVComplexity( /// results from this routine. In other words, we don't want the results of /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. -/// static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, LoopInfo *LI, DominatorTree &DT) { if (Ops.size() < 2) return; // Noop - SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache; + EquivalenceClasses<const SCEV *> EqCacheSCEV; + EquivalenceClasses<const Value *> EqCacheValue; if (Ops.size() == 2) { // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; - if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0) + if (CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, RHS, LHS, DT) < 0) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. 
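// For example, ordering by SCEVType places SCEVConstants first, so after sorting any
// constant operand sits at Ops[0] where the add/mul folding code expects it, and
// structurally identical operands end up adjacent so duplicates can be combined.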
std::stable_sort(Ops.begin(), Ops.end(), - [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) { - return - CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0; + [&](const SCEV *LHS, const SCEV *RHS) { + return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, + LHS, RHS, DT) < 0; }); // Now that we are sorted by complexity, group elements of the same @@ -785,14 +829,16 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, // Returns the size of the SCEV S. static inline int sizeOfSCEV(const SCEV *S) { struct FindSCEVSize { - int Size; - FindSCEVSize() : Size(0) {} + int Size = 0; + + FindSCEVSize() = default; bool follow(const SCEV *S) { ++Size; // Keep looking at all operands of S. return true; } + bool isDone() const { return false; } @@ -1032,7 +1078,7 @@ private: const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; }; -} +} // end anonymous namespace //===----------------------------------------------------------------------===// // Simple SCEV method implementations @@ -1157,7 +1203,6 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, /// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) /// /// where BC(It, k) stands for binomial coefficient. -/// const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const { const SCEV *Result = getStart(); @@ -1256,6 +1301,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -1343,7 +1389,8 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase { const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; -} + +} // end anonymous namespace // The recurrence AR has been shown to have no signed/unsigned wrap or something // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as @@ -1473,7 +1520,6 @@ static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, // // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T // is `Delta` (defined below). -// template <typename ExtendOpTy> bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, @@ -1484,7 +1530,6 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, // time here. It is correct (but more expensive) to continue with a // non-constant `Start` and do a general SCEV subtraction to compute // `PreStart` below. 
- // const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start); if (!StartC) return false; @@ -1547,6 +1592,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -1733,6 +1779,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -1770,6 +1817,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -1981,12 +2029,12 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } /// getAnyExtendExpr - Return a SCEV for the given operand extended with /// unspecified bits out to the given type. -/// const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -2057,7 +2105,6 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// may be exposed. This helps getAddRecExpr short-circuit extra work in /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. -/// static bool CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, SmallVectorImpl<const SCEV *> &NewOps, @@ -2132,7 +2179,8 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, const SmallVectorImpl<const SCEV *> &Ops, SCEV::NoWrapFlags Flags) { using namespace std::placeholders; - typedef OverflowingBinaryOperator OBO; + + using OBO = OverflowingBinaryOperator; bool CanAnalyze = Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; @@ -2306,12 +2354,23 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // Check for truncates. If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be - // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n) + // folded. eg., n*trunc(x) + m*trunc(y) --> trunc(trunc(m)*x + trunc(n)*y) // if the contents of the resulting outer trunc fold to something simple. - for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) { - const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); - Type *DstType = Trunc->getType(); - Type *SrcType = Trunc->getOperand()->getType(); + auto FindTruncSrcType = [&]() -> Type * { + // We're ultimately looking to fold an addrec of truncs and muls of only + // constants and truncs, so if we find any other types of SCEV + // as operands of the addrec then we bail and return nullptr here. + // Otherwise, we return the type of the operand of a trunc that we find. 
+ if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx])) + return T->getOperand()->getType(); + if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { + const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1); + if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp)) + return T->getOperand()->getType(); + } + return nullptr; + }; + if (auto *SrcType = FindTruncSrcType()) { SmallVector<const SCEV *, 8> LargeOps; bool Ok = true; // Check all the operands to see if they can be represented in the @@ -2354,7 +2413,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1); // If it folds to something simple, use it. Otherwise, don't. if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) - return getTruncateExpr(Fold, DstType); + return getTruncateExpr(Fold, Ty); } } @@ -2608,8 +2667,8 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scAddExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); + for (const SCEV *Op : Ops) + ID.AddPointer(Op); void *IP = nullptr; SCEVAddExpr *S = static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); @@ -2619,6 +2678,7 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops, S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); } S->setNoWrapFlags(Flags); return S; @@ -2640,6 +2700,7 @@ ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops, S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); } S->setNoWrapFlags(Flags); return S; @@ -2679,20 +2740,24 @@ static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { /// Determine if any of the operands in this SCEV are a constant or if /// any of the add or multiply expressions in this SCEV contain a constant. -static bool containsConstantSomewhere(const SCEV *StartExpr) { - SmallVector<const SCEV *, 4> Ops; - Ops.push_back(StartExpr); - while (!Ops.empty()) { - const SCEV *CurrentExpr = Ops.pop_back_val(); - if (isa<SCEVConstant>(*CurrentExpr)) - return true; +static bool containsConstantInAddMulChain(const SCEV *StartExpr) { + struct FindConstantInAddMulChain { + bool FoundConstant = false; - if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) { - const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr); - Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end()); + bool follow(const SCEV *S) { + FoundConstant |= isa<SCEVConstant>(S); + return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S); } - } - return false; + + bool isDone() const { + return FoundConstant; + } + }; + + FindConstantInAddMulChain F; + SCEVTraversal<FindConstantInAddMulChain> ST(F); + ST.visitAll(StartExpr); + return F.FoundConstant; } /// Get a canonical multiply expression, or something simpler if possible. @@ -2729,7 +2794,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // If any of Add's ops are Adds or Muls with a constant, // apply this transformation as well. if (Add->getNumOperands() == 2) - if (containsConstantSomewhere(Add)) + // TODO: There are some cases where this transformation is not + // profitable, for example: + // Add = (C0 + X) * Y + Z. + // Maybe the scope of this transformation should be narrowed down. 
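// For example, 4 * (2 + %x) is distributed to (4 * 2) + (4 * %x) = 8 + 4 * %x so the
// constant part can fold.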
+ if (containsConstantInAddMulChain(Add)) return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), SCEV::FlagAnyWrap, Depth + 1), getMulExpr(LHSC, Add->getOperand(1), @@ -2941,6 +3010,34 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, return getOrCreateMulExpr(Ops, Flags); } +/// Represents an unsigned remainder expression based on unsigned division. +const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS, + const SCEV *RHS) { + assert(getEffectiveSCEVType(LHS->getType()) == + getEffectiveSCEVType(RHS->getType()) && + "SCEVURemExpr operand types don't match!"); + + // Short-circuit easy cases + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { + // If constant is one, the result is trivial + if (RHSC->getValue()->isOne()) + return getZero(LHS->getType()); // X urem 1 --> 0 + + // If constant is a power of two, fold into a zext(trunc(LHS)). + if (RHSC->getAPInt().isPowerOf2()) { + Type *FullTy = LHS->getType(); + Type *TruncTy = + IntegerType::get(getContext(), RHSC->getAPInt().logBase2()); + return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy); + } + } + + // Fallback to %a == %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y) + const SCEV *UDiv = getUDivExpr(LHS, RHS); + const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW); + return getMinusSCEV(LHS, Mult, SCEV::FlagNUW); +} + /// Get a canonical unsigned division expression, or something simpler if /// possible. const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, @@ -3056,6 +3153,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -3236,6 +3334,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); } S->setNoWrapFlags(Flags); return S; @@ -3391,6 +3490,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -3492,6 +3592,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -3714,7 +3815,6 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { } /// Return a SCEV corresponding to -V = -1*V -/// const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags) { if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) @@ -3957,6 +4057,7 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) { } namespace { + class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> { public: static const SCEV *rewrite(const SCEV *S, const Loop *L, @@ -3966,9 +4067,6 @@ public: return Rewriter.isValid() ? 
Result : SE.getCouldNotCompute(); } - SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) - : SCEVRewriteVisitor(SE), L(L), Valid(true) {} - const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (!SE.isLoopInvariant(Expr, L)) Valid = false; @@ -3986,10 +4084,93 @@ public: bool isValid() { return Valid; } private: + explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L) {} + + const Loop *L; + bool Valid = true; +}; + +/// This class evaluates the compare condition by matching it against the +/// condition of loop latch. If there is a match we assume a true value +/// for the condition while building SCEV nodes. +class SCEVBackedgeConditionFolder + : public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> { +public: + static const SCEV *rewrite(const SCEV *S, const Loop *L, + ScalarEvolution &SE) { + bool IsPosBECond = false; + Value *BECond = nullptr; + if (BasicBlock *Latch = L->getLoopLatch()) { + BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator()); + if (BI && BI->isConditional()) { + assert(BI->getSuccessor(0) != BI->getSuccessor(1) && + "Both outgoing branches should not target same header!"); + BECond = BI->getCondition(); + IsPosBECond = BI->getSuccessor(0) == L->getHeader(); + } else { + return S; + } + } + SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE); + return Rewriter.visit(S); + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + const SCEV *Result = Expr; + bool InvariantF = SE.isLoopInvariant(Expr, L); + + if (!InvariantF) { + Instruction *I = cast<Instruction>(Expr->getValue()); + switch (I->getOpcode()) { + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + Optional<const SCEV *> Res = + compareWithBackedgeCondition(SI->getCondition()); + if (Res.hasValue()) { + bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne(); + Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue()); + } + break; + } + default: { + Optional<const SCEV *> Res = compareWithBackedgeCondition(I); + if (Res.hasValue()) + Result = Res.getValue(); + break; + } + } + } + return Result; + } + +private: + explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond, + bool IsPosBECond, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond), + IsPositiveBECond(IsPosBECond) {} + + Optional<const SCEV *> compareWithBackedgeCondition(Value *IC); + const Loop *L; - bool Valid; + /// Loop back condition. + Value *BackedgeCond = nullptr; + /// Set to true if loop back is on positive branch condition. + bool IsPositiveBECond; }; +Optional<const SCEV *> +SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) { + + // If value matches the backedge condition for loop latch, + // then return a constant evolution node based on loopback + // branch taken. + if (BackedgeCond == IC) + return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext())) + : SE.getZero(Type::getInt1Ty(SE.getContext())); + return None; +} + class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> { public: static const SCEV *rewrite(const SCEV *S, const Loop *L, @@ -3999,9 +4180,6 @@ public: return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); } - SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) - : SCEVRewriteVisitor(SE), L(L), Valid(true) {} - const SCEV *visitUnknown(const SCEVUnknown *Expr) { // Only allow AddRecExprs for this loop. 
if (!SE.isLoopInvariant(Expr, L)) @@ -4015,12 +4193,17 @@ public: Valid = false; return Expr; } + bool isValid() { return Valid; } private: + explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L) {} + const Loop *L; - bool Valid; + bool Valid = true; }; + } // end anonymous namespace SCEV::NoWrapFlags @@ -4028,7 +4211,8 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { if (!AR->isAffine()) return SCEV::FlagAnyWrap; - typedef OverflowingBinaryOperator OBO; + using OBO = OverflowingBinaryOperator; + SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap; if (!AR->hasNoSignedWrap()) { @@ -4055,6 +4239,7 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { } namespace { + /// Represents an abstract binary operation. This may exist as a /// normal instruction or constant expression, or may have been /// derived from an expression tree. @@ -4062,16 +4247,16 @@ struct BinaryOp { unsigned Opcode; Value *LHS; Value *RHS; - bool IsNSW; - bool IsNUW; + bool IsNSW = false; + bool IsNUW = false; /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or /// constant expression. - Operator *Op; + Operator *Op = nullptr; explicit BinaryOp(Operator *Op) : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)), - IsNSW(false), IsNUW(false), Op(Op) { + Op(Op) { if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) { IsNSW = OBO->hasNoSignedWrap(); IsNUW = OBO->hasNoUnsignedWrap(); @@ -4080,11 +4265,10 @@ struct BinaryOp { explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false, bool IsNUW = false) - : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW), - Op(nullptr) {} + : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {} }; -} +} // end anonymous namespace /// Try to map \p V into a BinaryOp, and return \c None on failure. static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { @@ -4101,6 +4285,7 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { case Instruction::Sub: case Instruction::Mul: case Instruction::UDiv: + case Instruction::URem: case Instruction::And: case Instruction::Or: case Instruction::AShr: @@ -4145,7 +4330,7 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { if (auto *F = CI->getCalledFunction()) switch (F->getIntrinsicID()) { case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: { + case Intrinsic::uadd_with_overflow: if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT)) return BinaryOp(Instruction::Add, CI->getArgOperand(0), CI->getArgOperand(1)); @@ -4161,13 +4346,21 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { return BinaryOp(Instruction::Add, CI->getArgOperand(0), CI->getArgOperand(1), /* IsNSW = */ false, /* IsNUW*/ true); - } - case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: - return BinaryOp(Instruction::Sub, CI->getArgOperand(0), - CI->getArgOperand(1)); + if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT)) + return BinaryOp(Instruction::Sub, CI->getArgOperand(0), + CI->getArgOperand(1)); + // The same reasoning as sadd/uadd above. 
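// For example, once the no-wrap check succeeds, ssub.with.overflow(%a, %b) is modeled
// as the BinaryOp 'sub nsw %a, %b' and usub.with.overflow(%a, %b) as 'sub nuw %a, %b'.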
+ if (F->getIntrinsicID() == Intrinsic::ssub_with_overflow) + return BinaryOp(Instruction::Sub, CI->getArgOperand(0), + CI->getArgOperand(1), /* IsNSW = */ true, + /* IsNUW = */ false); + else + return BinaryOp(Instruction::Sub, CI->getArgOperand(0), + CI->getArgOperand(1), /* IsNSW = */ false, + /* IsNUW = */ true); case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: return BinaryOp(Instruction::Mul, CI->getArgOperand(0), @@ -4184,28 +4377,27 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { return None; } -/// Helper function to createAddRecFromPHIWithCasts. We have a phi +/// Helper function to createAddRecFromPHIWithCasts. We have a phi /// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via -/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the -/// way. This function checks if \p Op, an operand of this SCEVAddExpr, +/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the +/// way. This function checks if \p Op, an operand of this SCEVAddExpr, /// follows one of the following patterns: /// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) /// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) /// If the SCEV expression of \p Op conforms with one of the expected patterns /// we return the type of the truncation operation, and indicate whether the -/// truncated type should be treated as signed/unsigned by setting +/// truncated type should be treated as signed/unsigned by setting /// \p Signed to true/false, respectively. static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, bool &Signed, ScalarEvolution &SE) { - - // The case where Op == SymbolicPHI (that is, with no type conversions on - // the way) is handled by the regular add recurrence creating logic and + // The case where Op == SymbolicPHI (that is, with no type conversions on + // the way) is handled by the regular add recurrence creating logic and // would have already been triggered in createAddRecForPHI. Reaching it here - // means that createAddRecFromPHI had failed for this PHI before (e.g., + // means that createAddRecFromPHI had failed for this PHI before (e.g., // because one of the other operands of the SCEVAddExpr updating this PHI is - // not invariant). + // not invariant). // - // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in + // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in // this case predicates that allow us to prove that Op == SymbolicPHI will // be added. if (Op == SymbolicPHI) @@ -4228,7 +4420,7 @@ static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, const SCEV *X = Trunc->getOperand(); if (X != SymbolicPHI) return nullptr; - Signed = SExt ? true : false; + Signed = SExt != nullptr; return Trunc->getType(); } @@ -4257,7 +4449,7 @@ static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { // It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X), // and call this function with %SymbolicPHI = %X. 
// -// The analysis will find that the value coming around the backedge has +// The analysis will find that the value coming around the backedge has // the following SCEV: // BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step) // Upon concluding that this matches the desired pattern, the function @@ -4270,21 +4462,21 @@ static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { // The returned pair means that SymbolicPHI can be rewritten into NewAddRec // under the predicates {P1,P2,P3}. // This predicated rewrite will be cached in PredicatedSCEVRewrites: -// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)} +// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)} // // TODO's: // // 1) Extend the Induction descriptor to also support inductions that involve -// casts: When needed (namely, when we are called in the context of the -// vectorizer induction analysis), a Set of cast instructions will be +// casts: When needed (namely, when we are called in the context of the +// vectorizer induction analysis), a Set of cast instructions will be // populated by this method, and provided back to isInductionPHI. This is // needed to allow the vectorizer to properly record them to be ignored by // the cost model and to avoid vectorizing them (otherwise these casts, -// which are redundant under the runtime overflow checks, will be -// vectorized, which can be costly). +// which are redundant under the runtime overflow checks, will be +// vectorized, which can be costly). // // 2) Support additional induction/PHISCEV patterns: We also want to support -// inductions where the sext-trunc / zext-trunc operations (partly) occur +// inductions where the sext-trunc / zext-trunc operations (partly) occur // after the induction update operation (the induction increment): // // (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix) @@ -4294,17 +4486,16 @@ static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { // which correspond to a phi->trunc->add->sext/zext->phi update chain. // // 3) Outline common code with createAddRecFromPHI to avoid duplication. -// Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) { SmallVector<const SCEVPredicate *, 3> Predicates; - // *** Part1: Analyze if we have a phi-with-cast pattern for which we can + // *** Part1: Analyze if we have a phi-with-cast pattern for which we can // return an AddRec expression under some predicate. - + auto *PN = cast<PHINode>(SymbolicPHI->getValue()); const Loop *L = isIntegerLoopHeaderPHI(PN, LI); - assert (L && "Expecting an integer loop header phi"); + assert(L && "Expecting an integer loop header phi"); // The loop may have multiple entrances or multiple exits; we can analyze // this phi as an addrec if it has a unique entry value and a unique @@ -4339,12 +4530,12 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI return None; // If there is a single occurrence of the symbolic value, possibly - // casted, replace it with a recurrence. + // casted, replace it with a recurrence. 
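As a concrete picture of the "possibly casted" occurrence described above, the following loop (illustrative only, not from the patch) updates a 64-bit PHI through a trunc/sext pair, which is exactly the Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) shape this code looks for:

  long long symbolicPhiWithCast(int *a, long long n, long long step) {
    long long i = 0;                   // %SymbolicPHI
    while (i < n) {
      a[i] = 0;
      i = (long long)(int)(i + step);  // sext i32 (trunc i64 (%i + %step) to i32) to i64
    }
    return i;
  }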
unsigned FoundIndex = Add->getNumOperands(); Type *TruncTy = nullptr; bool Signed; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if ((TruncTy = + if ((TruncTy = isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this))) if (FoundIndex == e) { FoundIndex = i; @@ -4366,77 +4557,122 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI if (!isLoopInvariant(Accum, L)) return None; - - // *** Part2: Create the predicates + // *** Part2: Create the predicates // Analysis was successful: we have a phi-with-cast pattern for which we // can return an AddRec expression under the following predicates: // // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum) // fits within the truncated type (does not overflow) for i = 0 to n-1. - // P2: An Equal predicate that guarantees that + // P2: An Equal predicate that guarantees that // Start = (Ext ix (Trunc iy (Start) to ix) to iy) - // P3: An Equal predicate that guarantees that + // P3: An Equal predicate that guarantees that // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy) // - // As we next prove, the above predicates guarantee that: + // As we next prove, the above predicates guarantee that: // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy) // // // More formally, we want to prove that: - // Expr(i+1) = Start + (i+1) * Accum - // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum + // Expr(i+1) = Start + (i+1) * Accum + // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum // // Given that: - // 1) Expr(0) = Start - // 2) Expr(1) = Start + Accum + // 1) Expr(0) = Start + // 2) Expr(1) = Start + Accum // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2 // 3) Induction hypothesis (step i): - // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum // // Proof: // Expr(i+1) = // = Start + (i+1)*Accum // = (Start + i*Accum) + Accum - // = Expr(i) + Accum - // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum + // = Expr(i) + Accum + // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum // :: from step i // - // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum + // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum // // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) // + (Ext ix (Trunc iy (Accum) to ix) to iy) // + Accum :: from P3 // - // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy) + // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy) // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y) // // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum - // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum + // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum // // By induction, the same applies to all iterations 1<=i<n: // - + // Create a truncated addrec for which we will add a no overflow check (P1). const SCEV *StartVal = getSCEV(StartValueV); - const SCEV *PHISCEV = + const SCEV *PHISCEV = getAddRecExpr(getTruncateExpr(StartVal, TruncTy), - getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap); - const auto *AR = cast<SCEVAddRecExpr>(PHISCEV); + getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap); - SCEVWrapPredicate::IncrementWrapFlags AddedFlags = - Signed ? 
SCEVWrapPredicate::IncrementNSSW - : SCEVWrapPredicate::IncrementNUSW; - const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); - Predicates.push_back(AddRecPred); + // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr. + // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV + // will be constant. + // + // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't + // add P1. + if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) { + SCEVWrapPredicate::IncrementWrapFlags AddedFlags = + Signed ? SCEVWrapPredicate::IncrementNSSW + : SCEVWrapPredicate::IncrementNUSW; + const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); + Predicates.push_back(AddRecPred); + } // Create the Equal Predicates P2,P3: - auto AppendPredicate = [&](const SCEV *Expr) -> void { - assert (isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); + + // It is possible that the predicates P2 and/or P3 are computable at + // compile time due to StartVal and/or Accum being constants. + // If either one is, then we can check that now and escape if either P2 + // or P3 is false. + + // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy) + // for each of StartVal and Accum + auto getExtendedExpr = [&](const SCEV *Expr, + bool CreateSignExtend) -> const SCEV * { + assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); const SCEV *ExtendedExpr = - Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType()) - : getZeroExtendExpr(TruncatedExpr, Expr->getType()); + CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType()) + : getZeroExtendExpr(TruncatedExpr, Expr->getType()); + return ExtendedExpr; + }; + + // Given: + // ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy + // = getExtendedExpr(Expr) + // Determine whether the predicate P: Expr == ExtendedExpr + // is known to be false at compile time + auto PredIsKnownFalse = [&](const SCEV *Expr, + const SCEV *ExtendedExpr) -> bool { + return Expr != ExtendedExpr && + isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr); + }; + + const SCEV *StartExtended = getExtendedExpr(StartVal, Signed); + if (PredIsKnownFalse(StartVal, StartExtended)) { + DEBUG(dbgs() << "P2 is compile-time false\n";); + return None; + } + + // The Step is always Signed (because the overflow checks are either + // NSSW or NUSW) + const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true); + if (PredIsKnownFalse(Accum, AccumExtended)) { + DEBUG(dbgs() << "P3 is compile-time false\n";); + return None; + } + + auto AppendPredicate = [&](const SCEV *Expr, + const SCEV *ExtendedExpr) -> void { if (Expr != ExtendedExpr && !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) { const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr); @@ -4444,14 +4680,14 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI Predicates.push_back(Pred); } }; - - AppendPredicate(StartVal); - AppendPredicate(Accum); - + + AppendPredicate(StartVal, StartExtended); + AppendPredicate(Accum, AccumExtended); + // *** Part3: Predicates are ready. Now go ahead and create the new addrec in // which the casts had been folded away. The caller can rewrite SymbolicPHI // into NewAR if it will also add the runtime overflow checks specified in - // Predicates. + // Predicates. 
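A hedged, plain-integer sketch of the round-trip test behind the PredIsKnownFalse lambda above (assuming a 64-bit expression truncated to i32; this is not the real SCEV code):

  #include <cstdint>

  // Returns false exactly when the equal predicate "Expr == ext(trunc(Expr))"
  // cannot hold, in which case the analysis above bails out early.
  bool survivesI32RoundTrip(int64_t Expr, bool Signed) {
    int32_t Trunc = static_cast<int32_t>(Expr);
    int64_t Ext = Signed ? static_cast<int64_t>(Trunc)
                         : static_cast<int64_t>(static_cast<uint32_t>(Trunc));
    return Ext == Expr;
  }
  // survivesI32RoundTrip(7, true)                -> true
  // survivesI32RoundTrip(INT64_C(1) << 32, true) -> false (P2 known false)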
auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap); std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite = @@ -4463,7 +4699,6 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { - auto *PN = cast<PHINode>(SymbolicPHI->getValue()); const Loop *L = isIntegerLoopHeaderPHI(PN, LI); if (!L) @@ -4475,7 +4710,7 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite = I->second; // Analysis was done before and failed to create an AddRec: - if (Rewrite.first == SymbolicPHI) + if (Rewrite.first == SymbolicPHI) return None; // Analysis was done before and succeeded to create an AddRec under // a predicate: @@ -4497,6 +4732,30 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { return Rewrite; } +// FIXME: This utility is currently required because the Rewriter currently +// does not rewrite this expression: +// {0, +, (sext ix (trunc iy to ix) to iy)} +// into {0, +, %step}, +// even when the following Equal predicate exists: +// "%step == (sext ix (trunc iy to ix) to iy)". +bool PredicatedScalarEvolution::areAddRecsEqualWithPreds( + const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const { + if (AR1 == AR2) + return true; + + auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool { + if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) && + !Preds.implies(SE.getEqualPredicate(Expr2, Expr1))) + return false; + return true; + }; + + if (!areExprsEqual(AR1->getStart(), AR2->getStart()) || + !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE))) + return false; + return true; +} + /// A helper function for createAddRecFromPHI to handle simple cases. /// /// This function tries to find an AddRec expression for the simplest (yet most @@ -4612,7 +4871,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { SmallVector<const SCEV *, 8> Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (i != FoundIndex) - Ops.push_back(Add->getOperand(i)); + Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i), + L, *this)); const SCEV *Accum = getAddExpr(Ops); // This is not a valid addrec if the step amount is varying each @@ -5599,7 +5859,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { ScalarEvolution::LoopProperties ScalarEvolution::getLoopProperties(const Loop *L) { - typedef ScalarEvolution::LoopProperties LoopProperties; + using LoopProperties = ScalarEvolution::LoopProperties; auto Itr = LoopPropertiesCache.find(L); if (Itr == LoopPropertiesCache.end()) { @@ -5735,6 +5995,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { } case Instruction::UDiv: return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); + case Instruction::URem: + return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); case Instruction::Sub: { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BO->Op) @@ -5886,7 +6148,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { } break; - case Instruction::AShr: + case Instruction::AShr: { // AShr X, C, where C is a constant. 
ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS); if (!CI) @@ -5938,6 +6200,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { } break; } + } } switch (U->getOpcode()) { @@ -5948,6 +6211,21 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::SExt: + if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) { + // The NSW flag of a subtract does not always survive the conversion to + // A + (-1)*B. By pushing sign extension onto its operands we are much + // more likely to preserve NSW and allow later AddRec optimisations. + // + // NOTE: This is effectively duplicating this logic from getSignExtend: + // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> + // but by that point the NSW information has potentially been lost. + if (BO->Opcode == Instruction::Sub && BO->IsNSW) { + Type *Ty = U->getType(); + auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty); + auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty); + return getMinusSCEV(V1, V2, SCEV::FlagNSW); + } + } return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::BitCast: @@ -5987,8 +6265,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getUnknown(V); } - - //===----------------------------------------------------------------------===// // Iteration Count Computation Code // @@ -6177,11 +6453,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { SmallVector<Instruction *, 16> Worklist; PushLoopPHIs(L, Worklist); - SmallPtrSet<Instruction *, 8> Visited; + SmallPtrSet<Instruction *, 8> Discovered; while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I).second) - continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast<Value *>(I)); @@ -6202,7 +6476,31 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { ConstantEvolutionLoopExitValue.erase(PN); } - PushDefUseChildren(I, Worklist); + // Since we don't need to invalidate anything for correctness and we're + // only invalidating to make SCEV's results more precise, we get to stop + // early to avoid invalidating too much. This is especially important in + // cases like: + // + // %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node + // loop0: + // %pn0 = phi + // ... + // loop1: + // %pn1 = phi + // ... + // + // where both loop0 and loop1's backedge taken count uses the SCEV + // expression for %v. If we don't have the early stop below then in cases + // like the above, getBackedgeTakenInfo(loop1) will clear out the trip + // count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip + // count for loop1, effectively nullifying SCEV's trip count cache. + for (auto *U : I->users()) + if (auto *I = dyn_cast<Instruction>(U)) { + auto *LoopForUser = LI.getLoopFor(I->getParent()); + if (LoopForUser && L->contains(LoopForUser) && + Discovered.insert(I).second) + Worklist.push_back(I); + } } } @@ -6217,7 +6515,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { void ScalarEvolution::forgetLoop(const Loop *L) { // Drop any stored trip count value. 
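The SExt handling added a few hunks above targets sign-extended differences whose inner subtract carries nsw. An illustrative source pattern (not taken from the patch):

  long long widenDifference(int a, int b) {
    int d = a - b;        // signed overflow is UB, so the IR subtract is nsw
    return (long long)d;  // old model: sext(a - b); new model: sext(a) - sext(b), still nsw
  }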
auto RemoveLoopFromBackedgeMap = - [L](DenseMap<const Loop *, BackedgeTakenInfo> &Map) { + [](DenseMap<const Loop *, BackedgeTakenInfo> &Map, const Loop *L) { auto BTCPos = Map.find(L); if (BTCPos != Map.end()) { BTCPos->second.clear(); @@ -6225,47 +6523,59 @@ void ScalarEvolution::forgetLoop(const Loop *L) { } }; - RemoveLoopFromBackedgeMap(BackedgeTakenCounts); - RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts); + SmallVector<const Loop *, 16> LoopWorklist(1, L); + SmallVector<Instruction *, 32> Worklist; + SmallPtrSet<Instruction *, 16> Visited; - // Drop information about predicated SCEV rewrites for this loop. - for (auto I = PredicatedSCEVRewrites.begin(); - I != PredicatedSCEVRewrites.end();) { - std::pair<const SCEV *, const Loop *> Entry = I->first; - if (Entry.second == L) - PredicatedSCEVRewrites.erase(I++); - else - ++I; - } + // Iterate over all the loops and sub-loops to drop SCEV information. + while (!LoopWorklist.empty()) { + auto *CurrL = LoopWorklist.pop_back_val(); - // Drop information about expressions based on loop-header PHIs. - SmallVector<Instruction *, 16> Worklist; - PushLoopPHIs(L, Worklist); + RemoveLoopFromBackedgeMap(BackedgeTakenCounts, CurrL); + RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts, CurrL); - SmallPtrSet<Instruction *, 8> Visited; - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I).second) - continue; + // Drop information about predicated SCEV rewrites for this loop. + for (auto I = PredicatedSCEVRewrites.begin(); + I != PredicatedSCEVRewrites.end();) { + std::pair<const SCEV *, const Loop *> Entry = I->first; + if (Entry.second == CurrL) + PredicatedSCEVRewrites.erase(I++); + else + ++I; + } - ValueExprMapType::iterator It = - ValueExprMap.find_as(static_cast<Value *>(I)); - if (It != ValueExprMap.end()) { - eraseValueFromMap(It->first); - forgetMemoizedResults(It->second); - if (PHINode *PN = dyn_cast<PHINode>(I)) - ConstantEvolutionLoopExitValue.erase(PN); + auto LoopUsersItr = LoopUsers.find(CurrL); + if (LoopUsersItr != LoopUsers.end()) { + for (auto *S : LoopUsersItr->second) + forgetMemoizedResults(S); + LoopUsers.erase(LoopUsersItr); } - PushDefUseChildren(I, Worklist); - } + // Drop information about expressions based on loop-header PHIs. + PushLoopPHIs(CurrL, Worklist); - // Forget all contained loops too, to avoid dangling entries in the - // ValuesAtScopes map. - for (Loop *I : *L) - forgetLoop(I); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I).second) + continue; - LoopPropertiesCache.erase(L); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + eraseValueFromMap(It->first); + forgetMemoizedResults(It->second); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } + + LoopPropertiesCache.erase(CurrL); + // Forget all contained loops too, to avoid dangling entries in the + // ValuesAtScopes map. 
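The reworked forgetLoop above replaces recursion into sub-loops with an explicit worklist over the whole nest. A minimal sketch of that traversal shape, with a made-up LoopNode type standing in for llvm::Loop:

  #include <vector>

  struct LoopNode { std::vector<LoopNode *> SubLoops; };

  void forgetNest(LoopNode *Root, void (*DropCachedFacts)(LoopNode *)) {
    std::vector<LoopNode *> Worklist{Root};
    while (!Worklist.empty()) {
      LoopNode *Curr = Worklist.back();
      Worklist.pop_back();
      DropCachedFacts(Curr);                        // per-loop invalidation
      Worklist.insert(Worklist.end(), Curr->SubLoops.begin(),
                      Curr->SubLoops.end());        // queue contained loops
    }
  }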
+ LoopWorklist.append(CurrL->begin(), CurrL->end()); + } } void ScalarEvolution::forgetValue(Value *V) { @@ -6377,7 +6687,7 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, } ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) - : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) { + : ExactNotTaken(E), MaxNotTaken(E) { assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || isa<SCEVConstant>(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); @@ -6422,7 +6732,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( &&ExitCounts, bool Complete, const SCEV *MaxCount, bool MaxOrZero) : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) { - typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo; + using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; + ExitNotTaken.reserve(ExitCounts.size()); std::transform( ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken), @@ -6454,7 +6765,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo; + using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; SmallVector<EdgeExitInfo, 4> ExitCounts; bool CouldComputeBECount = true; @@ -6521,8 +6832,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, - bool AllowPredicates) { - + bool AllowPredicates) { // Okay, we've chosen an exiting block. See what condition causes us to exit // at this block and remember the exit block and whether all other targets // lead to the loop header. @@ -6785,19 +7095,19 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { - // If the condition was exit on true, convert the condition to exit on false - ICmpInst::Predicate Cond; + ICmpInst::Predicate Pred; if (!L->contains(FBB)) - Cond = ExitCond->getPredicate(); + Pred = ExitCond->getPredicate(); else - Cond = ExitCond->getInversePredicate(); + Pred = ExitCond->getInversePredicate(); + const ICmpInst::Predicate OriginalPred = Pred; // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { ExitLimit ItCnt = - computeLoadConstantCompareExitLimit(LI, RHS, L, Cond); + computeLoadConstantCompareExitLimit(LI, RHS, L, Pred); if (ItCnt.hasAnyInfo()) return ItCnt; } @@ -6814,11 +7124,11 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { // If there is a loop-invariant, force it into the RHS. std::swap(LHS, RHS); - Cond = ICmpInst::getSwappedPredicate(Cond); + Pred = ICmpInst::getSwappedPredicate(Pred); } // Simplify the operands before analyzing them. - (void)SimplifyICmpOperands(Cond, LHS, RHS); + (void)SimplifyICmpOperands(Pred, LHS, RHS); // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. @@ -6827,13 +7137,13 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, if (AddRec->getLoop() == L) { // Form the constant range. 
ConstantRange CompRange = - ConstantRange::makeExactICmpRegion(Cond, RHSC->getAPInt()); + ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt()); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; } - switch (Cond) { + switch (Pred) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, @@ -6849,7 +7159,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, } case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) - bool IsSigned = Cond == ICmpInst::ICMP_SLT; + bool IsSigned = Pred == ICmpInst::ICMP_SLT; ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; @@ -6857,7 +7167,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) - bool IsSigned = Cond == ICmpInst::ICMP_SGT; + bool IsSigned = Pred == ICmpInst::ICMP_SGT; ExitLimit EL = howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, AllowPredicates); @@ -6875,7 +7185,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, return ExhaustiveCount; return computeShiftCompareExitLimit(ExitCond->getOperand(0), - ExitCond->getOperand(1), L, Cond); + ExitCond->getOperand(1), L, OriginalPred); } ScalarEvolution::ExitLimit @@ -6920,7 +7230,6 @@ ScalarEvolution::computeLoadConstantCompareExitLimit( Constant *RHS, const Loop *L, ICmpInst::Predicate predicate) { - if (LI->isVolatile()) return getCouldNotCompute(); // Check to see if the loaded pointer is a getelementptr of a global. @@ -7333,8 +7642,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Value *BEValue = PN->getIncomingValueForBlock(Latch); // Execute the loop symbolically to determine the exit value. - if (BEs.getActiveBits() >= 32) - return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it! + assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) && + "BEs is <= MaxBruteForceIterations which is an 'unsigned'!"); unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; @@ -7839,7 +8148,6 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, /// Find the roots of the quadratic equation for the given quadratic chrec /// {L,+,M,+,N}. This returns either the two roots (which might be the same) or /// two SCEVCouldNotCompute objects. -/// static Optional<std::pair<const SCEVConstant *,const SCEVConstant *>> SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); @@ -8080,7 +8388,6 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { /// expressions are equal, however for the purposes of looking for a condition /// guarding a loop, it can be useful to be a little more general, since a /// front-end may have replicated the controlling expression. -/// static bool HasSameValue(const SCEV *A, const SCEV *B) { // Quick check to see if they are the same SCEV. if (A == B) return true; @@ -8527,7 +8834,6 @@ bool ScalarEvolution::isKnownPredicateViaConstantRanges( bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { - // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer. // Return Y via OutY. 
auto MatchBinaryAddToConst = @@ -8693,7 +8999,6 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()]; DTN != HeaderDTN; DTN = DTN->getIDom()) { - assert(DTN && "should reach the loop header before reaching the root!"); BasicBlock *BB = DTN->getBlock(); @@ -9116,7 +9421,6 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, getNotSCEV(FoundLHS)); } - /// If Expr computes ~A, return A else return nullptr static const SCEV *MatchNotExpr(const SCEV *Expr) { const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); @@ -9132,7 +9436,6 @@ static const SCEV *MatchNotExpr(const SCEV *Expr) { return AddRHS->getOperand(1); } - /// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values? template<typename MaxExprType> static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, @@ -9143,7 +9446,6 @@ static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end(); } - /// Is MaybeMinExpr an SMin or UMin of Candidate and some other values? template<typename MaxExprType> static bool IsMinConsistingOf(ScalarEvolution &SE, @@ -9159,7 +9461,6 @@ static bool IsMinConsistingOf(ScalarEvolution &SE, static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { - // If both sides are affine addrecs for the same loop, with equal // steps, and we know the recurrences don't wrap, then we only // need to check the predicate on the starting values. @@ -9295,7 +9596,9 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) { Value *LL, *LR; // FIXME: Once we have SDiv implemented, we can get rid of this matching. + using namespace llvm::PatternMatch; + if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) { // Rules for division. // We are going to perform some comparisons with Denominator and its @@ -9510,14 +9813,54 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, return getUDivExpr(Delta, Step); } +const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, + const SCEV *Stride, + const SCEV *End, + unsigned BitWidth, + bool IsSigned) { + + assert(!isKnownNonPositive(Stride) && + "Stride is expected strictly positive!"); + // Calculate the maximum backedge count based on the range of values + // permitted by Start, End, and Stride. + const SCEV *MaxBECount; + APInt MinStart = + IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start); + + APInt StrideForMaxBECount = + IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); + + // We already know that the stride is positive, so we paper over conservatism + // in our range computation by forcing StrideForMaxBECount to be at least one. + // In theory this is unnecessary, but we expect MaxBECount to be a + // SCEVConstant, and (udiv <constant> 0) is not constant folded by SCEV (there + // is nothing to constant fold it to). + APInt One(BitWidth, 1, IsSigned); + StrideForMaxBECount = APIntOps::smax(One, StrideForMaxBECount); + + APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth) + : APInt::getMaxValue(BitWidth); + APInt Limit = MaxValue - (StrideForMaxBECount - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS of the loop termination condition. 
This is safe because + // in the other case (End - Start) is zero, leading to a zero maximum backedge + // taken count. + APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit) + : APIntOps::umin(getUnsignedRangeMax(End), Limit); + + MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */, + getConstant(StrideForMaxBECount) /* Step */, + false /* Equality */); + + return MaxBECount; +} + ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { SmallPtrSet<const SCEVPredicate *, 4> Predicates; - // We handle only IV < Invariant - if (!isLoopInvariant(RHS, L)) - return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); bool PredicatedIV = false; @@ -9588,7 +9931,6 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) || !loopHasNoSideEffects(L)) return getCouldNotCompute(); - } else if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) // Avoid proven overflow cases: this will ensure that the backedge taken @@ -9601,6 +9943,17 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, : ICmpInst::ICMP_ULT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; + // When the RHS is not invariant, we do not know the end bound of the loop and + // cannot calculate the ExactBECount needed by ExitLimit. However, we can + // calculate the MaxBECount, given the start, stride and max value for the end + // bound of the loop (RHS), and the fact that IV does not overflow (which is + // checked above). + if (!isLoopInvariant(RHS, L)) { + const SCEV *MaxBECount = computeMaxBECountForLT( + Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); + return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, + false /*MaxOrZero*/, Predicates); + } // If the backedge is taken at least once, then it will be taken // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start // is the LHS value of the less-than comparison the first time it is evaluated @@ -9633,37 +9986,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, MaxBECount = BECountIfBackedgeTaken; MaxOrZero = true; } else { - // Calculate the maximum backedge count based on the range of values - // permitted by Start, End, and Stride. - APInt MinStart = IsSigned ? getSignedRangeMin(Start) - : getUnsignedRangeMin(Start); - - unsigned BitWidth = getTypeSizeInBits(LHS->getType()); - - APInt StrideForMaxBECount; - - if (PositiveStride) - StrideForMaxBECount = - IsSigned ? getSignedRangeMin(Stride) - : getUnsignedRangeMin(Stride); - else - // Using a stride of 1 is safe when computing max backedge taken count for - // a loop with unknown stride. - StrideForMaxBECount = APInt(BitWidth, 1, IsSigned); - - APInt Limit = - IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1) - : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1); - - // Although End can be a MAX expression we estimate MaxEnd considering only - // the case End = RHS. This is safe because in the other case (End - Start) - // is zero, leading to a zero maximum backedge taken count. - APInt MaxEnd = - IsSigned ? 
APIntOps::smin(getSignedRangeMax(RHS), Limit) - : APIntOps::umin(getUnsignedRangeMax(RHS), Limit); - - MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), - getConstant(StrideForMaxBECount), false); + MaxBECount = computeMaxBECountForLT( + Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); } if (isa<SCEVCouldNotCompute>(MaxBECount) && @@ -9874,6 +10198,7 @@ static inline bool containsUndefs(const SCEV *S) { } namespace { + // Collect all steps of SCEV expressions. struct SCEVCollectStrides { ScalarEvolution &SE; @@ -9887,6 +10212,7 @@ struct SCEVCollectStrides { Strides.push_back(AR->getStepRecurrence(SE)); return true; } + bool isDone() const { return false; } }; @@ -9894,8 +10220,7 @@ struct SCEVCollectStrides { struct SCEVCollectTerms { SmallVectorImpl<const SCEV *> &Terms; - SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) - : Terms(T) {} + SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {} bool follow(const SCEV *S) { if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) || @@ -9910,6 +10235,7 @@ struct SCEVCollectTerms { // Keep looking. return true; } + bool isDone() const { return false; } }; @@ -9918,7 +10244,7 @@ struct SCEVHasAddRec { bool &ContainsAddRec; SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { - ContainsAddRec = false; + ContainsAddRec = false; } bool follow(const SCEV *S) { @@ -9932,6 +10258,7 @@ struct SCEVHasAddRec { // Keep looking. return true; } + bool isDone() const { return false; } }; @@ -9985,9 +10312,11 @@ struct SCEVCollectAddRecMultiplies { // Keep looking. return true; } + bool isDone() const { return false; } }; -} + +} // end anonymous namespace /// Find parametric terms in this SCEVAddRecExpr. We first for parameters in /// two places: @@ -10066,7 +10395,6 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, return true; } - // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) { for (const SCEV *T : Terms) @@ -10181,7 +10509,6 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, void ScalarEvolution::computeAccessFunctions( const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<const SCEV *> &Sizes) { - // Early exit in case this SCEV is not an affine multivariate function. if (Sizes.empty()) return; @@ -10285,7 +10612,6 @@ void ScalarEvolution::computeAccessFunctions( /// DelinearizationPass that walks through all loads and stores of a function /// asking for the SCEV of the memory access with respect to all enclosing /// loops, calling SCEV->delinearize on that and printing the results. - void ScalarEvolution::delinearize(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<const SCEV *> &Sizes, @@ -10374,11 +10700,8 @@ ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI) : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), - CouldNotCompute(new SCEVCouldNotCompute()), - WalkingBEDominatingConds(false), ProvingSplitPredicate(false), - ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), - FirstUnknown(nullptr) { - + CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64), + LoopDispositions(64), BlockDispositions(64) { // To use guards for proving predicates, we need to scan every instruction in // relevant basic blocks, and not just terminators. 
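The howManyLessThans changes above (together with computeMaxBECountForLT) allow a maximum trip count to be reported even when the exact one is unknown because the bound is not loop-invariant. A hedged example of the kind of loop this is aimed at; whether SCEV actually bounds it depends on the rest of the analysis:

  void variantBound(unsigned char *a, const unsigned char *b) {
    // The comparison bound is re-read every iteration, so there is no exact
    // trip count, but the zero-extended i8 load keeps the backedge-taken
    // count below 256, which can be reported as MaxNotTaken.
    for (unsigned i = 0; i < (unsigned)b[i]; ++i)
      a[i] = 0;
  }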
Doing this is a waste of // time if the IR does not actually contain any calls to @@ -10399,7 +10722,6 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), - WalkingBEDominatingConds(false), ProvingSplitPredicate(false), MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( @@ -10415,6 +10737,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) UniqueSCEVs(std::move(Arg.UniqueSCEVs)), UniquePreds(std::move(Arg.UniquePreds)), SCEVAllocator(std::move(Arg.SCEVAllocator)), + LoopUsers(std::move(Arg.LoopUsers)), PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), FirstUnknown(Arg.FirstUnknown) { Arg.FirstUnknown = nullptr; @@ -10647,9 +10970,11 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { if (!L) return LoopVariant; - // This recurrence is variant w.r.t. L if L contains AR's loop. - if (L->contains(AR->getLoop())) + // Everything that is not defined at loop entry is variant. + if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader())) return LoopVariant; + assert(!L->contains(AR->getLoop()) && "Containing loop's header does not" + " dominate the contained loop's header?"); // This recurrence is invariant w.r.t. L if AR's loop contains L. if (AR->getLoop()->contains(L)) @@ -10806,7 +11131,16 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; }); } -void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { +bool ScalarEvolution::ExitLimit::hasOperand(const SCEV *S) const { + auto IsS = [&](const SCEV *X) { return S == X; }; + auto ContainsS = [&](const SCEV *X) { + return !isa<SCEVCouldNotCompute>(X) && SCEVExprContains(X, IsS); + }; + return ContainsS(ExactNotTaken) || ContainsS(MaxNotTaken); +} + +void +ScalarEvolution::forgetMemoizedResults(const SCEV *S) { ValuesAtScopes.erase(S); LoopDispositions.erase(S); BlockDispositions.erase(S); @@ -10816,7 +11150,7 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { HasRecMap.erase(S); MinTrailingZerosCache.erase(S); - for (auto I = PredicatedSCEVRewrites.begin(); + for (auto I = PredicatedSCEVRewrites.begin(); I != PredicatedSCEVRewrites.end();) { std::pair<const SCEV *, const Loop *> Entry = I->first; if (Entry.first == S) @@ -10841,6 +11175,25 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } +void ScalarEvolution::addToLoopUseLists(const SCEV *S) { + struct FindUsedLoops { + SmallPtrSet<const Loop *, 8> LoopsUsed; + bool follow(const SCEV *S) { + if (auto *AR = dyn_cast<SCEVAddRecExpr>(S)) + LoopsUsed.insert(AR->getLoop()); + return true; + } + + bool isDone() const { return false; } + }; + + FindUsedLoops F; + SCEVTraversal<FindUsedLoops>(F).visitAll(S); + + for (auto *L : F.LoopsUsed) + LoopUsers[L].push_back(S); +} + void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); ScalarEvolution SE2(F, TLI, AC, DT, LI); @@ -10849,9 +11202,12 @@ void ScalarEvolution::verify() const { // Map's SCEV expressions from one ScalarEvolution "universe" to another. 
struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> { + SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {} + const SCEV *visitConstant(const SCEVConstant *Constant) { return SE.getConstant(Constant->getAPInt()); } + const SCEV *visitUnknown(const SCEVUnknown *Expr) { return SE.getUnknown(Expr->getValue()); } @@ -10859,7 +11215,6 @@ void ScalarEvolution::verify() const { const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { return SE.getCouldNotCompute(); } - SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {} }; SCEVMapper SCM(SE2); @@ -10948,6 +11303,7 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution", "Scalar Evolution Analysis", false, true) + char ScalarEvolutionWrapperPass::ID = 0; ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { @@ -11023,6 +11379,7 @@ namespace { class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> { public: + /// Rewrites \p S in the context of a loop L and the SCEV predication /// infrastructure. /// @@ -11038,11 +11395,6 @@ public: return Rewriter.visit(S); } - SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE, - SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, - SCEVUnionPredicate *Pred) - : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {} - const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (Pred) { auto ExprPreds = Pred->getPredicatesForExpr(Expr); @@ -11087,6 +11439,11 @@ public: } private: + explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE, + SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, + SCEVUnionPredicate *Pred) + : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {} + bool addOverflowAssumption(const SCEVPredicate *P) { if (!NewPreds) { // Check if we've already made this assumption. @@ -11103,10 +11460,10 @@ private: } // If \p Expr represents a PHINode, we try to see if it can be represented - // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible + // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible // to add this predicate as a runtime overflow check, we return the AddRec. - // If \p Expr does not meet these conditions (is not a PHI node, or we - // couldn't create an AddRec for it, or couldn't add the predicate), we just + // If \p Expr does not meet these conditions (is not a PHI node, or we + // couldn't create an AddRec for it, or couldn't add the predicate), we just // return \p Expr. 
const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) { if (!isa<PHINode>(Expr->getValue())) @@ -11121,11 +11478,12 @@ private: } return PredicatedRewrite->first; } - + SmallPtrSetImpl<const SCEVPredicate *> *NewPreds; SCEVUnionPredicate *Pred; const Loop *L; }; + } // end anonymous namespace const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L, @@ -11136,7 +11494,6 @@ const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L, const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates( const SCEV *S, const Loop *L, SmallPtrSetImpl<const SCEVPredicate *> &Preds) { - SmallPtrSet<const SCEVPredicate *, 4> TransformPreds; S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr); auto *AddRec = dyn_cast<SCEVAddRecExpr>(S); @@ -11292,7 +11649,7 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N) { PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) - : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {} + : SE(SE), L(L) {} const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 47bdac00ae1f..86f714b930d0 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -878,7 +878,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, if (IncV->mayHaveSideEffects()) return false; - if (IncV != PN) + if (IncV == PN) return true; return isNormalAddRecExprPHI(PN, IncV, L); @@ -1143,7 +1143,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, for (auto &I : *L->getHeader()) { auto *PN = dyn_cast<PHINode>(&I); - if (!PN || !SE.isSCEVable(PN->getType())) + // Found first non-phi, the rest of instructions are also not Phis. + if (!PN) + break; + + if (!SE.isSCEVable(PN->getType())) continue; const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN)); @@ -1728,10 +1732,28 @@ Value *SCEVExpander::expand(const SCEV *S) { InsertPt = &*L->getHeader()->getFirstInsertionPt(); } } else { + // We can move insertion point only if there is no div or rem operations + // otherwise we are risky to move it over the check for zero denominator. + auto SafeToHoist = [](const SCEV *S) { + return !SCEVExprContains(S, [](const SCEV *S) { + if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) { + if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS())) + // Division by non-zero constants can be hoisted. + return SC->getValue()->isZero(); + // All other divisions should not be moved as they may be + // divisions by zero and should be kept within the + // conditions of the surrounding loops that guard their + // execution (see PR35406). + return true; + } + return false; + }); + }; // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. 
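The SafeToHoist check added above exists because a division expanded at the loop header would execute unconditionally; the hazard (cf. PR35406 referenced in the comment) looks like this at the source level (illustrative only):

  void guardedDivision(unsigned *a, unsigned n, unsigned d) {
    for (unsigned i = 0; i < n; ++i)
      if (d != 0)
        a[i] = i / d;   // expanding this udiv above the "d != 0" guard
                        // could introduce a division by zero
  }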
- if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) && + SafeToHoist(S)) InsertPt = &*L->getHeader()->getFirstInsertionPt(); while (InsertPt->getIterator() != Builder.GetInsertPoint() && (isInsertedInstruction(InsertPt) || @@ -2293,4 +2315,9 @@ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { visitAll(S, Search); return !Search.IsUnsafe; } + +bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, + ScalarEvolution &SE) { + return isSafeToExpand(S, SE) && SE.dominates(S, InsertionPoint->getParent()); +} } diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp index 833c6e09f6fd..f12275aff387 100644 --- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -34,11 +34,12 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/IR/Constants.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -47,17 +48,18 @@ using namespace llvm; // can also be achieved by stripping the associated metadata tags from IR, but // this option is sometimes more convenient. static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias", - cl::init(true)); + cl::init(true), cl::Hidden); namespace { + /// This is a simple wrapper around an MDNode which provides a higher-level /// interface by hiding the details of how alias analysis information is encoded /// in its operands. class AliasScopeNode { - const MDNode *Node; + const MDNode *Node = nullptr; public: - AliasScopeNode() : Node(nullptr) {} + AliasScopeNode() = default; explicit AliasScopeNode(const MDNode *N) : Node(N) {} /// Get the MDNode for this AliasScopeNode. 
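A hedged caller-side sketch for the new isSafeToExpandAt helper added in the ScalarEvolutionExpander.cpp hunk above; the expander object and insertion point here are assumptions, not code from this patch:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/ScalarEvolutionExpander.h"
  using namespace llvm;

  // Expand S at InsertPt only when the expression is safe to expand and all
  // of the values it depends on dominate the insertion point.
  Value *expandIfSafe(const SCEV *S, Instruction *InsertPt,
                      ScalarEvolution &SE, SCEVExpander &Exp) {
    if (!isSafeToExpandAt(S, InsertPt, SE))
      return nullptr;
    return Exp.expandCodeFor(S, S->getType(), InsertPt);
  }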
@@ -70,7 +72,8 @@ public: return dyn_cast_or_null<MDNode>(Node->getOperand(1)); } }; -} // end of anonymous namespace + +} // end anonymous namespace AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { @@ -99,12 +102,12 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS, if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( LLVMContext::MD_noalias))) - return MRI_NoModRef; + return ModRefInfo::NoModRef; if (!mayAliasInScopes( CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), Loc.AATags.NoAlias)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(CS, Loc); } @@ -117,12 +120,12 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1, if (!mayAliasInScopes( CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return MRI_NoModRef; + return ModRefInfo::NoModRef; if (!mayAliasInScopes( CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(CS1, CS2); } @@ -181,6 +184,7 @@ ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, } char ScopedNoAliasAAWrapperPass::ID = 0; + INITIALIZE_PASS(ScopedNoAliasAAWrapperPass, "scoped-noalias", "Scoped NoAlias Alias Analysis", false, true) diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp deleted file mode 100644 index 470f4bee1e0a..000000000000 --- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp +++ /dev/null @@ -1,347 +0,0 @@ -//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements an abstract sparse conditional propagation algorithm, -// modeled after SCCP, but with a customizable lattice function. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/SparsePropagation.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "sparseprop" - -//===----------------------------------------------------------------------===// -// AbstractLatticeFunction Implementation -//===----------------------------------------------------------------------===// - -AbstractLatticeFunction::~AbstractLatticeFunction() {} - -/// PrintValue - Render the specified lattice value to the specified stream. 
-void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) { - if (V == UndefVal) - OS << "undefined"; - else if (V == OverdefinedVal) - OS << "overdefined"; - else if (V == UntrackedVal) - OS << "untracked"; - else - OS << "unknown lattice value"; -} - -//===----------------------------------------------------------------------===// -// SparseSolver Implementation -//===----------------------------------------------------------------------===// - -/// getOrInitValueState - Return the LatticeVal object that corresponds to the -/// value, initializing the value's state if it hasn't been entered into the -/// map yet. This function is necessary because not all values should start -/// out in the underdefined state... Arguments should be overdefined, and -/// constants should be marked as constants. -/// -SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) { - DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V); - if (I != ValueState.end()) return I->second; // Common case, in the map - - LatticeVal LV; - if (LatticeFunc->IsUntrackedValue(V)) - return LatticeFunc->getUntrackedVal(); - else if (Constant *C = dyn_cast<Constant>(V)) - LV = LatticeFunc->ComputeConstant(C); - else if (Argument *A = dyn_cast<Argument>(V)) - LV = LatticeFunc->ComputeArgument(A); - else if (!isa<Instruction>(V)) - // All other non-instructions are overdefined. - LV = LatticeFunc->getOverdefinedVal(); - else - // All instructions are underdefined by default. - LV = LatticeFunc->getUndefVal(); - - // If this value is untracked, don't add it to the map. - if (LV == LatticeFunc->getUntrackedVal()) - return LV; - return ValueState[V] = LV; -} - -/// UpdateState - When the state for some instruction is potentially updated, -/// this function notices and adds I to the worklist if needed. -void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) { - DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst); - if (I != ValueState.end() && I->second == V) - return; // No change. - - // An update. Visit uses of I. - ValueState[&Inst] = V; - InstWorkList.push_back(&Inst); -} - -/// MarkBlockExecutable - This method can be used by clients to mark all of -/// the blocks that are known to be intrinsically live in the processed unit. -void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { - DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); - BBExecutable.insert(BB); // Basic block is executable! - BBWorkList.push_back(BB); // Add the block to the work list! -} - -/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB -/// work list if it is not already executable... -void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { - if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) - return; // This edge is already known to be executable! - - DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() - << " -> " << Dest->getName() << "\n"); - - if (BBExecutable.count(Dest)) { - // The destination is already executable, but we just made an edge - // feasible that wasn't before. Revisit the PHI nodes in the block - // because they have potentially new operands. - for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I) - visitPHINode(*cast<PHINode>(I)); - - } else { - MarkBlockExecutable(Dest); - } -} - - -/// getFeasibleSuccessors - Return a vector of booleans to indicate which -/// successors are reachable from a given terminator instruction. 
-void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, - SmallVectorImpl<bool> &Succs, - bool AggressiveUndef) { - Succs.resize(TI.getNumSuccessors()); - if (TI.getNumSuccessors() == 0) return; - - if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) { - if (BI->isUnconditional()) { - Succs[0] = true; - return; - } - - LatticeVal BCValue; - if (AggressiveUndef) - BCValue = getOrInitValueState(BI->getCondition()); - else - BCValue = getLatticeState(BI->getCondition()); - - if (BCValue == LatticeFunc->getOverdefinedVal() || - BCValue == LatticeFunc->getUntrackedVal()) { - // Overdefined condition variables can branch either way. - Succs[0] = Succs[1] = true; - return; - } - - // If undefined, neither is feasible yet. - if (BCValue == LatticeFunc->getUndefVal()) - return; - - Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); - if (!C || !isa<ConstantInt>(C)) { - // Non-constant values can go either way. - Succs[0] = Succs[1] = true; - return; - } - - // Constant condition variables mean the branch can only go a single way - Succs[C->isNullValue()] = true; - return; - } - - if (isa<InvokeInst>(TI)) { - // Invoke instructions successors are always executable. - // TODO: Could ask the lattice function if the value can throw. - Succs[0] = Succs[1] = true; - return; - } - - if (isa<IndirectBrInst>(TI)) { - Succs.assign(Succs.size(), true); - return; - } - - SwitchInst &SI = cast<SwitchInst>(TI); - LatticeVal SCValue; - if (AggressiveUndef) - SCValue = getOrInitValueState(SI.getCondition()); - else - SCValue = getLatticeState(SI.getCondition()); - - if (SCValue == LatticeFunc->getOverdefinedVal() || - SCValue == LatticeFunc->getUntrackedVal()) { - // All destinations are executable! - Succs.assign(TI.getNumSuccessors(), true); - return; - } - - // If undefined, neither is feasible yet. - if (SCValue == LatticeFunc->getUndefVal()) - return; - - Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); - if (!C || !isa<ConstantInt>(C)) { - // All destinations are executable! - Succs.assign(TI.getNumSuccessors(), true); - return; - } - SwitchInst::CaseHandle Case = *SI.findCaseValue(cast<ConstantInt>(C)); - Succs[Case.getSuccessorIndex()] = true; -} - - -/// isEdgeFeasible - Return true if the control flow edge from the 'From' -/// basic block to the 'To' basic block is currently feasible... -bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To, - bool AggressiveUndef) { - SmallVector<bool, 16> SuccFeasible; - TerminatorInst *TI = From->getTerminator(); - getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); - - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (TI->getSuccessor(i) == To && SuccFeasible[i]) - return true; - - return false; -} - -void SparseSolver::visitTerminatorInst(TerminatorInst &TI) { - SmallVector<bool, 16> SuccFeasible; - getFeasibleSuccessors(TI, SuccFeasible, true); - - BasicBlock *BB = TI.getParent(); - - // Mark all feasible successors executable... - for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) - if (SuccFeasible[i]) - markEdgeExecutable(BB, TI.getSuccessor(i)); -} - -void SparseSolver::visitPHINode(PHINode &PN) { - // The lattice function may store more information on a PHINode than could be - // computed from its incoming values. For example, SSI form stores its sigma - // functions as PHINodes with a single incoming value. 
- if (LatticeFunc->IsSpecialCasedPHI(&PN)) { - LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this); - if (IV != LatticeFunc->getUntrackedVal()) - UpdateState(PN, IV); - return; - } - - LatticeVal PNIV = getOrInitValueState(&PN); - LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); - - // If this value is already overdefined (common) just return. - if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal()) - return; // Quick exit - - // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, - // and slow us down a lot. Just mark them overdefined. - if (PN.getNumIncomingValues() > 64) { - UpdateState(PN, Overdefined); - return; - } - - // Look at all of the executable operands of the PHI node. If any of them - // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the - // transfer function to give us the merge of the incoming values. - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // If the edge is not yet known to be feasible, it doesn't impact the PHI. - if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true)) - continue; - - // Merge in this value. - LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i)); - if (OpVal != PNIV) - PNIV = LatticeFunc->MergeValues(PNIV, OpVal); - - if (PNIV == Overdefined) - break; // Rest of input values don't matter. - } - - // Update the PHI with the compute value, which is the merge of the inputs. - UpdateState(PN, PNIV); -} - - -void SparseSolver::visitInst(Instruction &I) { - // PHIs are handled by the propagation logic, they are never passed into the - // transfer functions. - if (PHINode *PN = dyn_cast<PHINode>(&I)) - return visitPHINode(*PN); - - // Otherwise, ask the transfer function what the result is. If this is - // something that we care about, remember it. - LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this); - if (IV != LatticeFunc->getUntrackedVal()) - UpdateState(I, IV); - - if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I)) - visitTerminatorInst(*TI); -} - -void SparseSolver::Solve(Function &F) { - MarkBlockExecutable(&F.getEntryBlock()); - - // Process the work lists until they are empty! - while (!BBWorkList.empty() || !InstWorkList.empty()) { - // Process the instruction work list. - while (!InstWorkList.empty()) { - Instruction *I = InstWorkList.back(); - InstWorkList.pop_back(); - - DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n"); - - // "I" got into the work list because it made a transition. See if any - // users are both live and in need of updating. - for (User *U : I->users()) { - Instruction *UI = cast<Instruction>(U); - if (BBExecutable.count(UI->getParent())) // Inst is executable? - visitInst(*UI); - } - } - - // Process the basic block work list. - while (!BBWorkList.empty()) { - BasicBlock *BB = BBWorkList.back(); - BBWorkList.pop_back(); - - DEBUG(dbgs() << "\nPopped off BBWL: " << *BB); - - // Notify all instructions in this basic block that they are newly - // executable. 
- for (Instruction &I : *BB) - visitInst(I); - } - } -} - -void SparseSolver::Print(Function &F, raw_ostream &OS) const { - OS << "\nFUNCTION: " << F.getName() << "\n"; - for (auto &BB : F) { - if (!BBExecutable.count(&BB)) - OS << "INFEASIBLE: "; - OS << "\t"; - if (BB.hasName()) - OS << BB.getName() << ":\n"; - else - OS << "; anon bb\n"; - for (auto &I : BB) { - LatticeFunc->PrintValue(getLatticeState(&I), OS); - OS << I << "\n"; - } - - OS << "\n"; - } -} - diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp index 2be5d5caf7c2..d18246ac5941 100644 --- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -50,9 +50,9 @@ static bool hasSinCosPiStret(const Triple &T) { return true; } -/// initialize - Initialize the set of available library functions based on the -/// specified target triple. This should be carefully written so that a missing -/// target triple gets a sane set of defaults. +/// Initialize the set of available library functions based on the specified +/// target triple. This should be carefully written so that a missing target +/// triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef<StringRef> StandardNames) { // Verify that the StandardNames array is in alphabetical order. @@ -182,6 +182,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_atanh); TLI.setUnavailable(LibFunc_atanhf); TLI.setUnavailable(LibFunc_atanhl); + TLI.setUnavailable(LibFunc_cabs); + TLI.setUnavailable(LibFunc_cabsf); + TLI.setUnavailable(LibFunc_cabsl); TLI.setUnavailable(LibFunc_cbrt); TLI.setUnavailable(LibFunc_cbrtf); TLI.setUnavailable(LibFunc_cbrtl); @@ -605,7 +608,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 3 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0) == FTy.getReturnType() && FTy.getParamType(1) == FTy.getReturnType() && - FTy.getParamType(2)->isIntegerTy()); + IsSizeTTy(FTy.getParamType(2))); case LibFunc_strcpy_chk: case LibFunc_stpcpy_chk: @@ -630,7 +633,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0) == FTy.getParamType(1) && FTy.getParamType(0) == PCharTy && - FTy.getParamType(2)->isIntegerTy()); + IsSizeTTy(FTy.getParamType(2))); case LibFunc_strxfrm: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && @@ -645,7 +648,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(0) == FTy.getParamType(1) && - FTy.getParamType(2)->isIntegerTy()); + IsSizeTTy(FTy.getParamType(2))); case LibFunc_strspn: case LibFunc_strcspn: @@ -1267,6 +1270,25 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() && FTy.getReturnType()->isIntegerTy()); + case LibFunc_cabs: + case LibFunc_cabsf: + case LibFunc_cabsl: { + Type* RetTy = FTy.getReturnType(); + if (!RetTy->isFloatingPointTy()) + return false; + + // NOTE: These prototypes are target specific and currently support + // "complex" passed as an array or discrete real & imaginary parameters. + // Add other calling conventions to enable libcall optimizations. 
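For illustration, a minimal sketch (not part of this diff; the helper name is hypothetical) of the two IR-level prototype shapes the cabs/cabsf/cabsl case that follows accepts, assuming double as the return type:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Builds the two function types the new cabs check treats as valid:
// "complex" passed as a two-element array, or as two scalar arguments.
static void buildCabsPrototypes(LLVMContext &Ctx) {
  Type *DblTy = Type::getDoubleTy(Ctx);
  // double cabs([2 x double])   -- NumParams == 1, parameter is [2 x RetTy]
  FunctionType *ArrayForm =
      FunctionType::get(DblTy, {ArrayType::get(DblTy, 2)}, /*isVarArg=*/false);
  // double cabs(double, double) -- NumParams == 2, both parameters == RetTy
  FunctionType *SplitForm =
      FunctionType::get(DblTy, {DblTy, DblTy}, /*isVarArg=*/false);
  (void)ArrayForm;
  (void)SplitForm;
}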
+ if (NumParams == 1) + return (FTy.getParamType(0)->isArrayTy() && + FTy.getParamType(0)->getArrayNumElements() == 2 && + FTy.getParamType(0)->getArrayElementType() == RetTy); + else if (NumParams == 2) + return (FTy.getParamType(0) == RetTy && FTy.getParamType(1) == RetTy); + else + return false; + } case LibFunc::NumLibFuncs: break; } @@ -1519,20 +1541,11 @@ TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) { return *Impl; } -unsigned TargetLibraryInfoImpl::getTargetWCharSize(const Triple &T) { - // See also clang/lib/Basic/Targets.cpp. - if (T.isPS4() || T.isOSWindows() || T.isArch16Bit()) - return 2; - if (T.getArch() == Triple::xcore) - return 1; - return 4; -} - unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const { if (auto *ShortWChar = cast_or_null<ConstantAsMetadata>( M.getModuleFlag("wchar_size"))) return cast<ConstantInt>(ShortWChar->getValue())->getZExtValue(); - return getTargetWCharSize(Triple(M.getTargetTriple())); + return 0; } TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass() diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 25813c65037f..b744cae51ed7 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -16,18 +16,19 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include <utility> using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "tti" -static cl::opt<bool> UseWideMemcpyLoopLowering( - "use-wide-memcpy-loop-lowering", cl::init(false), - cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."), - cl::Hidden); +static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), + cl::Hidden, + cl::desc("Recognize reduction patterns.")); namespace { /// \brief No-op implementation of the TTI interface using the utility base @@ -144,9 +145,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) const { + unsigned AddrSpace, + Instruction *I) const { return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale, AddrSpace); + Scale, AddrSpace, I); } bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { @@ -166,7 +168,16 @@ bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { } bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { - return TTIImpl->isLegalMaskedGather(DataType); + return TTIImpl->isLegalMaskedScatter(DataType); +} + +bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const { + return TTIImpl->hasDivRemOp(DataType, IsSigned); +} + +bool TargetTransformInfo::hasVolatileVariant(Instruction *I, + unsigned AddrSpace) const { + return TTIImpl->hasVolatileVariant(I, AddrSpace); } bool TargetTransformInfo::prefersVectorizedAddressing() const { @@ -184,9 +195,8 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, return Cost; } -bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I, - int64_t Offset) const { - return TTIImpl->isFoldableMemAccessOffset(I, Offset); +bool TargetTransformInfo::LSRWithInstrQueries() const { + return TTIImpl->LSRWithInstrQueries(); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -235,8 
+245,9 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } -bool TargetTransformInfo::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const { - return TTIImpl->expandMemCmp(I, MaxLoadSize); +const TargetTransformInfo::MemCmpExpansionOptions * +TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const { + return TTIImpl->enableMemCmpExpansion(IsZeroCmp); } bool TargetTransformInfo::enableInterleavedAccessVectorization() const { @@ -265,6 +276,10 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { return TTIImpl->haveFastSqrt(Ty); } +bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { + return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty); +} + int TargetTransformInfo::getFPOpCost(Type *Ty) const { int Cost = TTIImpl->getFPOpCost(Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); @@ -321,6 +336,16 @@ unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } +llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level) + const { + return TTIImpl->getCacheSize(Level); +} + +llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity( + CacheLevel Level) const { + return TTIImpl->getCacheAssociativity(Level); +} + unsigned TargetTransformInfo::getPrefetchDistance() const { return TTIImpl->getPrefetchDistance(); } @@ -467,9 +492,18 @@ int TargetTransformInfo::getAddressComputationCost(Type *Tp, return Cost; } -int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) const { - int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); +int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) const { + int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy, + bool IsPairwiseForm, + bool IsUnsigned) const { + int Cost = + TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -508,10 +542,6 @@ void TargetTransformInfo::getMemcpyLoopResidualLoweringType( SrcAlign, DestAlign); } -bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const { - return UseWideMemcpyLoopLowering; -} - bool TargetTransformInfo::areInlineCompatible(const Function *Caller, const Function *Callee) const { return TTIImpl->areInlineCompatible(Caller, Callee); @@ -564,6 +594,557 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } +int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { + return TTIImpl->getInstructionLatency(I); +} + +static bool isReverseVectorMask(ArrayRef<int> Mask) { + for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) + if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) + return false; + return true; +} + +static bool isSingleSourceVectorMask(ArrayRef<int> Mask) { + bool Vec0 = false; + bool Vec1 = false; + for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { + if (Mask[i] >= 0) { + if ((unsigned)Mask[i] >= NumVecElts) + Vec1 = true; + else + Vec0 = true; + } + } + return !(Vec0 && Vec1); +} + +static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) { + for (unsigned i = 0; i < Mask.size(); ++i) + if 
(Mask[i] > 0) + return false; + return true; +} + +static bool isAlternateVectorMask(ArrayRef<int> Mask) { + bool isAlternate = true; + unsigned MaskSize = Mask.size(); + + // Example: shufflevector A, B, <0,5,2,7> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); + } + + if (isAlternate) + return true; + + isAlternate = true; + // Example: shufflevector A, B, <4,1,6,3> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); + } + + return isAlternate; +} + +static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { + TargetTransformInfo::OperandValueKind OpInfo = + TargetTransformInfo::OK_AnyValue; + + // Check for a splat of a constant or for a non uniform vector of constants. + if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { + OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; + if (cast<Constant>(V)->getSplatValue() != nullptr) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + } + + // Check for a splat of a uniform value. This is not loop aware, so return + // true only for the obviously uniform cases (argument, globalvalue) + const Value *Splat = getSplatValue(V); + if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat))) + OpInfo = TargetTransformInfo::OK_UniformValue; + + return OpInfo; +} + +static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, + unsigned Level) { + // We don't need a shuffle if we just want to have element 0 in position 0 of + // the vector. + if (!SI && Level == 0 && IsLeft) + return true; + else if (!SI) + return false; + + SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); + + // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether + // we look at the left or right side. + for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) + Mask[i] = val; + + SmallVector<int, 16> ActualMask = SI->getShuffleMask(); + return Mask == ActualMask; +} + +namespace { +/// Kind of the reduction data. +enum ReductionKind { + RK_None, /// Not a reduction. + RK_Arithmetic, /// Binary reduction data. + RK_MinMax, /// Min/max reduction data. + RK_UnsignedMinMax, /// Unsigned min/max reduction data. +}; +/// Contains opcode + LHS/RHS parts of the reduction operations. 
+struct ReductionData { + ReductionData() = delete; + ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) + : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { + assert(Kind != RK_None && "expected binary or min/max reduction only."); + } + unsigned Opcode = 0; + Value *LHS = nullptr; + Value *RHS = nullptr; + ReductionKind Kind = RK_None; + bool hasSameData(ReductionData &RD) const { + return Kind == RD.Kind && Opcode == RD.Opcode; + } +}; +} // namespace + +static Optional<ReductionData> getReductionData(Instruction *I) { + Value *L, *R; + if (m_BinOp(m_Value(L), m_Value(R)).match(I)) + return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); + if (auto *SI = dyn_cast<SelectInst>(I)) { + if (m_SMin(m_Value(L), m_Value(R)).match(SI) || + m_SMax(m_Value(L), m_Value(R)).match(SI) || + m_OrdFMin(m_Value(L), m_Value(R)).match(SI) || + m_OrdFMax(m_Value(L), m_Value(R)).match(SI) || + m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || + m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { + auto *CI = cast<CmpInst>(SI->getCondition()); + return ReductionData(RK_MinMax, CI->getOpcode(), L, R); + } + if (m_UMin(m_Value(L), m_Value(R)).match(SI) || + m_UMax(m_Value(L), m_Value(R)).match(SI)) { + auto *CI = cast<CmpInst>(SI->getCondition()); + return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R); + } + } + return llvm::None; +} + +static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, + unsigned Level, + unsigned NumLevels) { + // Match one level of pairwise operations. + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + if (!I) + return RK_None; + + assert(I->getType()->isVectorTy() && "Expecting a vector type"); + + Optional<ReductionData> RD = getReductionData(I); + if (!RD) + return RK_None; + + ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS); + if (!LS && Level) + return RK_None; + ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS); + if (!RS && Level) + return RK_None; + + // On level 0 we can omit one shufflevector instruction. + if (!Level && !RS && !LS) + return RK_None; + + // Shuffle inputs must match. + Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; + Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; + Value *NextLevelOp = nullptr; + if (NextLevelOpR && NextLevelOpL) { + // If we have two shuffles their operands must match. + if (NextLevelOpL != NextLevelOpR) + return RK_None; + + NextLevelOp = NextLevelOpL; + } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { + // On the first level we can omit the shufflevector <0, undef,...>. So the + // input to the other shufflevector <1, undef> must match with one of the + // inputs to the current binary operation. + // Example: + // %NextLevelOpL = shufflevector %R, <1, undef ...> + // %BinOp = fadd %NextLevelOpL, %R + if (NextLevelOpL && NextLevelOpL != RD->RHS) + return RK_None; + else if (NextLevelOpR && NextLevelOpR != RD->LHS) + return RK_None; + + NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; + } else + return RK_None; + + // Check that the next levels binary operation exists and matches with the + // current one. 
+ if (Level + 1 != NumLevels) { + Optional<ReductionData> NextLevelRD = + getReductionData(cast<Instruction>(NextLevelOp)); + if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) + return RK_None; + } + + // Shuffle mask for pairwise operation must match. + if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { + if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) + return RK_None; + } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { + if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) + return RK_None; + } else { + return RK_None; + } + + if (++Level == NumLevels) + return RD->Kind; + + // Match next level. + return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level, + NumLevels); +} + +static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return RK_None; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return RK_None; + + auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); + if (!RdxStart) + return RK_None; + Optional<ReductionData> RD = getReductionData(RdxStart); + if (!RD) + return RK_None; + + Type *VecTy = RdxStart->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return RK_None; + + // We look for a sequence of shuffle,shuffle,add triples like the following + // that builds a pairwise reduction tree. + // + // (X0, X1, X2, X3) + // (X0 + X1, X2 + X3, undef, undef) + // ((X0 + X1) + (X2 + X3), undef, undef, undef) + // + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == + RK_None) + return RK_None; + + Opcode = RD->Opcode; + Ty = VecTy; + + return RD->Kind; +} + +static std::pair<Value *, ShuffleVectorInst *> +getShuffleAndOtherOprd(Value *L, Value *R) { + ShuffleVectorInst *S = nullptr; + + if ((S = dyn_cast<ShuffleVectorInst>(L))) + return std::make_pair(R, S); + + S = dyn_cast<ShuffleVectorInst>(R); + return std::make_pair(L, S); +} + +static ReductionKind +matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return RK_None; + + // Need to extract the first element. 
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return RK_None; + + auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); + if (!RdxStart) + return RK_None; + Optional<ReductionData> RD = getReductionData(RdxStart); + if (!RD) + return RK_None; + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return RK_None; + + // We look for a sequence of shuffles and adds like the following matching one + // fadd, shuffle vector pair at a time. + // + // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf + // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + + unsigned MaskStart = 1; + Instruction *RdxOp = RdxStart; + SmallVector<int, 32> ShuffleMask(NumVecElems, 0); + unsigned NumVecElemsRemain = NumVecElems; + while (NumVecElemsRemain - 1) { + // Check for the right reduction operation. + if (!RdxOp) + return RK_None; + Optional<ReductionData> RDLevel = getReductionData(RdxOp); + if (!RDLevel || !RDLevel->hasSameData(*RD)) + return RK_None; + + Value *NextRdxOp; + ShuffleVectorInst *Shuffle; + std::tie(NextRdxOp, Shuffle) = + getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); + + // Check the current reduction operation and the shuffle use the same value. + if (Shuffle == nullptr) + return RK_None; + if (Shuffle->getOperand(0) != NextRdxOp) + return RK_None; + + // Check that shuffle masks matches. + for (unsigned j = 0; j != MaskStart; ++j) + ShuffleMask[j] = MaskStart + j; + // Fill the rest of the mask with -1 for undef. 
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); + + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + if (ShuffleMask != Mask) + return RK_None; + + RdxOp = dyn_cast<Instruction>(NextRdxOp); + NumVecElemsRemain /= 2; + MaskStart *= 2; + } + + Opcode = RD->Opcode; + Ty = VecTy; + return RD->Kind; +} + +int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { + switch (I->getOpcode()) { + case Instruction::GetElementPtr: + return getUserCost(I); + + case Instruction::Ret: + case Instruction::PHI: + case Instruction::Br: { + return getCFInstrCost(I->getOpcode()); + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + TargetTransformInfo::OperandValueKind Op1VK = + getOperandInfo(I->getOperand(0)); + TargetTransformInfo::OperandValueKind Op2VK = + getOperandInfo(I->getOperand(1)); + SmallVector<const Value*, 2> Operands(I->operand_values()); + return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, + Op2VK, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, + Operands); + } + case Instruction::Select: { + const SelectInst *SI = cast<SelectInst>(I); + Type *CondTy = SI->getCondition()->getType(); + return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); + } + case Instruction::Store: { + const StoreInst *SI = cast<StoreInst>(I); + Type *ValTy = SI->getValueOperand()->getType(); + return getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), + SI->getPointerAddressSpace(), I); + } + case Instruction::Load: { + const LoadInst *LI = cast<LoadInst>(I); + return getMemoryOpCost(I->getOpcode(), I->getType(), + LI->getAlignment(), + LI->getPointerAddressSpace(), I); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: { + Type *SrcTy = I->getOperand(0)->getType(); + return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); + } + case Instruction::ExtractElement: { + const ExtractElementInst * EEI = cast<ExtractElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + + // Try to match a reduction sequence (series of shufflevector and vector + // adds followed by a extractelement). 
+ unsigned ReduxOpCode; + Type *ReduxType; + + switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { + case RK_Arithmetic: + return getArithmeticReductionCost(ReduxOpCode, ReduxType, + /*IsPairwiseForm=*/false); + case RK_MinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/false, /*IsUnsigned=*/false); + case RK_UnsignedMinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/false, /*IsUnsigned=*/true); + case RK_None: + break; + } + + switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { + case RK_Arithmetic: + return getArithmeticReductionCost(ReduxOpCode, ReduxType, + /*IsPairwiseForm=*/true); + case RK_MinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/true, /*IsUnsigned=*/false); + case RK_UnsignedMinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/true, /*IsUnsigned=*/true); + case RK_None: + break; + } + + return getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); + } + case Instruction::InsertElement: { + const InsertElementInst * IE = cast<InsertElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); + } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); + Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); + unsigned NumVecElems = VecTypOp0->getVectorNumElements(); + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + + if (NumVecElems == Mask.size()) { + if (isReverseVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, + 0, nullptr); + if (isAlternateVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Alternate, + VecTypOp0, 0, nullptr); + + if (isZeroEltBroadcastVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Broadcast, + VecTypOp0, 0, nullptr); + + if (isSingleSourceVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, + VecTypOp0, 0, nullptr); + + return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + VecTypOp0, 0, nullptr); + } + + return -1; + } + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + SmallVector<Value *, 4> Args(II->arg_operands()); + + FastMathFlags FMF; + if (auto *FPMO = dyn_cast<FPMathOperator>(II)) + FMF = FPMO->getFastMathFlags(); + + return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), + Args, FMF); + } + return -1; + default: + // We don't have any information on this instruction. 
+ return -1; + } +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp index c7e2c0f3412a..34c998501a6c 100644 --- a/contrib/llvm/lib/Analysis/Trace.cpp +++ b/contrib/llvm/lib/Analysis/Trace.cpp @@ -16,9 +16,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Trace.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; Function *Trace::getFunction() const { @@ -30,7 +33,6 @@ Module *Trace::getModule() const { } /// print - Write trace to output stream. -/// void Trace::print(raw_ostream &O) const { Function *F = getFunction(); O << "; Trace from function " << F->getName() << ", blocks:\n"; @@ -45,7 +47,6 @@ void Trace::print(raw_ostream &O) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// dump - Debugger convenience method; writes trace to standard error /// output stream. -/// LLVM_DUMP_METHOD void Trace::dump() const { print(dbgs()); } diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 86c528de267a..c9ed026a1e33 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -123,27 +123,38 @@ #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> + using namespace llvm; // A handy option for disabling TBAA functionality. The same effect can also be // achieved by stripping the !tbaa tags from IR, but this option is sometimes // more convenient. -static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); +static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true), cl::Hidden); namespace { + /// This is a simple wrapper around an MDNode which provides a higher-level /// interface by hiding the details of how alias analysis information is encoded /// in its operands. template<typename MDNodeTy> class TBAANodeImpl { - MDNodeTy *Node; + MDNodeTy *Node = nullptr; public: - TBAANodeImpl() : Node(nullptr) {} + TBAANodeImpl() = default; explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {} /// getNode - Get the MDNode for this TBAANode. @@ -176,8 +187,8 @@ public: /// \name Specializations of \c TBAANodeImpl for const and non const qualified /// \c MDNode. 
/// @{ -typedef TBAANodeImpl<const MDNode> TBAANode; -typedef TBAANodeImpl<MDNode> MutableTBAANode; +using TBAANode = TBAANodeImpl<const MDNode>; +using MutableTBAANode = TBAANodeImpl<MDNode>; /// @} /// This is a simple wrapper around an MDNode which provides a @@ -197,12 +208,15 @@ public: MDNodeTy *getBaseType() const { return dyn_cast_or_null<MDNode>(Node->getOperand(0)); } + MDNodeTy *getAccessType() const { return dyn_cast_or_null<MDNode>(Node->getOperand(1)); } + uint64_t getOffset() const { return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); } + /// Test if this TBAAStructTagNode represents a type for objects /// which are not modified (by any means) in the context where this /// AliasAnalysis is relevant. @@ -219,8 +233,8 @@ public: /// \name Specializations of \c TBAAStructTagNodeImpl for const and non const /// qualified \c MDNods. /// @{ -typedef TBAAStructTagNodeImpl<const MDNode> TBAAStructTagNode; -typedef TBAAStructTagNodeImpl<MDNode> MutableTBAAStructTagNode; +using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>; +using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>; /// @} /// This is a simple wrapper around an MDNode which provides a @@ -228,10 +242,10 @@ typedef TBAAStructTagNodeImpl<MDNode> MutableTBAAStructTagNode; /// information is encoded in its operands. class TBAAStructTypeNode { /// This node should be created with createTBAAStructTypeNode. - const MDNode *Node; + const MDNode *Node = nullptr; public: - TBAAStructTypeNode() : Node(nullptr) {} + TBAAStructTypeNode() = default; explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} /// Get the MDNode for this TBAAStructTypeNode. @@ -283,7 +297,8 @@ public: return TBAAStructTypeNode(P); } }; -} + +} // end anonymous namespace /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA @@ -299,17 +314,8 @@ AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, if (!EnableTBAA) return AAResultBase::alias(LocA, LocB); - // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must - // be conservative. - const MDNode *AM = LocA.AATags.TBAA; - if (!AM) - return AAResultBase::alias(LocA, LocB); - const MDNode *BM = LocB.AATags.TBAA; - if (!BM) - return AAResultBase::alias(LocA, LocB); - - // If they may alias, chain to the next AliasAnalysis. - if (Aliases(AM, BM)) + // If accesses may alias, chain to the next AliasAnalysis. + if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA)) return AAResultBase::alias(LocA, LocB); // Otherwise return a definitive result. 
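A hedged usage sketch (not part of this diff; the wrapper function is hypothetical): with the simplified alias() above, a query where either location lacks a !tbaa tag is no longer short-circuited at that point; Aliases(), now a thin wrapper over matchAccessTags() shown further down, treats a missing tag as "may alias anything", so the conservative answer comes from the next analysis in the chain.

#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
using namespace llvm;

// Queries TBAA directly; when LocA or LocB carries no !tbaa metadata the
// result defers to AAResultBase (and ultimately to the other AA providers).
static AliasResult queryTBAA(TypeBasedAAResult &TBAA,
                             const MemoryLocation &LocA,
                             const MemoryLocation &LocB) {
  return TBAA.alias(LocA, LocB);
}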
@@ -365,7 +371,7 @@ ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS, if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(L, M)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(CS, Loc); } @@ -380,7 +386,7 @@ ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1, if (const MDNode *M2 = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(M1, M2)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(CS1, CS2); } @@ -409,25 +415,24 @@ bool MDNode::isTBAAVtableAccess() const { return false; } +static bool matchAccessTags(const MDNode *A, const MDNode *B, + const MDNode **GenericTag = nullptr); + MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { + const MDNode *GenericTag; + matchAccessTags(A, B, &GenericTag); + return const_cast<MDNode*>(GenericTag); +} + +static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) { if (!A || !B) return nullptr; if (A == B) return A; - // For struct-path aware TBAA, we use the access type of the tag. - assert(isStructPathTBAA(A) && isStructPathTBAA(B) && - "Auto upgrade should have taken care of this!"); - A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType()); - if (!A) - return nullptr; - B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType()); - if (!B) - return nullptr; - - SmallSetVector<MDNode *, 4> PathA; - MutableTBAANode TA(A); + SmallSetVector<const MDNode *, 4> PathA; + TBAANode TA(A); while (TA.getNode()) { if (PathA.count(TA.getNode())) report_fatal_error("Cycle found in TBAA metadata."); @@ -435,8 +440,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { TA = TA.getParent(); } - SmallSetVector<MDNode *, 4> PathB; - MutableTBAANode TB(B); + SmallSetVector<const MDNode *, 4> PathB; + TBAANode TB(B); while (TB.getNode()) { if (PathB.count(TB.getNode())) report_fatal_error("Cycle found in TBAA metadata."); @@ -447,7 +452,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { int IA = PathA.size() - 1; int IB = PathB.size() - 1; - MDNode *Ret = nullptr; + const MDNode *Ret = nullptr; while (IA >= 0 && IB >= 0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; @@ -457,17 +462,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { --IB; } - // We either did not find a match, or the only common base "type" is - // the root node. In either case, we don't have any useful TBAA - // metadata to attach. - if (!Ret || Ret->getNumOperands() < 2) - return nullptr; - - // We need to convert from a type node to a tag node. - Type *Int64 = IntegerType::get(A->getContext(), 64); - Metadata *Ops[3] = {Ret, Ret, - ConstantAsMetadata::get(ConstantInt::get(Int64, 0))}; - return MDNode::get(A->getContext(), Ops); + return Ret; } void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { @@ -490,70 +485,96 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { N.NoAlias = getMetadata(LLVMContext::MD_noalias); } -/// Aliases - Test whether the type represented by A may alias the -/// type represented by B. -bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { - // Verify that both input nodes are struct-path aware. Auto-upgrade should - // have taken care of this. 
- assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); - assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); +static bool findAccessType(TBAAStructTagNode BaseTag, + const MDNode *AccessTypeNode, + uint64_t &OffsetInBase) { + // Start from the base type, follow the edge with the correct offset in + // the type DAG and adjust the offset until we reach the access type or + // until we reach a root node. + TBAAStructTypeNode BaseType(BaseTag.getBaseType()); + OffsetInBase = BaseTag.getOffset(); + + while (const MDNode *BaseTypeNode = BaseType.getNode()) { + if (BaseTypeNode == AccessTypeNode) + return true; - // Keep track of the root node for A and B. - TBAAStructTypeNode RootA, RootB; - TBAAStructTagNode TagA(A), TagB(B); + // Follow the edge with the correct offset, Offset will be adjusted to + // be relative to the field type. + BaseType = BaseType.getParent(OffsetInBase); + } + return false; +} - // TODO: We need to check if AccessType of TagA encloses AccessType of - // TagB to support aggregate AccessType. If yes, return true. +static const MDNode *createAccessTag(const MDNode *AccessType) { + // If there is no access type or the access type is the root node, then + // we don't have any useful access tag to return. + if (!AccessType || AccessType->getNumOperands() < 2) + return nullptr; - // Start from the base type of A, follow the edge with the correct offset in - // the type DAG and adjust the offset until we reach the base type of B or - // until we reach the Root node. - // Compare the adjusted offset once we have the same base. + Type *Int64 = IntegerType::get(AccessType->getContext(), 64); + auto *ImmutabilityFlag = ConstantAsMetadata::get(ConstantInt::get(Int64, 0)); + Metadata *Ops[] = {const_cast<MDNode*>(AccessType), + const_cast<MDNode*>(AccessType), ImmutabilityFlag}; + return MDNode::get(AccessType->getContext(), Ops); +} - // Climb the type DAG from base type of A to see if we reach base type of B. - const MDNode *BaseA = TagA.getBaseType(); - const MDNode *BaseB = TagB.getBaseType(); - uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); - for (TBAAStructTypeNode T(BaseA);;) { - if (T.getNode() == BaseB) - // Base type of A encloses base type of B, check if the offsets match. - return OffsetA == OffsetB; - - RootA = T; - // Follow the edge with the correct offset, OffsetA will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetA); - if (!T.getNode()) - break; +/// matchTags - Return true if the given couple of accesses are allowed to +/// overlap. If \arg GenericTag is not null, then on return it points to the +/// most generic access descriptor for the given two. +static bool matchAccessTags(const MDNode *A, const MDNode *B, + const MDNode **GenericTag) { + if (A == B) { + if (GenericTag) + *GenericTag = A; + return true; } - // Reset OffsetA and climb the type DAG from base type of B to see if we reach - // base type of A. - OffsetA = TagA.getOffset(); - for (TBAAStructTypeNode T(BaseB);;) { - if (T.getNode() == BaseA) - // Base type of B encloses base type of A, check if the offsets match. - return OffsetA == OffsetB; - - RootB = T; - // Follow the edge with the correct offset, OffsetB will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetB); - if (!T.getNode()) - break; + // Accesses with no TBAA information may alias with any other accesses. + if (!A || !B) { + if (GenericTag) + *GenericTag = nullptr; + return true; } - // Neither node is an ancestor of the other. 
+ // Verify that both input nodes are struct-path aware. Auto-upgrade should + // have taken care of this. + assert(isStructPathTBAA(A) && "Access A is not struct-path aware!"); + assert(isStructPathTBAA(B) && "Access B is not struct-path aware!"); - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. - if (RootA.getNode() != RootB.getNode()) + TBAAStructTagNode TagA(A), TagB(B); + const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(), + TagB.getAccessType()); + if (GenericTag) + *GenericTag = createAccessTag(CommonType); + + // TODO: We need to check if AccessType of TagA encloses AccessType of + // TagB to support aggregate AccessType. If yes, return true. + + // Climb the type DAG from base type of A to see if we reach base type of B. + uint64_t OffsetA; + if (findAccessType(TagA, TagB.getBaseType(), OffsetA)) + return OffsetA == TagB.getOffset(); + + // Climb the type DAG from base type of B to see if we reach base type of A. + uint64_t OffsetB; + if (findAccessType(TagB, TagA.getBaseType(), OffsetB)) + return OffsetB == TagA.getOffset(); + + // If the final access types have different roots, they're part of different + // potentially unrelated type systems, so we must be conservative. + if (!CommonType) return true; // If they have the same root, then we've proved there's no alias. return false; } +/// Aliases - Test whether the access represented by tag A may alias the +/// access represented by tag B. +bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { + return matchAccessTags(A, B); +} + AnalysisKey TypeBasedAA::Key; TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) { diff --git a/contrib/llvm/lib/Analysis/ValueLattice.cpp b/contrib/llvm/lib/Analysis/ValueLattice.cpp new file mode 100644 index 000000000000..7de437ca480e --- /dev/null +++ b/contrib/llvm/lib/Analysis/ValueLattice.cpp @@ -0,0 +1,26 @@ +//===- ValueLattice.cpp - Value constraint analysis -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ValueLattice.h" + +namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val) { + if (Val.isUndefined()) + return OS << "undefined"; + if (Val.isOverdefined()) + return OS << "overdefined"; + + if (Val.isNotConstant()) + return OS << "notconstant<" << *Val.getNotConstant() << ">"; + if (Val.isConstantRange()) + return OS << "constantrange<" << Val.getConstantRange().getLower() << ", " + << Val.getConstantRange().getUpper() << ">"; + return OS << "constant<" << *Val.getConstant() << ">"; +} +} // end namespace llvm diff --git a/contrib/llvm/lib/Analysis/ValueLatticeUtils.cpp b/contrib/llvm/lib/Analysis/ValueLatticeUtils.cpp new file mode 100644 index 000000000000..22c9de4fe94d --- /dev/null +++ b/contrib/llvm/lib/Analysis/ValueLatticeUtils.cpp @@ -0,0 +1,44 @@ +//===-- ValueLatticeUtils.cpp - Utils for solving lattices ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements common functions useful for performing data-flow +// analyses that propagate values across function boundaries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +using namespace llvm; + +bool llvm::canTrackArgumentsInterprocedurally(Function *F) { + return F->hasLocalLinkage() && !F->hasAddressTaken(); +} + +bool llvm::canTrackReturnsInterprocedurally(Function *F) { + return F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked); +} + +bool llvm::canTrackGlobalVariableInterprocedurally(GlobalVariable *GV) { + if (GV->isConstant() || !GV->hasLocalLinkage() || + !GV->hasDefinitiveInitializer()) + return false; + return !any_of(GV->users(), [&](User *U) { + if (auto *Store = dyn_cast<StoreInst>(U)) { + if (Store->getValueOperand() == GV || Store->isVolatile()) + return true; + } else if (auto *Load = dyn_cast<LoadInst>(U)) { + if (Load->isVolatile()) + return true; + } else { + return true; + } + return false; + }); +} diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index cdfe74d158c9..2730daefa625 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -13,37 +13,66 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" -#include "llvm/Analysis/VectorUtils.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Statepoint.h" -#include "llvm/Support/Debug.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" 
#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include <algorithm> #include <array> -#include <cstring> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <utility> + using namespace llvm; using namespace llvm::PatternMatch; @@ -54,12 +83,6 @@ const unsigned MaxDepth = 6; static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); -// This optimization is known to cause performance regressions is some cases, -// keep it under a temporary flag for now. -static cl::opt<bool> -DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits", - cl::Hidden, cl::init(true)); - /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { @@ -70,6 +93,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { } namespace { + // Simplifying using an assume can only be done in a particular control-flow // context (the context instruction provides that context). If an assume and // the context instruction are not in the same block then the DT helps in @@ -79,6 +103,7 @@ struct Query { AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; + // Unlike the other analyses, this may be a nullptr because not all clients // provide it currently. OptimizationRemarkEmitter *ORE; @@ -92,11 +117,12 @@ struct Query { /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo /// (all of which can call computeKnownBits), and so on. std::array<const Value *, MaxDepth> Excluded; - unsigned NumExcluded; + + unsigned NumExcluded = 0; Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr) - : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {} + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {} Query(const Query &Q, const Value *NewExcl) : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), @@ -113,6 +139,7 @@ struct Query { return std::find(Excluded.begin(), End, Value) != End; } }; + } // end anonymous namespace // Given the provided Value and, potentially, a context instruction, return @@ -171,7 +198,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } - bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) { for (const User *U : CxtI->users()) { if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) @@ -275,47 +301,7 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, computeKnownBits(Op0, LHSKnown, Depth + 1, Q); computeKnownBits(Op1, Known2, Depth + 1, Q); - // Carry in a 1 for a subtract, rather than a 0. - uint64_t CarryIn = 0; - if (!Add) { - // Sum = LHS + ~RHS + 1 - std::swap(Known2.Zero, Known2.One); - CarryIn = 1; - } - - APInt PossibleSumZero = ~LHSKnown.Zero + ~Known2.Zero + CarryIn; - APInt PossibleSumOne = LHSKnown.One + Known2.One + CarryIn; - - // Compute known bits of the carry. - APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnown.Zero ^ Known2.Zero); - APInt CarryKnownOne = PossibleSumOne ^ LHSKnown.One ^ Known2.One; - - // Compute set of known bits (where all three relevant bits are known). 
- APInt LHSKnownUnion = LHSKnown.Zero | LHSKnown.One; - APInt RHSKnownUnion = Known2.Zero | Known2.One; - APInt CarryKnownUnion = CarryKnownZero | CarryKnownOne; - APInt Known = LHSKnownUnion & RHSKnownUnion & CarryKnownUnion; - - assert((PossibleSumZero & Known) == (PossibleSumOne & Known) && - "known bits of sum differ"); - - // Compute known bits of the result. - KnownOut.Zero = ~PossibleSumOne & Known; - KnownOut.One = PossibleSumOne & Known; - - // Are we still trying to solve for the sign bit? - if (!Known.isSignBitSet()) { - if (NSW) { - // Adding two non-negative numbers, or subtracting a negative number from - // a non-negative one, can't wrap into negative. - if (LHSKnown.isNonNegative() && Known2.isNonNegative()) - KnownOut.makeNonNegative(); - // Adding two negative numbers, or subtracting a non-negative number from - // a negative one, can't wrap into non-negative. - else if (LHSKnown.isNegative() && Known2.isNegative()) - KnownOut.makeNegative(); - } - } + KnownOut = KnownBits::computeForAddSub(Add, NSW, LHSKnown, Known2); } static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, @@ -350,21 +336,78 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, } } - // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conservative estimate for high known-0 bits. - // More trickiness is possible, but this is sufficient for the - // interesting case of alignment computation. - unsigned TrailZ = Known.countMinTrailingZeros() + - Known2.countMinTrailingZeros(); + assert(!Known.hasConflict() && !Known2.hasConflict()); + // Compute a conservative estimate for high known-0 bits. unsigned LeadZ = std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(), BitWidth) - BitWidth; - - TrailZ = std::min(TrailZ, BitWidth); LeadZ = std::min(LeadZ, BitWidth); + + // The result of the bottom bits of an integer multiply can be + // inferred by looking at the bottom bits of both operands and + // multiplying them together. + // We can infer at least the minimum number of known trailing bits + // of both operands. Depending on number of trailing zeros, we can + // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming + // a and b are divisible by m and n respectively. + // We then calculate how many of those bits are inferrable and set + // the output. For example, the i8 mul: + // a = XXXX1100 (12) + // b = XXXX1110 (14) + // We know the bottom 3 bits are zero since the first can be divided by + // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4). + // Applying the multiplication to the trimmed arguments gets: + // XX11 (3) + // X111 (7) + // ------- + // XX11 + // XX11 + // XX11 + // XX11 + // ------- + // XXXXX01 + // Which allows us to infer the 2 LSBs. Since we're multiplying the result + // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits. + // The proof for this can be described as: + // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) && + // (C7 == (1 << (umin(countTrailingZeros(C1), C5) + + // umin(countTrailingZeros(C2), C6) + + // umin(C5 - umin(countTrailingZeros(C1), C5), + // C6 - umin(countTrailingZeros(C2), C6)))) - 1) + // %aa = shl i8 %a, C5 + // %bb = shl i8 %b, C6 + // %aaa = or i8 %aa, C1 + // %bbb = or i8 %bb, C2 + // %mul = mul i8 %aaa, %bbb + // %mask = and i8 %mul, C7 + // => + // %mask = i8 ((C1*C2)&C7) + // Where C5, C6 describe the known bits of %a, %b + // C1, C2 describe the known bottom bits of %a, %b. 
+ // C7 describes the mask of the known bits of the result. + APInt Bottom0 = Known.One; + APInt Bottom1 = Known2.One; + + // How many times we'd be able to divide each argument by 2 (shr by 1). + // This gives us the number of trailing zeros on the multiplication result. + unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes(); + unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes(); + unsigned TrailZero0 = Known.countMinTrailingZeros(); + unsigned TrailZero1 = Known2.countMinTrailingZeros(); + unsigned TrailZ = TrailZero0 + TrailZero1; + + // Figure out the fewest known-bits operand. + unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0, + TrailBitsKnown1 - TrailZero1); + unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth); + + APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) * + Bottom1.getLoBits(TrailBitsKnown1); + Known.resetAll(); - Known.Zero.setLowBits(TrailZ); Known.Zero.setHighBits(LeadZ); + Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown); + Known.One |= BottomKnown.getLoBits(ResultBitsKnown); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in @@ -420,17 +463,19 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) { continue; // If all uses of this value are ephemeral, then so is this value. - if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) { + if (llvm::all_of(V->users(), [&](const User *U) { + return EphValues.count(U); + })) { if (V == E) return true; - EphValues.insert(V); - if (const User *U = dyn_cast<User>(V)) - for (User::const_op_iterator J = U->op_begin(), JE = U->op_end(); - J != JE; ++J) { - if (isSafeToSpeculativelyExecute(*J)) - WorkSet.push_back(*J); - } + if (V == I || isSafeToSpeculativelyExecute(V)) { + EphValues.insert(V); + if (const User *U = dyn_cast<User>(V)) + for (User::const_op_iterator J = U->op_begin(), JE = U->op_end(); + J != JE; ++J) + WorkSet.push_back(*J); + } } } @@ -438,13 +483,14 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) { } // Is this an intrinsic that cannot be speculated but also cannot trap? -static bool isAssumeLikeIntrinsic(const Instruction *I) { +bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { if (const CallInst *CI = dyn_cast<CallInst>(I)) if (Function *F = CI->getCalledFunction()) switch (F->getIntrinsicID()) { default: break; // FIXME: This list is repeated from NoTTI::getIntrinsicCost. case Intrinsic::assume: + case Intrinsic::sideeffect: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::invariant_start: @@ -463,7 +509,6 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) { bool llvm::isValidAssumeForContext(const Instruction *Inv, const Instruction *CxtI, const DominatorTree *DT) { - // There are two restrictions on the use of an assume: // 1. The assume must dominate the context (or the control flow must // reach the assume whenever it reaches the context). 
@@ -560,7 +605,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, m_BitCast(m_Specific(V)))); CmpInst::Predicate Pred; - ConstantInt *C; + uint64_t C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { @@ -662,51 +707,55 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT) && + C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + RHSKnown.Zero.lshrInPlace(C); Known.Zero |= RHSKnown.Zero; - RHSKnown.One.lshrInPlace(C->getZExtValue()); + RHSKnown.One.lshrInPlace(C); Known.One |= RHSKnown.One; // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT) && + C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - RHSKnown.One.lshrInPlace(C->getZExtValue()); + RHSKnown.One.lshrInPlace(C); Known.Zero |= RHSKnown.One; - RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + RHSKnown.Zero.lshrInPlace(C); Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT) && + C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - Known.Zero |= RHSKnown.Zero << C->getZExtValue(); - Known.One |= RHSKnown.One << C->getZExtValue(); + Known.Zero |= RHSKnown.Zero << C; + Known.One |= RHSKnown.One << C; // assume(~(v >> c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT) && + C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - Known.Zero |= RHSKnown.One << C->getZExtValue(); - Known.One |= RHSKnown.Zero << C->getZExtValue(); + Known.Zero |= RHSKnown.One << C; + Known.One |= RHSKnown.Zero << C; // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && @@ -784,24 +833,26 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, if (Known.Zero.intersects(Known.One)) { Known.resetAll(); - if (Q.ORE) { - auto *CxtI = const_cast<Instruction *>(Q.CxtI); - OptimizationRemarkAnalysis ORA("value-tracking", "BadAssumption", CxtI); - Q.ORE->emit(ORA << "Detected conflicting code assumptions. 
Program may " - "have undefined behavior, or compiler may have " - "internal error."); - } + if (Q.ORE) + Q.ORE->emit([&]() { + auto *CxtI = const_cast<Instruction *>(Q.CxtI); + return OptimizationRemarkAnalysis("value-tracking", "BadAssumption", + CxtI) + << "Detected conflicting code assumptions. Program may " + "have undefined behavior, or compiler may have " + "internal error."; + }); } } -// Compute known bits from a shift operator, including those with a -// non-constant shift amount. Known is the outputs of this function. Known2 is a -// pre-allocated temporary with the/ same bit width as Known. KZF and KOF are -// operator-specific functors that, given the known-zero or known-one bits -// respectively, and a shift amount, compute the implied known-zero or known-one -// bits of the shift operator's result respectively for that shift amount. The -// results from calling KZF and KOF are conservatively combined for all -// permitted shift amounts. +/// Compute known bits from a shift operator, including those with a +/// non-constant shift amount. Known is the output of this function. Known2 is a +/// pre-allocated temporary with the same bit width as Known. KZF and KOF are +/// operator-specific functors that, given the known-zero or known-one bits +/// respectively, and a shift amount, compute the implied known-zero or +/// known-one bits of the shift operator's result respectively for that shift +/// amount. The results from calling KZF and KOF are conservatively combined for +/// all permitted shift amounts. static void computeKnownBitsFromShiftOperator( const Operator *I, KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q, @@ -815,19 +866,20 @@ static void computeKnownBitsFromShiftOperator( computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); Known.Zero = KZF(Known.Zero, ShiftAmt); Known.One = KOF(Known.One, ShiftAmt); - // If there is conflict between Known.Zero and Known.One, this must be an - // overflowing left shift, so the shift result is undefined. Clear Known - // bits so that other code could propagate this undef. - if ((Known.Zero & Known.One) != 0) - Known.resetAll(); + // If the known bits conflict, this must be an overflowing left shift, so + // the shift result is poison. We can return anything we want. Choose 0 for + // the best folding opportunity. + if (Known.hasConflict()) + Known.setAllZero(); return; } computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); - // If the shift amount could be greater than or equal to the bit-width of the LHS, the - // value could be undef, so we don't know anything about it. + // If the shift amount could be greater than or equal to the bit-width of the + // LHS, the value could be poison, but bail out because the check below is + // expensive. TODO: Should we just carry on? if ((~Known.Zero).uge(BitWidth)) { Known.resetAll(); return; @@ -851,8 +903,7 @@ static void computeKnownBitsFromShiftOperator( // Early exit if we can't constrain any well-defined shift amount. if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) && !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) { - ShifterOperandIsNonZero = - isKnownNonZero(I->getOperand(1), Depth + 1, Q); + ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q); if (!*ShifterOperandIsNonZero) return; } @@ -883,13 +934,10 @@ static void computeKnownBitsFromShiftOperator( Known.One &= KOF(Known2.One, ShiftAmt); } - // If there are no compatible shift amounts, then we've proven that the shift - // amount must be >= the BitWidth, and the result is undefined. 
We could - // return anything we'd like, but we need to make sure the sets of known bits - // stay disjoint (it should be better for some other code to actually - // propagate the undef than to pick a value here using known bits). - if (Known.Zero.intersects(Known.One)) - Known.resetAll(); + // If the known bits conflict, the result is poison. Return a 0 and hope the + // caller can further optimize that. + if (Known.hasConflict()) + Known.setAllZero(); } static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, @@ -931,7 +979,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, } break; } - case Instruction::Or: { + case Instruction::Or: computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); @@ -940,7 +988,6 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // Output known-1 are known to be set if set in either the LHS | RHS. Known.One |= Known2.One; break; - } case Instruction::Xor: { computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); @@ -1103,7 +1150,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, break; } case Instruction::LShr: { - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + // (lshr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) { APInt KZResult = KnownZero.lshr(ShiftAmt); // High bits known zero. @@ -1298,9 +1345,6 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), Known3.countMinTrailingZeros())); - if (DontImproveNonNegativePhiBits) - break; - auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU); if (OverflowOp && OverflowOp->hasNoSignedWrap()) { // If initial value of recurrence is nonnegative, and we are adding @@ -1525,9 +1569,8 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // We know that CDS must be a vector of integers. Take the intersection of // each element. Known.Zero.setAllBits(); Known.One.setAllBits(); - APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - Elt = CDS->getElementAsInteger(i); + APInt Elt = CDS->getElementAsAPInt(i); Known.Zero &= ~Elt; Known.One &= Elt; } @@ -1538,7 +1581,6 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // We know that CV must be a vector of integers. Take the intersection of // each element. Known.Zero.setAllBits(); Known.One.setAllBits(); - APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { Constant *Element = CV->getAggregateElement(i); auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); @@ -1546,7 +1588,7 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, Known.resetAll(); return; } - Elt = ElementCI->getValue(); + const APInt &Elt = ElementCI->getValue(); Known.Zero &= ~Elt; Known.One &= Elt; } @@ -1602,6 +1644,8 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, /// types and vectors of integers. 
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const Query &Q) { + assert(Depth <= MaxDepth && "Limit Search Depth"); + if (const Constant *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return OrZero; @@ -1755,6 +1799,58 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, return false; } +static bool isKnownNonNullFromDominatingCondition(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull"); + + if (!CtxI || !DT) + return false; + + unsigned NumUsesExplored = 0; + for (auto *U : V->users()) { + // Avoid massive lists + if (NumUsesExplored >= DomConditionsMaxUses) + break; + NumUsesExplored++; + + // If the value is used as an argument to a call or invoke, then argument + // attributes may provide an answer about null-ness. + if (auto CS = ImmutableCallSite(U)) + if (auto *CalledFunc = CS.getCalledFunction()) + for (const Argument &Arg : CalledFunc->args()) + if (CS.getArgOperand(Arg.getArgNo()) == V && + Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI)) + return true; + + // Consider only compare instructions uniquely controlling a branch + CmpInst::Predicate Pred; + if (!match(const_cast<User *>(U), + m_c_ICmp(Pred, m_Specific(V), m_Zero())) || + (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)) + continue; + + for (auto *CmpU : U->users()) { + if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) { + assert(BI->isConditional() && "uses a comparison!"); + + BasicBlock *NonNullSuccessor = + BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0); + BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); + if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) + return true; + } else if (Pred == ICmpInst::ICMP_NE && + match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) && + DT->dominates(cast<Instruction>(CmpU), CtxI)) { + return true; + } + } + } + + return false; +} + /// Does the 'Range' metadata (which must be a valid MD_range operand list) /// ensure that the value it's attached to is never Value? 'RangeType' is /// is the type of the value described by the range. @@ -1800,7 +1896,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { return true; } - return false; + // A global variable in address space 0 is non null unless extern weak + // or an absolute symbol reference. Other address spaces may have null as a + // valid address for a global, so we can't assume anything. + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && + GV->getType()->getAddressSpace() == 0) + return true; + } else + return false; } if (auto *I = dyn_cast<Instruction>(V)) { @@ -1815,14 +1919,36 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } } + // Check for pointer simplifications. + if (V->getType()->isPointerTy()) { + // Alloca never returns null, malloc might. + if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0) + return true; + + // A byval, inalloca, or nonnull argument is never null. + if (const Argument *A = dyn_cast<Argument>(V)) + if (A->hasByValOrInAllocaAttr() || A->hasNonNullAttr()) + return true; + + // A Load tagged with nonnull metadata is never null. 
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) + if (LI->getMetadata(LLVMContext::MD_nonnull)) + return true; + + if (auto CS = ImmutableCallSite(V)) + if (CS.isReturnNonNull()) + return true; + } + // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth++ >= MaxDepth) return false; - // Check for pointer simplifications. + // Check for recursive pointer simplifications. if (V->getType()->isPointerTy()) { - if (isKnownNonNullAt(V, Q.CxtI, Q.DT)) + if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) return true; + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) if (isGEPKnownNonNull(GEP, Depth, Q)) return true; @@ -1949,7 +2075,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } } // Check if all incoming values are non-zero constant. - bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) { + bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) { return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero(); }); if (AllNonZeroConstants) @@ -2033,11 +2159,7 @@ static unsigned computeNumSignBitsVectorConstant(const Value *V, if (!Elt) return 0; - // If the sign bit is 1, flip the bits, so we always count leading zeros. - APInt EltVal = Elt->getValue(); - if (EltVal.isNegative()) - EltVal = ~EltVal; - MinSignBits = std::min(MinSignBits, EltVal.countLeadingZeros()); + MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); } return MinSignBits; @@ -2061,6 +2183,7 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, /// vector element with the mininum number of known sign bits. static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, const Query &Q) { + assert(Depth <= MaxDepth && "Limit Search Depth"); // We return the minimum number of sign bits that are guaranteed to be present // in V, so for undef we have to conservatively return 1. We don't have the @@ -2236,6 +2359,17 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; + case Instruction::Mul: { + // The output of the Mul can be at most twice the valid bits in the inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (SignBitsOp0 == 1) return 1; // Early out. + unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (SignBitsOp1 == 1) return 1; + unsigned OutValidBits = + (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); + return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; + } + case Instruction::PHI: { const PHINode *PN = cast<PHINode>(U); unsigned NumIncomingValues = PN->getNumIncomingValues(); @@ -2507,9 +2641,7 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: - if (ICS->hasNoNaNs()) - return Intrinsic::sqrt; - return Intrinsic::not_intrinsic; + return Intrinsic::sqrt; } return Intrinsic::not_intrinsic; @@ -2520,41 +2652,40 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, /// /// NOTE: this function will need to be revisited when we support non-default /// rounding modes! -/// bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth) { - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) + if (auto *CFP = dyn_cast<ConstantFP>(V)) return !CFP->getValueAPF().isNegZero(); + // Limit search depth. if (Depth == MaxDepth) - return false; // Limit search depth. 
+ return false; - const Operator *I = dyn_cast<Operator>(V); - if (!I) return false; + auto *Op = dyn_cast<Operator>(V); + if (!Op) + return false; - // Check if the nsz fast-math flag is set - if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I)) + // Check if the nsz fast-math flag is set. + if (auto *FPO = dyn_cast<FPMathOperator>(Op)) if (FPO->hasNoSignedZeros()) return true; - // (add x, 0.0) is guaranteed to return +0.0, not -0.0. - if (I->getOpcode() == Instruction::FAdd) - if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1))) - if (CFP->isNullValue()) - return true; + // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. + if (match(Op, m_FAdd(m_Value(), m_Zero()))) + return true; // sitofp and uitofp turn into +0.0 for zero. - if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I)) + if (isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) return true; - if (const CallInst *CI = dyn_cast<CallInst>(I)) { - Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI); + if (auto *Call = dyn_cast<CallInst>(Op)) { + Intrinsic::ID IID = getIntrinsicForCallSite(Call, TLI); switch (IID) { default: break; // sqrt(-0.0) = -0.0, no other negative results are possible. case Intrinsic::sqrt: - return CannotBeNegativeZero(CI->getArgOperand(0), TLI, Depth + 1); + return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1); // fabs(x) != -0.0 case Intrinsic::fabs: return true; @@ -2690,6 +2821,41 @@ bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) { return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0); } +bool llvm::isKnownNeverNaN(const Value *V) { + assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type"); + + // If we're told that NaNs won't happen, assume they won't. + if (auto *FPMathOp = dyn_cast<FPMathOperator>(V)) + if (FPMathOp->hasNoNaNs()) + return true; + + // TODO: Handle instructions and potentially recurse like other 'isKnown' + // functions. For example, the result of sitofp is never NaN. + + // Handle scalar constants. + if (auto *CFP = dyn_cast<ConstantFP>(V)) + return !CFP->isNaN(); + + // Bail out for constant expressions, but try to handle vector constants. + if (!V->getType()->isVectorTy() || !isa<Constant>(V)) + return false; + + // For vectors, verify that each element is not NaN. + unsigned NumElts = V->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = cast<Constant>(V)->getAggregateElement(i); + if (!Elt) + return false; + if (isa<UndefValue>(Elt)) + continue; + auto *CElt = dyn_cast<ConstantFP>(Elt); + if (!CElt || CElt->isNaN()) + return false; + } + // All elements were confirmed not-NaN or undefined. + return true; +} + /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, @@ -2749,7 +2915,6 @@ Value *llvm::isBytewiseValue(Value *V) { return nullptr; } - // This is the recursive version of BuildSubAggregate. It takes a few different // arguments. Idxs is the index within the nested struct From that we are // looking at now (which is of type IndexedType). 
IdxSkip is the number of @@ -2760,7 +2925,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, SmallVectorImpl<unsigned> &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { - llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType); + StructType *STy = dyn_cast<StructType>(IndexedType); if (STy) { // Save the original To argument so we can modify it Value *OrigTo = To; @@ -2799,8 +2964,8 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, return nullptr; // Insert the value in the new (sub) aggregrate - return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), - "tmp", InsertBefore); + return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), + "tmp", InsertBefore); } // This helper takes a nested struct and extracts a part of it (which is again a @@ -3307,7 +3472,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { /// This is a wrapper around GetUnderlyingObjects and adds support for basic /// ptrtoint+arithmetic+inttoptr sequences. -void llvm::getUnderlyingObjectsForCodeGen(const Value *V, +/// It returns false if unidentified object is found in GetUnderlyingObjects. +bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl<Value *> &Objects, const DataLayout &DL) { SmallPtrSet<const Value *, 16> Visited; @@ -3333,11 +3499,12 @@ void llvm::getUnderlyingObjectsForCodeGen(const Value *V, // getUnderlyingObjectsForCodeGen also fails for safety. if (!isIdentifiedObject(V)) { Objects.clear(); - return; + return false; } Objects.push_back(const_cast<Value *>(V)); } } while (!Working.empty()); + return true; } /// Return true if the only users of this pointer are lifetime markers. @@ -3401,7 +3568,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, // Speculative load may create a race that did not exist in the source. LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) || // Speculative load may load data from dirty regions. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress)) + LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || + LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), @@ -3443,100 +3611,6 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) { return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); } -/// Return true if we know that the specified value is never null. -bool llvm::isKnownNonNull(const Value *V) { - assert(V->getType()->isPointerTy() && "V must be pointer type"); - - // Alloca never returns null, malloc might. - if (isa<AllocaInst>(V)) return true; - - // A byval, inalloca, or nonnull argument is never null. - if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); - - // A global variable in address space 0 is non null unless extern weak - // or an absolute symbol reference. Other address spaces may have null as a - // valid address for a global, so we can't assume anything. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return !GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && - GV->getType()->getAddressSpace() == 0; - - // A Load tagged with nonnull metadata is never null. 
- if (const LoadInst *LI = dyn_cast<LoadInst>(V)) - return LI->getMetadata(LLVMContext::MD_nonnull); - - if (auto CS = ImmutableCallSite(V)) - if (CS.isReturnNonNull()) - return true; - - return false; -} - -static bool isKnownNonNullFromDominatingCondition(const Value *V, - const Instruction *CtxI, - const DominatorTree *DT) { - assert(V->getType()->isPointerTy() && "V must be pointer type"); - assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull"); - assert(CtxI && "Context instruction required for analysis"); - assert(DT && "Dominator tree required for analysis"); - - unsigned NumUsesExplored = 0; - for (auto *U : V->users()) { - // Avoid massive lists - if (NumUsesExplored >= DomConditionsMaxUses) - break; - NumUsesExplored++; - - // If the value is used as an argument to a call or invoke, then argument - // attributes may provide an answer about null-ness. - if (auto CS = ImmutableCallSite(U)) - if (auto *CalledFunc = CS.getCalledFunction()) - for (const Argument &Arg : CalledFunc->args()) - if (CS.getArgOperand(Arg.getArgNo()) == V && - Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI)) - return true; - - // Consider only compare instructions uniquely controlling a branch - CmpInst::Predicate Pred; - if (!match(const_cast<User *>(U), - m_c_ICmp(Pred, m_Specific(V), m_Zero())) || - (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)) - continue; - - for (auto *CmpU : U->users()) { - if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) { - assert(BI->isConditional() && "uses a comparison!"); - - BasicBlock *NonNullSuccessor = - BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0); - BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); - if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) - return true; - } else if (Pred == ICmpInst::ICMP_NE && - match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) && - DT->dominates(cast<Instruction>(CmpU), CtxI)) { - return true; - } - } - } - - return false; -} - -bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI, - const DominatorTree *DT) { - if (isa<ConstantPointerNull>(V) || isa<UndefValue>(V)) - return false; - - if (isKnownNonNull(V)) - return true; - - if (!CtxI || !DT) - return false; - - return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT); -} - OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, @@ -3771,7 +3845,7 @@ bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II, return true; }; - return any_of(GuardingBranches, AllUsesGuardedByBranch); + return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } @@ -3846,7 +3920,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // FIXME: This isn't aggressive enough; a call which only writes to a global // is guaranteed to return. return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || - match(I, m_Intrinsic<Intrinsic::assume>()); + match(I, m_Intrinsic<Intrinsic::assume>()) || + match(I, m_Intrinsic<Intrinsic::sideeffect>()); } // Other instructions return normally. @@ -3975,7 +4050,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { } break; - }; + } return false; } @@ -3994,21 +4069,75 @@ static bool isKnownNonZero(const Value *V) { return false; } -/// Match non-obvious integer minimum and maximum sequences. 
-static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, - Value *CmpLHS, Value *CmpRHS, - Value *TrueVal, Value *FalseVal, - Value *&LHS, Value *&RHS) { - // Assume success. If there's no match, callers should not use these anyway. +/// Match clamp pattern for float types without care about NaNs or signed zeros. +/// Given non-min/max outer cmp/select from the clamp pattern this +/// function recognizes if it can be substitued by a "canonical" min/max +/// pattern. +static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred, + Value *CmpLHS, Value *CmpRHS, + Value *TrueVal, Value *FalseVal, + Value *&LHS, Value *&RHS) { + // Try to match + // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2)) + // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2)) + // and return description of the outer Max/Min. + + // First, check if select has inverse order: + if (CmpRHS == FalseVal) { + std::swap(TrueVal, FalseVal); + Pred = CmpInst::getInversePredicate(Pred); + } + + // Assume success now. If there's no match, callers should not use these anyway. LHS = TrueVal; RHS = FalseVal; - // Recognize variations of: - // CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) + const APFloat *FC1; + if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite()) + return {SPF_UNKNOWN, SPNB_NA, false}; + + const APFloat *FC2; + switch (Pred) { + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULT: + case CmpInst::FCMP_ULE: + if (match(FalseVal, + m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)), + m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) && + FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan) + return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false}; + break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_UGE: + if (match(FalseVal, + m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)), + m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) && + FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan) + return {SPF_FMINNUM, SPNB_RETURNS_ANY, false}; + break; + default: + break; + } + + return {SPF_UNKNOWN, SPNB_NA, false}; +} + +/// Recognize variations of: +/// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) +static SelectPatternResult matchClamp(CmpInst::Predicate Pred, + Value *CmpLHS, Value *CmpRHS, + Value *TrueVal, Value *FalseVal) { + // Swap the select operands and predicate to match the patterns below. + if (CmpRHS != TrueVal) { + Pred = ICmpInst::getSwappedPredicate(Pred); + std::swap(TrueVal, FalseVal); + } const APInt *C1; if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) { const APInt *C2; - // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1) if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) @@ -4029,6 +4158,21 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) return {SPF_UMIN, SPNB_NA, false}; } + return {SPF_UNKNOWN, SPNB_NA, false}; +} + +/// Match non-obvious integer minimum and maximum sequences. +static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, + Value *CmpLHS, Value *CmpRHS, + Value *TrueVal, Value *FalseVal, + Value *&LHS, Value *&RHS) { + // Assume success. If there's no match, callers should not use these anyway. 
+ LHS = TrueVal; + RHS = FalseVal; + + SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); + if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) + return SPR; if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4047,6 +4191,7 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; + const APInt *C1; if (!match(CmpRHS, m_APInt(C1))) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4057,7 +4202,8 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, // Is the sign bit set? // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN - if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) + if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() && + C2->isMaxSignedValue()) return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; // Is the sign bit clear? @@ -4189,21 +4335,48 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X - if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) { + if (Pred == ICmpInst::ICMP_SGT && + (C1->isNullValue() || C1->isAllOnesValue())) { return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X - if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) { + if (Pred == ICmpInst::ICMP_SLT && + (C1->isNullValue() || C1->isOneValue())) { return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } } } - return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); + if (CmpInst::isIntPredicate(Pred)) + return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); + + // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar + // may return either -0.0 or 0.0, so fcmp/select pair has stricter + // semantics than minNum. Be conservative in such case. + if (NaNBehavior != SPNB_RETURNS_ANY || + (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && + !isKnownNonZero(CmpRHS))) + return {SPF_UNKNOWN, SPNB_NA, false}; + + return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); } +/// Helps to match a select pattern in case of a type mismatch. +/// +/// The function processes the case when type of true and false values of a +/// select instruction differs from type of the cmp instruction operands because +/// of a cast instructon. The function checks if it is legal to move the cast +/// operation after "select". If yes, it returns the new second value of +/// "select" (with the assumption that cast is moved): +/// 1. As operand of cast instruction when both values of "select" are same cast +/// instructions. +/// 2. As restored constant (by applying reverse cast operation) when the first +/// value of the "select" is a cast operation and the second value is a +/// constant. +/// NOTE: We return only the new second value because the first value could be +/// accessed as operand of cast instruction. 
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, Instruction::CastOps *CastOp) { auto *Cast1 = dyn_cast<CastInst>(V1); @@ -4234,7 +4407,34 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, CastedTo = ConstantExpr::getTrunc(C, SrcTy, true); break; case Instruction::Trunc: - CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned()); + Constant *CmpConst; + if (match(CmpI->getOperand(1), m_Constant(CmpConst)) && + CmpConst->getType() == SrcTy) { + // Here we have the following case: + // + // %cond = cmp iN %x, CmpConst + // %tr = trunc iN %x to iK + // %narrowsel = select i1 %cond, iK %t, iK C + // + // We can always move trunc after select operation: + // + // %cond = cmp iN %x, CmpConst + // %widesel = select i1 %cond, iN %x, iN CmpConst + // %tr = trunc iN %widesel to iK + // + // Note that C could be extended in any way because we don't care about + // upper bits after truncation. It can't be abs pattern, because it would + // look like: + // + // select i1 %cond, x, -x. + // + // So only min/max pattern could be matched. Such match requires widened C + // == CmpConst. That is why set widened C = CmpConst, condition trunc + // CmpConst == C is checked below. + CastedTo = CmpConst; + } else { + CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned()); + } break; case Instruction::FPTrunc: CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true); @@ -4307,11 +4507,9 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, } /// Return true if "icmp Pred LHS RHS" is always true. -static bool isTruePredicate(CmpInst::Predicate Pred, - const Value *LHS, const Value *RHS, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { +static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, + const Value *RHS, const DataLayout &DL, + unsigned Depth) { assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!"); if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; @@ -4348,8 +4546,8 @@ static bool isTruePredicate(CmpInst::Predicate Pred, if (match(A, m_Or(m_Value(X), m_APInt(CA))) && match(B, m_Or(m_Specific(X), m_APInt(CB)))) { KnownBits Known(CA->getBitWidth()); - computeKnownBits(X, Known, DL, Depth + 1, AC, CxtI, DT); - + computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, + /*CxtI*/ nullptr, /*DT*/ nullptr); if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) return true; } @@ -4371,27 +4569,23 @@ static bool isTruePredicate(CmpInst::Predicate Pred, /// ALHS ARHS" is true. Otherwise, return None. 
static Optional<bool> isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, - const Value *ARHS, const Value *BLHS, - const Value *BRHS, const DataLayout &DL, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT) { + const Value *ARHS, const Value *BLHS, const Value *BRHS, + const DataLayout &DL, unsigned Depth) { switch (Pred) { default: return None; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: - if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI, - DT) && - isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) + if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) && + isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth)) return true; return None; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: - if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI, - DT) && - isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) + if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) && + isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth)) return true; return None; } @@ -4453,66 +4647,22 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS, return None; } -Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, - const DataLayout &DL, bool LHSIsFalse, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { - // Bail out when we hit the limit. - if (Depth == MaxDepth) - return None; - - // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example. - if (LHS->getType() != RHS->getType()) - return None; - - Type *OpTy = LHS->getType(); - assert(OpTy->isIntOrIntVectorTy(1)); - - // LHS ==> RHS by definition - if (LHS == RHS) - return !LHSIsFalse; - - if (OpTy->isVectorTy()) - // TODO: extending the code below to handle vectors - return None; - assert(OpTy->isIntegerTy(1) && "implied by above"); - - Value *BLHS, *BRHS; - ICmpInst::Predicate BPred; - // We expect the RHS to be an icmp. - if (!match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) - return None; - - Value *ALHS, *ARHS; - ICmpInst::Predicate APred; - // The LHS can be an 'or', 'and', or 'icmp'. - if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS)))) { - // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth == MaxDepth) - return None; - // If the result of an 'or' is false, then we know both legs of the 'or' are - // false. Similarly, if the result of an 'and' is true, then we know both - // legs of the 'and' are true. - if ((LHSIsFalse && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) || - (!LHSIsFalse && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) { - if (Optional<bool> Implication = isImpliedCondition( - ALHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT)) - return Implication; - if (Optional<bool> Implication = isImpliedCondition( - ARHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT)) - return Implication; - return None; - } - return None; - } - // All of the below logic assumes both LHS and RHS are icmps. - assert(isa<ICmpInst>(LHS) && isa<ICmpInst>(RHS) && "Expected icmps."); - +/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is +/// false. Otherwise, return None if we can't infer anything. 
+static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS, + const ICmpInst *RHS, + const DataLayout &DL, bool LHSIsTrue, + unsigned Depth) { + Value *ALHS = LHS->getOperand(0); + Value *ARHS = LHS->getOperand(1); // The rest of the logic assumes the LHS condition is true. If that's not the // case, invert the predicate to make it so. - if (LHSIsFalse) - APred = CmpInst::getInversePredicate(APred); + ICmpInst::Predicate APred = + LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate(); + + Value *BLHS = RHS->getOperand(0); + Value *BRHS = RHS->getOperand(1); + ICmpInst::Predicate BPred = RHS->getPredicate(); // Can we infer anything when the two compares have matching operands? bool IsSwappedOps; @@ -4538,8 +4688,80 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, } if (APred == BPred) - return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC, - CxtI, DT); + return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth); + return None; +} + +/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is +/// false. Otherwise, return None if we can't infer anything. We expect the +/// RHS to be an icmp and the LHS to be an 'and' or an 'or' instruction. +static Optional<bool> isImpliedCondAndOr(const BinaryOperator *LHS, + const ICmpInst *RHS, + const DataLayout &DL, bool LHSIsTrue, + unsigned Depth) { + // The LHS must be an 'or' or an 'and' instruction. + assert((LHS->getOpcode() == Instruction::And || + LHS->getOpcode() == Instruction::Or) && + "Expected LHS to be 'and' or 'or'."); + + assert(Depth <= MaxDepth && "Hit recursion limit"); + + // If the result of an 'or' is false, then we know both legs of the 'or' are + // false. Similarly, if the result of an 'and' is true, then we know both + // legs of the 'and' are true. + Value *ALHS, *ARHS; + if ((!LHSIsTrue && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) || + (LHSIsTrue && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) { + // FIXME: Make this non-recursion. + if (Optional<bool> Implication = + isImpliedCondition(ALHS, RHS, DL, LHSIsTrue, Depth + 1)) + return Implication; + if (Optional<bool> Implication = + isImpliedCondition(ARHS, RHS, DL, LHSIsTrue, Depth + 1)) + return Implication; + return None; + } + return None; +} +Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, + const DataLayout &DL, bool LHSIsTrue, + unsigned Depth) { + // Bail out when we hit the limit. + if (Depth == MaxDepth) + return None; + + // A mismatch occurs when we compare a scalar cmp to a vector cmp, for + // example. + if (LHS->getType() != RHS->getType()) + return None; + + Type *OpTy = LHS->getType(); + assert(OpTy->isIntOrIntVectorTy(1) && "Expected integer type only!"); + + // LHS ==> RHS by definition + if (LHS == RHS) + return LHSIsTrue; + + // FIXME: Extending the code below to handle vectors. + if (OpTy->isVectorTy()) + return None; + + assert(OpTy->isIntegerTy(1) && "implied by above"); + + // Both LHS and RHS are icmps. + const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS); + const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS); + if (LHSCmp && RHSCmp) + return isImpliedCondICmps(LHSCmp, RHSCmp, DL, LHSIsTrue, Depth); + + // The LHS should be an 'or' or an 'and' instruction. We expect the RHS to be + // an icmp. FIXME: Add support for and/or on the RHS. 
+  const BinaryOperator *LHSBO = dyn_cast<BinaryOperator>(LHS);
+  if (LHSBO && RHSCmp) {
+    if ((LHSBO->getOpcode() == Instruction::And ||
+         LHSBO->getOpcode() == Instruction::Or))
+      return isImpliedCondAndOr(LHSBO, RHSCmp, DL, LHSIsTrue, Depth);
+  }
   return None;
 }
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index 554d132c2ab7..2becfbfe8a8d 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -91,7 +91,8 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
     return Intrinsic::not_intrinsic;

   if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
-      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume)
+      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
+      ID == Intrinsic::sideeffect)
     return ID;
   return Intrinsic::not_intrinsic;
 }
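For readers working through the computeKnownBitsMul change in the ValueTracking.cpp hunk above: the trailing-known-bits inference it introduces can be exercised outside of LLVM with plain integers. The following standalone C++ sketch is not part of the commit; it models KnownBits as a pair of 8-bit masks, and the type and helper names (Known8, countTrailingOnes8, knownLowBitsOfMul) are invented for illustration. It reproduces the worked i8 example from the new comment, where a = XXXX1100 (12) and b = XXXX1110 (14) let five low bits of the product be inferred.

// Standalone illustration of the trailing-known-bits-of-multiply scheme
// described in the computeKnownBitsMul hunk above. This is a simplified
// sketch, not the LLVM implementation.
#include <algorithm>
#include <cstdint>
#include <cstdio>

struct Known8 {
  uint8_t Zero; // bits known to be 0
  uint8_t One;  // bits known to be 1
};

// Count consecutive set bits starting from bit 0.
static unsigned countTrailingOnes8(uint8_t V) {
  unsigned N = 0;
  while (N < 8 && (V & (1u << N)))
    ++N;
  return N;
}

// Compute the known low bits of A*B. Returns how many low bits are known and
// stores their values in LowBits.
static unsigned knownLowBitsOfMul(Known8 A, Known8 B, uint8_t &LowBits) {
  // How many low bits of each operand are fully known (either 0 or 1).
  unsigned TrailKnown0 = countTrailingOnes8(A.Zero | A.One);
  unsigned TrailKnown1 = countTrailingOnes8(B.Zero | B.One);
  // How many low bits of each operand are known to be zero.
  unsigned TrailZero0 = countTrailingOnes8(A.Zero);
  unsigned TrailZero1 = countTrailingOnes8(B.Zero);
  unsigned TrailZ = TrailZero0 + TrailZero1;

  // The operand with the fewest known non-zero low bits limits how far beyond
  // the guaranteed trailing zeros we can see into the product.
  unsigned Smallest =
      std::min(TrailKnown0 - TrailZero0, TrailKnown1 - TrailZero1);
  unsigned ResultBitsKnown = std::min(Smallest + TrailZ, 8u);

  // Multiply just the known low bits of each operand; the low ResultBitsKnown
  // bits of that product equal the low bits of the real product, because every
  // discarded term is divisible by 2^ResultBitsKnown.
  uint8_t Bottom = (uint8_t)((A.One & ((1u << TrailKnown0) - 1)) *
                             (B.One & ((1u << TrailKnown1) - 1)));
  LowBits = Bottom & (uint8_t)((1u << ResultBitsKnown) - 1);
  return ResultBitsKnown;
}

int main() {
  // a = XXXX1100 (12) and b = XXXX1110 (14), the example from the new comment.
  Known8 A = {0x03, 0x0c};
  Known8 B = {0x01, 0x0e};
  uint8_t Low = 0;
  unsigned N = knownLowBitsOfMul(A, B, Low);
  std::printf("low %u bits known: 0x%02x\n", N, (unsigned)Low);
  return 0;
}

Built and run, this prints "low 5 bits known: 0x08", i.e. the five low bits 01000, matching the count and values derived in the hunk's comment (three trailing zeros from the factors of 4 and 2, plus two further inferred bits).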