diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 09:08:18 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 09:08:18 +0000 |
commit | 5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (patch) | |
tree | f5944309621cee4fe0976be6f9ac619b7ebfc4c2 /lib/Analysis | |
parent | 68bcb7db193e4bc81430063148253d30a791023e (diff) | |
download | src-5ca98fd98791947eba83a1ed3f2c8191ef7afa6c.tar.gz src-5ca98fd98791947eba83a1ed3f2c8191ef7afa6c.zip |
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release):vendor/llvm/llvm-release_350-r216957
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=274955
svn path=/vendor/llvm/llvm-release_35-r216957/; revision=274956; tag=vendor/llvm/llvm-release_350-r216957
Diffstat (limited to 'lib/Analysis')
63 files changed, 5295 insertions, 3446 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index b8b6d37a792f..8b8106b950a0 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -25,12 +25,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -60,6 +60,13 @@ bool AliasAnalysis::pointsToConstantMemory(const Location &Loc, return AA->pointsToConstantMemory(Loc, OrLocal); } +AliasAnalysis::Location +AliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx, + AliasAnalysis::ModRefResult &Mask) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->getArgLocation(CS, ArgIdx, Mask); +} + void AliasAnalysis::deleteValue(Value *V) { assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); AA->deleteValue(V); @@ -91,22 +98,26 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (onlyAccessesArgPointees(MRB)) { bool doesAlias = false; + ModRefResult AllArgsMask = NoModRef; if (doesAccessArgPointees(MRB)) { - MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { const Value *Arg = *AI; if (!Arg->getType()->isPointerTy()) continue; - Location CSLoc(Arg, UnknownSize, CSTag); + ModRefResult ArgMask; + Location CSLoc = + getArgLocation(CS, (unsigned) std::distance(CS.arg_begin(), AI), + ArgMask); if (!isNoAlias(CSLoc, Loc)) { doesAlias = true; - break; + AllArgsMask = ModRefResult(AllArgsMask | ArgMask); } } } if (!doesAlias) return NoModRef; + Mask = ModRefResult(Mask & AllArgsMask); } // If Loc is a constant memory location, the call definitely could not @@ -150,14 +161,23 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { if (onlyAccessesArgPointees(CS2B)) { AliasAnalysis::ModRefResult R = NoModRef; if (doesAccessArgPointees(CS2B)) { - MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { const Value *Arg = *I; if (!Arg->getType()->isPointerTy()) continue; - Location CS2Loc(Arg, UnknownSize, CS2Tag); - R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask); + ModRefResult ArgMask; + Location CS2Loc = + getArgLocation(CS2, (unsigned) std::distance(CS2.arg_begin(), I), + ArgMask); + // ArgMask indicates what CS2 might do to CS2Loc, and the dependence of + // CS1 on that location is the inverse. + if (ArgMask == Mod) + ArgMask = ModRef; + else if (ArgMask == Ref) + ArgMask = Mod; + + R = ModRefResult((R | (getModRefInfo(CS1, CS2Loc) & ArgMask)) & Mask); if (R == Mask) break; } @@ -170,21 +190,28 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { if (onlyAccessesArgPointees(CS1B)) { AliasAnalysis::ModRefResult R = NoModRef; if (doesAccessArgPointees(CS1B)) { - MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { const Value *Arg = *I; if (!Arg->getType()->isPointerTy()) continue; - Location CS1Loc(Arg, UnknownSize, CS1Tag); - if (getModRefInfo(CS2, CS1Loc) != NoModRef) { - R = Mask; + ModRefResult ArgMask; + Location CS1Loc = + getArgLocation(CS1, (unsigned) std::distance(CS1.arg_begin(), I), + ArgMask); + // ArgMask indicates what CS1 might do to CS1Loc; if CS1 might Mod + // CS1Loc, then we care about either a Mod or a Ref by CS2. If CS1 + // might Ref, then we care only about a Mod by CS2. + ModRefResult ArgR = getModRefInfo(CS2, CS1Loc); + if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) || + ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef)) + R = ModRefResult((R | ArgMask) & Mask); + + if (R == Mask) break; - } } } - if (R == NoModRef) - return R; + return R; } // If this is the end of the chain, don't forward. @@ -338,7 +365,7 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) { AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) { // Acquire/Release cmpxchg has properties that matter for arbitrary addresses. - if (CX->getOrdering() > Monotonic) + if (CX->getSuccessOrdering() > Monotonic) return ModRef; // If the cmpxchg address does not alias the location, it does not access it. @@ -361,53 +388,6 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { return ModRef; } -namespace { - /// Only find pointer captures which happen before the given instruction. Uses - /// the dominator tree to determine whether one instruction is before another. - /// Only support the case where the Value is defined in the same basic block - /// as the given instruction and the use. - struct CapturesBefore : public CaptureTracker { - CapturesBefore(const Instruction *I, DominatorTree *DT) - : BeforeHere(I), DT(DT), Captured(false) {} - - void tooManyUses() { Captured = true; } - - bool shouldExplore(Use *U) { - Instruction *I = cast<Instruction>(U->getUser()); - BasicBlock *BB = I->getParent(); - // We explore this usage only if the usage can reach "BeforeHere". - // If use is not reachable from entry, there is no need to explore. - if (BeforeHere != I && !DT->isReachableFromEntry(BB)) - return false; - // If the value is defined in the same basic block as use and BeforeHere, - // there is no need to explore the use if BeforeHere dominates use. - // Check whether there is a path from I to BeforeHere. - if (BeforeHere != I && DT->dominates(BeforeHere, I) && - !isPotentiallyReachable(I, BeforeHere, DT)) - return false; - return true; - } - - bool captured(Use *U) { - Instruction *I = cast<Instruction>(U->getUser()); - BasicBlock *BB = I->getParent(); - // Same logic as in shouldExplore. - if (BeforeHere != I && !DT->isReachableFromEntry(BB)) - return false; - if (BeforeHere != I && DT->dominates(BeforeHere, I) && - !isPotentiallyReachable(I, BeforeHere, DT)) - return false; - Captured = true; - return true; - } - - const Instruction *BeforeHere; - DominatorTree *DT; - - bool Captured; - }; -} - // FIXME: this is really just shoring-up a deficiency in alias analysis. // BasicAA isn't willing to spend linear time determining whether an alloca // was captured before or after this particular call, while we are. However, @@ -416,9 +396,9 @@ AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(const Instruction *I, const AliasAnalysis::Location &MemLoc, DominatorTree *DT) { - if (!DT || !TD) return AliasAnalysis::ModRef; + if (!DT || !DL) return AliasAnalysis::ModRef; - const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); + const Value *Object = GetUnderlyingObject(MemLoc.Ptr, DL); if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) || isa<Constant>(Object)) return AliasAnalysis::ModRef; @@ -427,9 +407,9 @@ AliasAnalysis::callCapturesBefore(const Instruction *I, if (!CS.getInstruction() || CS.getInstruction() == Object) return AliasAnalysis::ModRef; - CapturesBefore CB(I, DT); - llvm::PointerMayBeCaptured(Object, &CB); - if (CB.Captured) + if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true, + /* StoreCaptures */ true, I, DT, + /* include Object */ true)) return AliasAnalysis::ModRef; unsigned ArgNo = 0; @@ -472,7 +452,8 @@ AliasAnalysis::~AliasAnalysis() {} /// AliasAnalysis interface before any other methods are called. /// void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { - TD = P->getAnalysisIfAvailable<DataLayout>(); + DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>(); AA = &P->getAnalysis<AliasAnalysis>(); } @@ -487,7 +468,7 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { /// if known, or a conservative value otherwise. /// uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) { - return TD ? TD->getTypeStoreSize(Ty) : UnknownSize; + return DL ? DL->getTypeStoreSize(Ty) : UnknownSize; } /// canBasicBlockModify - Return true if it is possible for execution of the @@ -554,3 +535,14 @@ bool llvm::isIdentifiedObject(const Value *V) { return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } + +/// isIdentifiedFunctionLocal - Return true if V is umabigously identified +/// at the function-level. Different IdentifiedFunctionLocals can't alias. +/// Further, an IdentifiedFunctionLocal can not alias with any function +/// arguments other than itself, which is not necessarily true for +/// IdentifiedObjects. +bool llvm::isIdentifiedFunctionLocal(const Value *V) +{ + return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V); +} + diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 9f4a47c77e03..b8609142fa20 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -14,7 +14,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -75,13 +74,13 @@ namespace { } } - bool runOnModule(Module &M) { + bool runOnModule(Module &M) override { this->M = &M; InitializeAliasAnalysis(this); return false; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AliasAnalysis::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); AU.setPreservesAll(); @@ -91,25 +90,25 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + void *getAdjustedAnalysisPointer(AnalysisID PI) override { if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; } // FIXME: We could count these too... - bool pointsToConstantMemory(const Location &Loc, bool OrLocal) { + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override { return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal); } // Forwarding functions: just delegate to a real AA implementation, counting // the number of responses... - AliasResult alias(const Location &LocA, const Location &LocB); + AliasResult alias(const Location &LocA, const Location &LocB) override; ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); + const Location &Loc) override; ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { + ImmutableCallSite CS2) override { return AliasAnalysis::getModRefInfo(CS1,CS2); } }; @@ -127,7 +126,7 @@ AliasAnalysis::AliasResult AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) { AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB); - const char *AliasString = 0; + const char *AliasString = nullptr; switch (R) { case NoAlias: No++; AliasString = "No alias"; break; case MayAlias: May++; AliasString = "May alias"; break; @@ -138,10 +137,10 @@ AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) { if (PrintAll || (PrintAllFailures && R == MayAlias)) { errs() << AliasString << ":\t"; errs() << "[" << LocA.Size << "B] "; - WriteAsOperand(errs(), LocA.Ptr, true, M); + LocA.Ptr->printAsOperand(errs(), true, M); errs() << ", "; errs() << "[" << LocB.Size << "B] "; - WriteAsOperand(errs(), LocB.Ptr, true, M); + LocB.Ptr->printAsOperand(errs(), true, M); errs() << "\n"; } @@ -153,7 +152,7 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc); - const char *MRString = 0; + const char *MRString = nullptr; switch (R) { case NoModRef: NoMR++; MRString = "NoModRef"; break; case Ref: JustRef++; MRString = "JustRef"; break; @@ -164,7 +163,7 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, if (PrintAll || (PrintAllFailures && R == ModRef)) { errs() << MRString << ": Ptr: "; errs() << "[" << Loc.Size << "B] "; - WriteAsOperand(errs(), Loc.Ptr, true, M); + Loc.Ptr->printAsOperand(errs(), true, M); errs() << "\t<->" << *CS.getInstruction() << '\n'; } return R; diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index a571463dfe12..d9fa5a500f36 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -20,15 +20,14 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Assembly/Writer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -57,12 +56,12 @@ namespace { initializeAAEvalPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AliasAnalysis>(); AU.setPreservesAll(); } - bool doInitialization(Module &M) { + bool doInitialization(Module &M) override { NoAlias = MayAlias = PartialAlias = MustAlias = 0; NoModRef = Mod = Ref = ModRef = 0; @@ -74,8 +73,8 @@ namespace { return false; } - bool runOnFunction(Function &F); - bool doFinalization(Module &M); + bool runOnFunction(Function &F) override; + bool doFinalization(Module &M) override; }; } @@ -94,8 +93,8 @@ static void PrintResults(const char *Msg, bool P, const Value *V1, std::string o1, o2; { raw_string_ostream os1(o1), os2(o2); - WriteAsOperand(os1, V1, true, M); - WriteAsOperand(os2, V2, true, M); + V1->printAsOperand(os1, true, M); + V2->printAsOperand(os2, true, M); } if (o2 < o1) @@ -111,7 +110,7 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, Module *M) { if (P) { errs() << " " << Msg << ": Ptr: "; - WriteAsOperand(errs(), Ptr, true, M); + Ptr->printAsOperand(errs(), true, M); errs() << "\t<->" << *I << '\n'; } } diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index f6178e36f0a9..5d61cf9752e9 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -43,7 +43,7 @@ namespace { initializeAliasDebuggerPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M) { + bool runOnModule(Module &M) override { InitializeAliasAnalysis(this); // set up super class for(Module::global_iterator I = M.global_begin(), @@ -76,7 +76,7 @@ namespace { return false; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AliasAnalysis::getAnalysisUsage(AU); AU.setPreservesAll(); // Does not transform code } @@ -85,7 +85,7 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + void *getAdjustedAnalysisPointer(AnalysisID PI) override { if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; @@ -94,7 +94,7 @@ namespace { //------------------------------------------------ // Implement the AliasAnalysis API // - AliasResult alias(const Location &LocA, const Location &LocB) { + AliasResult alias(const Location &LocA, const Location &LocB) override { assert(Vals.find(LocA.Ptr) != Vals.end() && "Never seen value in AA before"); assert(Vals.find(LocB.Ptr) != Vals.end() && @@ -103,26 +103,26 @@ namespace { } ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc) { + const Location &Loc) override { assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); return AliasAnalysis::getModRefInfo(CS, Loc); } ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { + ImmutableCallSite CS2) override { return AliasAnalysis::getModRefInfo(CS1,CS2); } - - bool pointsToConstantMemory(const Location &Loc, bool OrLocal) { + + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override { assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); } - virtual void deleteValue(Value *V) { + void deleteValue(Value *V) override { assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); AliasAnalysis::deleteValue(V); } - virtual void copyValue(Value *From, Value *To) { + void copyValue(Value *From, Value *To) override { Vals.insert(To); AliasAnalysis::copyValue(From, To); } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 2289c1223e97..a45fe2389ee7 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -13,8 +13,8 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Assembly/Writer.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -22,7 +22,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -73,16 +72,16 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { AS.PtrList->setPrevInList(PtrListEnd); PtrListEnd = AS.PtrListEnd; - AS.PtrList = 0; + AS.PtrList = nullptr; AS.PtrListEnd = &AS.PtrList; - assert(*AS.PtrListEnd == 0 && "End of list is not null?"); + assert(*AS.PtrListEnd == nullptr && "End of list is not null?"); } } void AliasSetTracker::removeAliasSet(AliasSet *AS) { if (AliasSet *Fwd = AS->Forward) { Fwd->dropRef(*this); - AS->Forward = 0; + AS->Forward = nullptr; } AliasSets.erase(AS); } @@ -116,10 +115,10 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); // Add it to the end of the list... - assert(*PtrListEnd == 0 && "End of list is not null?"); + assert(*PtrListEnd == nullptr && "End of list is not null?"); *PtrListEnd = &Entry; PtrListEnd = Entry.setPrevInList(PtrListEnd); - assert(*PtrListEnd == 0 && "End of list is not null?"); + assert(*PtrListEnd == nullptr && "End of list is not null?"); addRef(); // Entry points to alias set. } @@ -218,11 +217,11 @@ void AliasSetTracker::clear() { AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { - AliasSet *FoundSet = 0; + AliasSet *FoundSet = nullptr; for (iterator I = begin(), E = end(); I != E; ++I) { if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue; - if (FoundSet == 0) { // If this is the first alias set ptr can go into. + if (!FoundSet) { // If this is the first alias set ptr can go into. FoundSet = I; // Remember it. } else { // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*I, *this); // Merge in contents. @@ -246,12 +245,12 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { - AliasSet *FoundSet = 0; + AliasSet *FoundSet = nullptr; for (iterator I = begin(), E = end(); I != E; ++I) { if (I->Forward || !I->aliasesUnknownInst(Inst, AA)) continue; - if (FoundSet == 0) // If this is the first alias set ptr can go into. + if (!FoundSet) // If this is the first alias set ptr can go into. FoundSet = I; // Remember it. else if (!I->Forward) // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*I, *this); // Merge in contents. @@ -566,7 +565,7 @@ void AliasSet::print(raw_ostream &OS) const { OS << "Pointers: "; for (iterator I = begin(), E = end(); I != E; ++I) { if (I != begin()) OS << ", "; - WriteAsOperand(OS << "(", I.getPointer()); + I.getPointer()->printAsOperand(OS << "("); OS << ", " << I.getSize() << ")"; } } @@ -574,7 +573,7 @@ void AliasSet::print(raw_ostream &OS) const { OS << "\n " << UnknownInsts.size() << " Unknown instructions: "; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { if (i) OS << ", "; - WriteAsOperand(OS, UnknownInsts[i]); + UnknownInsts[i]->printAsOperand(OS); } } OS << "\n"; @@ -628,12 +627,12 @@ namespace { initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired<AliasAnalysis>(); } - virtual bool runOnFunction(Function &F) { + bool runOnFunction(Function &F) override { Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>()); for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 98f2a55a2fdd..c58012f3a8c3 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -9,10 +9,11 @@ #include "llvm-c/Analysis.h" #include "llvm-c/Initialization.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/InitializePasses.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <cstring> using namespace llvm; @@ -47,6 +48,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeIVUsersPass(Registry); initializeInstCountPass(Registry); initializeIntervalPartitionPass(Registry); + initializeJumpInstrTableInfoPass(Registry); initializeLazyValueInfoPass(Registry); initializeLibCallAliasAnalysisPass(Registry); initializeLintPass(Registry); @@ -55,7 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeMemoryDependenceAnalysisPass(Registry); initializeModuleDebugInfoPrinterPass(Registry); initializePostDominatorTreePass(Registry); - initializeRegionInfoPass(Registry); + initializeRegionInfoPassPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); initializeRegionOnlyViewerPass(Registry); @@ -72,21 +74,34 @@ void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, char **OutMessages) { + raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr; std::string Messages; + raw_string_ostream MsgsOS(Messages); + + LLVMBool Result = verifyModule(*unwrap(M), OutMessages ? &MsgsOS : DebugOS); + + // Duplicate the output to stderr. + if (DebugOS && OutMessages) + *DebugOS << MsgsOS.str(); - LLVMBool Result = verifyModule(*unwrap(M), - static_cast<VerifierFailureAction>(Action), - OutMessages? &Messages : 0); + if (Action == LLVMAbortProcessAction && Result) + report_fatal_error("Broken module found, compilation aborted!"); if (OutMessages) - *OutMessages = strdup(Messages.c_str()); + *OutMessages = strdup(MsgsOS.str().c_str()); return Result; } LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) { - return verifyFunction(*unwrap<Function>(Fn), - static_cast<VerifierFailureAction>(Action)); + LLVMBool Result = verifyFunction( + *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs() + : nullptr); + + if (Action == LLVMAbortProcessAction && Result) + report_fatal_error("Broken function found, compilation aborted!"); + + return Result; } void LLVMViewFunctionCFG(LLVMValueRef Fn) { diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index d0e186c5f23f..38ec52d6b9ac 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -17,9 +17,8 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -27,7 +26,9 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" @@ -36,7 +37,6 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; @@ -47,6 +47,11 @@ using namespace llvm; /// cannot be involved in a cycle. const unsigned MaxNumPhiBBsValueReachabilityCheck = 20; +// The max limit of the search depth in DecomposeGEPExpression() and +// GetUnderlyingObject(), both functions need to use the same search +// depth otherwise the algorithm in aliasGEP will assert. +static const unsigned MaxLookupSearchDepth = 6; + //===----------------------------------------------------------------------===// // Useful predicates //===----------------------------------------------------------------------===// @@ -93,11 +98,11 @@ static bool isEscapeSource(const Value *V) { /// getObjectSize - Return the size of the object specified by V, or /// UnknownSize if unknown. -static uint64_t getObjectSize(const Value *V, const DataLayout &TD, +static uint64_t getObjectSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) + if (getObjectSize(V, Size, &DL, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } @@ -105,7 +110,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. static bool isObjectSmallerThan(const Value *V, uint64_t Size, - const DataLayout &TD, + const DataLayout &DL, const TargetLibraryInfo &TLI) { // Note that the meanings of the "object" are slightly different in the // following contexts: @@ -138,7 +143,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, // This function needs to use the aligned object size because we allow // reads a bit past the end given sufficient alignment. - uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true); + uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true); return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; } @@ -146,22 +151,11 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, /// isObjectSize - Return true if we can prove that the object specified /// by V has size Size. static bool isObjectSize(const Value *V, uint64_t Size, - const DataLayout &TD, const TargetLibraryInfo &TLI) { - uint64_t ObjectSize = getObjectSize(V, TD, TLI); + const DataLayout &DL, const TargetLibraryInfo &TLI) { + uint64_t ObjectSize = getObjectSize(V, DL, TLI); return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; } -/// isIdentifiedFunctionLocal - Return true if V is umabigously identified -/// at the function-level. Different IdentifiedFunctionLocals can't alias. -/// Further, an IdentifiedFunctionLocal can not alias with any function -/// arguments other than itself, which is not neccessarily true for -/// IdentifiedObjects. -static bool isIdentifiedFunctionLocal(const Value *V) -{ - return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V); -} - - //===----------------------------------------------------------------------===// // GetElementPtr Instruction Decomposition and Analysis //===----------------------------------------------------------------------===// @@ -200,7 +194,7 @@ namespace { /// represented in the result. static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, ExtensionKind &Extension, - const DataLayout &TD, unsigned Depth) { + const DataLayout &DL, unsigned Depth) { assert(V->getType()->isIntegerTy() && "Not an integer value"); // Limit our recursion depth. @@ -217,23 +211,23 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, case Instruction::Or: // X|C == X+C if all the bits in C are unset in X. Otherwise we can't // analyze it. - if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD)) + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL)) break; // FALL THROUGH. case Instruction::Add: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - TD, Depth+1); + DL, Depth+1); Offset += RHSC->getValue(); return V; case Instruction::Mul: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - TD, Depth+1); + DL, Depth+1); Offset *= RHSC->getValue(); Scale *= RHSC->getValue(); return V; case Instruction::Shl: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - TD, Depth+1); + DL, Depth+1); Offset <<= RHSC->getValue().getLimitedValue(); Scale <<= RHSC->getValue().getLimitedValue(); return V; @@ -254,7 +248,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt; Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, - TD, Depth+1); + DL, Depth+1); Scale = Scale.zext(OldWidth); Offset = Offset.zext(OldWidth); @@ -276,21 +270,24 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, /// the gep cannot necessarily be reconstructed from its decomposed form. /// /// When DataLayout is around, this function is capable of analyzing everything -/// that GetUnderlyingObject can look through. When not, it just looks -/// through pointer casts. +/// that GetUnderlyingObject can look through. To be able to do that +/// GetUnderlyingObject and DecomposeGEPExpression must use the same search +/// depth (MaxLookupSearchDepth). +/// When DataLayout not is around, it just looks through pointer casts. /// static const Value * DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, SmallVectorImpl<VariableGEPIndex> &VarIndices, - const DataLayout *TD) { + bool &MaxLookupReached, const DataLayout *DL) { // Limit recursion depth to limit compile time in crazy cases. - unsigned MaxLookup = 6; + unsigned MaxLookup = MaxLookupSearchDepth; + MaxLookupReached = false; BaseOffs = 0; do { // See if this is a bitcast or GEP. const Operator *Op = dyn_cast<Operator>(V); - if (Op == 0) { + if (!Op) { // The only non-operator case we can handle are GlobalAliases. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (!GA->mayBeOverridden()) { @@ -301,19 +298,20 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, return V; } - if (Op->getOpcode() == Instruction::BitCast) { + if (Op->getOpcode() == Instruction::BitCast || + Op->getOpcode() == Instruction::AddrSpaceCast) { V = Op->getOperand(0); continue; } const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); - if (GEPOp == 0) { + if (!GEPOp) { // If it's not a GEP, hand it off to SimplifyInstruction to see if it // can come up with something. This matches what GetUnderlyingObject does. if (const Instruction *I = dyn_cast<Instruction>(V)) // TODO: Get a DominatorTree and use it here. if (const Value *Simplified = - SimplifyInstruction(const_cast<Instruction *>(I), TD)) { + SimplifyInstruction(const_cast<Instruction *>(I), DL)) { V = Simplified; continue; } @@ -328,7 +326,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // If we are lacking DataLayout information, we can't compute the offets of // elements computed by GEPs. However, we can handle bitcast equivalent // GEPs. - if (TD == 0) { + if (!DL) { if (!GEPOp->hasAllZeroIndices()) return V; V = GEPOp->getOperand(0); @@ -347,30 +345,30 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); if (FieldNo == 0) continue; - BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo); + BaseOffs += DL->getStructLayout(STy)->getElementOffset(FieldNo); continue; } // For an array/pointer, add the element offset, explicitly scaled. if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { if (CIdx->isZero()) continue; - BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); + BaseOffs += DL->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); continue; } - uint64_t Scale = TD->getTypeAllocSize(*GTI); + uint64_t Scale = DL->getTypeAllocSize(*GTI); ExtensionKind Extension = EK_NotExtended; // If the integer type is smaller than the pointer size, it is implicitly // sign extended to pointer size. unsigned Width = Index->getType()->getIntegerBitWidth(); - if (TD->getPointerSizeInBits(AS) > Width) + if (DL->getPointerSizeInBits(AS) > Width) Extension = EK_SignExt; // Use GetLinearExpression to decompose the index into a C1*V+C2 form. APInt IndexScale(Width, 0), IndexOffset(Width, 0); Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, - *TD, 0); + *DL, 0); // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. @@ -392,7 +390,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // Make sure that we have a scale that makes sense for this target's // pointer size. - if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) { + if (unsigned ShiftBits = 64 - DL->getPointerSizeInBits(AS)) { Scale <<= ShiftBits; Scale = (int64_t)Scale >> ShiftBits; } @@ -409,6 +407,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, } while (--MaxLookup); // If the chain of expressions is too deep, just return early. + MaxLookupReached = true; return V; } @@ -424,7 +423,7 @@ static const Function *getParent(const Value *V) { if (const Argument *arg = dyn_cast<Argument>(V)) return arg->getParent(); - return NULL; + return nullptr; } static bool notDifferentParent(const Value *O1, const Value *O2) { @@ -444,17 +443,16 @@ namespace { initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { + void initializePass() override { InitializeAliasAnalysis(this); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AliasAnalysis>(); AU.addRequired<TargetLibraryInfo>(); } - virtual AliasResult alias(const Location &LocA, - const Location &LocB) { + AliasResult alias(const Location &LocA, const Location &LocB) override { assert(AliasCache.empty() && "AliasCache must be cleared after use!"); assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && "BasicAliasAnalysis doesn't support interprocedural queries."); @@ -469,32 +467,36 @@ namespace { return Alias; } - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) override; - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override { // The AliasAnalysis base class has some smarts, lets use them. return AliasAnalysis::getModRefInfo(CS1, CS2); } /// pointsToConstantMemory - Chase pointers until we find a (constant /// global) or not. - virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override; + + /// Get the location associated with a pointer argument of a callsite. + Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx, + ModRefResult &Mask) override; /// getModRefBehavior - Return the behavior when calling the given /// call site. - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; /// getModRefBehavior - Return the behavior when calling the given function. /// For use when the call site is not known. - virtual ModRefBehavior getModRefBehavior(const Function *F); + ModRefBehavior getModRefBehavior(const Function *F) override; /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *ID) { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; @@ -593,7 +595,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) { SmallVector<const Value *, 16> Worklist; Worklist.push_back(Loc.Ptr); do { - const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD); + const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL); if (!Visited.insert(V)) { Visited.clear(); return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); @@ -645,6 +647,21 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) { return Worklist.empty(); } +static bool isMemsetPattern16(const Function *MS, + const TargetLibraryInfo &TLI) { + if (TLI.has(LibFunc::memset_pattern16) && + MS->getName() == "memset_pattern16") { + FunctionType *MemsetType = MS->getFunctionType(); + if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 && + isa<PointerType>(MemsetType->getParamType(0)) && + isa<PointerType>(MemsetType->getParamType(1)) && + isa<IntegerType>(MemsetType->getParamType(2))) + return true; + } + + return false; +} + /// getModRefBehavior - Return the behavior when calling the given call site. AliasAnalysis::ModRefBehavior BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { @@ -684,10 +701,93 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) { if (F->onlyReadsMemory()) Min = OnlyReadsMemory; + const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>(); + if (isMemsetPattern16(F, TLI)) + Min = OnlyAccessesArgumentPointees; + // Otherwise be conservative. return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); } +AliasAnalysis::Location +BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx, + ModRefResult &Mask) { + Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask); + const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>(); + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); + if (II != nullptr) + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: { + assert((ArgIdx == 0 || ArgIdx == 1) && + "Invalid argument index for memory intrinsic"); + if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) + Loc.Size = LenCI->getZExtValue(); + assert(Loc.Ptr == II->getArgOperand(ArgIdx) && + "Memory intrinsic location pointer not argument?"); + Mask = ArgIdx ? Ref : Mod; + break; + } + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: { + assert(ArgIdx == 1 && "Invalid argument index"); + assert(Loc.Ptr == II->getArgOperand(ArgIdx) && + "Intrinsic location pointer not argument?"); + Loc.Size = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); + break; + } + case Intrinsic::invariant_end: { + assert(ArgIdx == 2 && "Invalid argument index"); + assert(Loc.Ptr == II->getArgOperand(ArgIdx) && + "Intrinsic location pointer not argument?"); + Loc.Size = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); + break; + } + case Intrinsic::arm_neon_vld1: { + assert(ArgIdx == 0 && "Invalid argument index"); + assert(Loc.Ptr == II->getArgOperand(ArgIdx) && + "Intrinsic location pointer not argument?"); + // LLVM's vld1 and vst1 intrinsics currently only support a single + // vector register. + if (DL) + Loc.Size = DL->getTypeStoreSize(II->getType()); + break; + } + case Intrinsic::arm_neon_vst1: { + assert(ArgIdx == 0 && "Invalid argument index"); + assert(Loc.Ptr == II->getArgOperand(ArgIdx) && + "Intrinsic location pointer not argument?"); + if (DL) + Loc.Size = DL->getTypeStoreSize(II->getArgOperand(1)->getType()); + break; + } + } + + // We can bound the aliasing properties of memset_pattern16 just as we can + // for memcpy/memset. This is particularly important because the + // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 + // whenever possible. + else if (CS.getCalledFunction() && + isMemsetPattern16(CS.getCalledFunction(), TLI)) { + assert((ArgIdx == 0 || ArgIdx == 1) && + "Invalid argument index for memset_pattern16"); + if (ArgIdx == 1) + Loc.Size = 16; + else if (const ConstantInt *LenCI = + dyn_cast<ConstantInt>(CS.getArgument(2))) + Loc.Size = LenCI->getZExtValue(); + assert(Loc.Ptr == CS.getArgument(ArgIdx) && + "memset_pattern16 location pointer not argument?"); + Mask = ArgIdx ? Ref : Mod; + } + // FIXME: Handle memset_pattern4 and memset_pattern8 also. + + return Loc; +} + /// getModRefInfo - Check to see if the specified callsite can clobber the /// specified memory object. Since we only look at local properties of this /// function, we really can't say much about this query. We do, however, use @@ -698,7 +798,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) && "AliasAnalysis query involving multiple functions!"); - const Value *Object = GetUnderlyingObject(Loc.Ptr, TD); + const Value *Object = GetUnderlyingObject(Loc.Ptr, DL); // If this is a tail call and Loc.Ptr points to a stack location, we know that // the tail call cannot access or modify the local stack. @@ -740,144 +840,13 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return NoModRef; } - const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>(); - ModRefResult Min = ModRef; - - // Finally, handle specific knowledge of intrinsics. - const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); - if (II != 0) - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::memcpy: - case Intrinsic::memmove: { - uint64_t Len = UnknownSize; - if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) - Len = LenCI->getZExtValue(); - Value *Dest = II->getArgOperand(0); - Value *Src = II->getArgOperand(1); - // If it can't overlap the source dest, then it doesn't modref the loc. - if (isNoAlias(Location(Dest, Len), Loc)) { - if (isNoAlias(Location(Src, Len), Loc)) - return NoModRef; - // If it can't overlap the dest, then worst case it reads the loc. - Min = Ref; - } else if (isNoAlias(Location(Src, Len), Loc)) { - // If it can't overlap the source, then worst case it mutates the loc. - Min = Mod; - } - break; - } - case Intrinsic::memset: - // Since memset is 'accesses arguments' only, the AliasAnalysis base class - // will handle it for the variable length case. - if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) { - uint64_t Len = LenCI->getZExtValue(); - Value *Dest = II->getArgOperand(0); - if (isNoAlias(Location(Dest, Len), Loc)) - return NoModRef; - } - // We know that memset doesn't load anything. - Min = Mod; - break; - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: { - uint64_t PtrSize = - cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); - if (isNoAlias(Location(II->getArgOperand(1), - PtrSize, - II->getMetadata(LLVMContext::MD_tbaa)), - Loc)) - return NoModRef; - break; - } - case Intrinsic::invariant_end: { - uint64_t PtrSize = - cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); - if (isNoAlias(Location(II->getArgOperand(2), - PtrSize, - II->getMetadata(LLVMContext::MD_tbaa)), - Loc)) - return NoModRef; - break; - } - case Intrinsic::arm_neon_vld1: { - // LLVM's vld1 and vst1 intrinsics currently only support a single - // vector register. - uint64_t Size = - TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize; - if (isNoAlias(Location(II->getArgOperand(0), Size, - II->getMetadata(LLVMContext::MD_tbaa)), - Loc)) - return NoModRef; - break; - } - case Intrinsic::arm_neon_vst1: { - uint64_t Size = - TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize; - if (isNoAlias(Location(II->getArgOperand(0), Size, - II->getMetadata(LLVMContext::MD_tbaa)), - Loc)) - return NoModRef; - break; - } - } - - // We can bound the aliasing properties of memset_pattern16 just as we can - // for memcpy/memset. This is particularly important because the - // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 - // whenever possible. - else if (TLI.has(LibFunc::memset_pattern16) && - CS.getCalledFunction() && - CS.getCalledFunction()->getName() == "memset_pattern16") { - const Function *MS = CS.getCalledFunction(); - FunctionType *MemsetType = MS->getFunctionType(); - if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 && - isa<PointerType>(MemsetType->getParamType(0)) && - isa<PointerType>(MemsetType->getParamType(1)) && - isa<IntegerType>(MemsetType->getParamType(2))) { - uint64_t Len = UnknownSize; - if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2))) - Len = LenCI->getZExtValue(); - const Value *Dest = CS.getArgument(0); - const Value *Src = CS.getArgument(1); - // If it can't overlap the source dest, then it doesn't modref the loc. - if (isNoAlias(Location(Dest, Len), Loc)) { - // Always reads 16 bytes of the source. - if (isNoAlias(Location(Src, 16), Loc)) - return NoModRef; - // If it can't overlap the dest, then worst case it reads the loc. - Min = Ref; - // Always reads 16 bytes of the source. - } else if (isNoAlias(Location(Src, 16), Loc)) { - // If it can't overlap the source, then worst case it mutates the loc. - Min = Mod; - } - } - } - // The AliasAnalysis base class has some smarts, lets use them. - return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); -} - -static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1, - SmallVectorImpl<VariableGEPIndex> &Indices2) { - unsigned Size1 = Indices1.size(); - unsigned Size2 = Indices2.size(); - - if (Size1 != Size2) - return false; - - for (unsigned I = 0; I != Size1; ++I) - if (Indices1[I] != Indices2[I]) - return false; - - return true; + return AliasAnalysis::getModRefInfo(CS, Loc); } /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction /// against another pointer. We know that V1 is a GEP, but we don't know -/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD), +/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL), /// UnderlyingV2 is the same for V2. /// AliasAnalysis::AliasResult @@ -888,6 +857,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const Value *UnderlyingV1, const Value *UnderlyingV2) { int64_t GEP1BaseOffset; + bool GEP1MaxLookupReached; SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; // If we have two gep instructions with must-alias or not-alias'ing base @@ -895,8 +865,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, - UnderlyingV2, UnknownSize, 0); + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, nullptr, + UnderlyingV2, UnknownSize, nullptr); // Check for geps of non-aliasing underlying pointers where the offsets are // identical. @@ -909,21 +879,28 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // See if the computed offset from the common pointer tells us about the // relation of the resulting pointer. int64_t GEP2BaseOffset; + bool GEP2MaxLookupReached; SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; const Value *GEP2BasePtr = - DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, + GEP2MaxLookupReached, DL); const Value *GEP1BasePtr = - DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, + GEP1MaxLookupReached, DL); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { - assert(TD == 0 && - "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + assert(!DL && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } + // If the max search depth is reached the result is undefined + if (GEP2MaxLookupReached || GEP1MaxLookupReached) + return MayAlias; + // Same offsets. if (GEP1BaseOffset == GEP2BaseOffset && - areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices)) + GEP1VariableIndices == GEP2VariableIndices) return NoAlias; GEP1VariableIndices.clear(); } @@ -937,20 +914,26 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // exactly, see if the computed offset from the common pointer tells us // about the relation of the resulting pointer. const Value *GEP1BasePtr = - DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, + GEP1MaxLookupReached, DL); int64_t GEP2BaseOffset; + bool GEP2MaxLookupReached; SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; const Value *GEP2BasePtr = - DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, + GEP2MaxLookupReached, DL); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { - assert(TD == 0 && + assert(!DL && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } + // If the max search depth is reached the result is undefined + if (GEP2MaxLookupReached || GEP1MaxLookupReached) + return MayAlias; // Subtract the GEP2 pointer from the GEP1 pointer to find out their // symbolic difference. @@ -966,7 +949,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (V1Size == UnknownSize && V2Size == UnknownSize) return MayAlias; - AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0, + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, nullptr, V2, V2Size, V2TBAAInfo); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. @@ -977,15 +960,19 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return R; const Value *GEP1BasePtr = - DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, + GEP1MaxLookupReached, DL); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1) { - assert(TD == 0 && + assert(!DL && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } + // If the max search depth is reached the result is undefined + if (GEP1MaxLookupReached) + return MayAlias; } // In the two GEP Case, if there is no difference in the offsets of the @@ -1215,8 +1202,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, return NoAlias; // Scalars cannot alias each other // Figure out what objects these things are pointing to if we can. - const Value *O1 = GetUnderlyingObject(V1, TD); - const Value *O2 = GetUnderlyingObject(V2, TD); + const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth); + const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth); // Null values in the default address space don't point to any object, so they // don't alias any other pointer. @@ -1265,9 +1252,9 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. - if (TD) - if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) || - (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI))) + if (DL) + if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *DL, *TLI)) || + (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *DL, *TLI))) return NoAlias; // Check the cache before climbing up use-def chains. This also terminates @@ -1319,9 +1306,9 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, // If both pointers are pointing into the same object and one of them // accesses is accessing the entire object, then the accesses must // overlap in some way. - if (TD && O1 == O2) - if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD, *TLI)) || - (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD, *TLI))) + if (DL && O1 == O2) + if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *DL, *TLI)) || + (V2Size != UnknownSize && isObjectSize(O2, V2Size, *DL, *TLI))) return AliasCache[Locs] = PartialAlias; AliasResult Result = @@ -1343,7 +1330,9 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, return false; // Use dominance or loop info if available. - DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>(); // Make sure that the visited phis cannot reach the Value. This ensures that diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 62f3ab16ca7c..8ed8e3e4c2e0 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===// +//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===// // // The LLVM Compiler Infrastructure // @@ -12,18 +12,20 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" +#include "llvm/IR/CFG.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" using namespace llvm; +#define DEBUG_TYPE "block-freq" + #ifndef NDEBUG enum GVDAGType { GVDT_None, @@ -86,7 +88,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { OS << Node->getName().str() << ":"; switch (ViewBlockFreqPropagationDAG) { case GVDT_Fraction: - Graph->getBlockFreq(Node).print(OS); + Graph->printBlockFreq(OS, Node); break; case GVDT_Integer: OS << Graph->getBlockFreq(Node).getFrequency(); @@ -106,6 +108,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", true, true) @@ -114,21 +117,22 @@ char BlockFrequencyInfo::ID = 0; BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) { initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); - BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>(); } -BlockFrequencyInfo::~BlockFrequencyInfo() { - delete BFI; -} +BlockFrequencyInfo::~BlockFrequencyInfo() {} void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<BranchProbabilityInfo>(); + AU.addRequired<LoopInfo>(); AU.setPreservesAll(); } bool BlockFrequencyInfo::runOnFunction(Function &F) { BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); - BFI->doFunction(&F, &BPI); + LoopInfo &LI = getAnalysis<LoopInfo>(); + if (!BFI) + BFI.reset(new ImplType); + BFI->doFunction(&F, &BPI, &LI); #ifndef NDEBUG if (ViewBlockFreqPropagationDAG != GVDT_None) view(); @@ -136,12 +140,14 @@ bool BlockFrequencyInfo::runOnFunction(Function &F) { return false; } +void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } + void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { if (BFI) BFI->print(O); } BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { - return BFI->getBlockFreq(BB); + return BFI ? BFI->getBlockFreq(BB) : 0; } /// Pop up a ghostview window with the current block frequency propagation @@ -157,5 +163,20 @@ void BlockFrequencyInfo::view() const { } const Function *BlockFrequencyInfo::getFunction() const { - return BFI->Fn; + return BFI ? BFI->getFunction() : nullptr; +} + +raw_ostream &BlockFrequencyInfo:: +printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const { + return BFI ? BFI->printBlockFreq(OS, Freq) : OS; +} + +raw_ostream & +BlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const BasicBlock *BB) const { + return BFI ? BFI->printBlockFreq(OS, BB) : OS; +} + +uint64_t BlockFrequencyInfo::getEntryFreq() const { + return BFI ? BFI->getEntryFreq() : 0; } diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp new file mode 100644 index 000000000000..3203c371648d --- /dev/null +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -0,0 +1,697 @@ +//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Support/raw_ostream.h" +#include <deque> + +using namespace llvm; +using namespace llvm::bfi_detail; + +#define DEBUG_TYPE "block-freq" + +ScaledNumber<uint64_t> BlockMass::toScaled() const { + if (isFull()) + return ScaledNumber<uint64_t>(1, 0); + return ScaledNumber<uint64_t>(getMass() + 1, -64); +} + +void BlockMass::dump() const { print(dbgs()); } + +static char getHexDigit(int N) { + assert(N < 16); + if (N < 10) + return '0' + N; + return 'a' + N - 10; +} +raw_ostream &BlockMass::print(raw_ostream &OS) const { + for (int Digits = 0; Digits < 16; ++Digits) + OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf); + return OS; +} + +namespace { + +typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; +typedef BlockFrequencyInfoImplBase::Distribution Distribution; +typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList; +typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64; +typedef BlockFrequencyInfoImplBase::LoopData LoopData; +typedef BlockFrequencyInfoImplBase::Weight Weight; +typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData; + +/// \brief Dithering mass distributer. +/// +/// This class splits up a single mass into portions by weight, dithering to +/// spread out error. No mass is lost. The dithering precision depends on the +/// precision of the product of \a BlockMass and \a BranchProbability. +/// +/// The distribution algorithm follows. +/// +/// 1. Initialize by saving the sum of the weights in \a RemWeight and the +/// mass to distribute in \a RemMass. +/// +/// 2. For each portion: +/// +/// 1. Construct a branch probability, P, as the portion's weight divided +/// by the current value of \a RemWeight. +/// 2. Calculate the portion's mass as \a RemMass times P. +/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting +/// the current portion's weight and mass. +struct DitheringDistributer { + uint32_t RemWeight; + BlockMass RemMass; + + DitheringDistributer(Distribution &Dist, const BlockMass &Mass); + + BlockMass takeMass(uint32_t Weight); +}; + +} // end namespace + +DitheringDistributer::DitheringDistributer(Distribution &Dist, + const BlockMass &Mass) { + Dist.normalize(); + RemWeight = Dist.Total; + RemMass = Mass; +} + +BlockMass DitheringDistributer::takeMass(uint32_t Weight) { + assert(Weight && "invalid weight"); + assert(Weight <= RemWeight); + BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight); + + // Decrement totals (dither). + RemWeight -= Weight; + RemMass -= Mass; + return Mass; +} + +void Distribution::add(const BlockNode &Node, uint64_t Amount, + Weight::DistType Type) { + assert(Amount && "invalid weight of 0"); + uint64_t NewTotal = Total + Amount; + + // Check for overflow. It should be impossible to overflow twice. + bool IsOverflow = NewTotal < Total; + assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow"); + DidOverflow |= IsOverflow; + + // Update the total. + Total = NewTotal; + + // Save the weight. + Weights.push_back(Weight(Type, Node, Amount)); +} + +static void combineWeight(Weight &W, const Weight &OtherW) { + assert(OtherW.TargetNode.isValid()); + if (!W.Amount) { + W = OtherW; + return; + } + assert(W.Type == OtherW.Type); + assert(W.TargetNode == OtherW.TargetNode); + assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow"); + W.Amount += OtherW.Amount; +} +static void combineWeightsBySorting(WeightList &Weights) { + // Sort so edges to the same node are adjacent. + std::sort(Weights.begin(), Weights.end(), + [](const Weight &L, + const Weight &R) { return L.TargetNode < R.TargetNode; }); + + // Combine adjacent edges. + WeightList::iterator O = Weights.begin(); + for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E; + ++O, (I = L)) { + *O = *I; + + // Find the adjacent weights to the same node. + for (++L; L != E && I->TargetNode == L->TargetNode; ++L) + combineWeight(*O, *L); + } + + // Erase extra entries. + Weights.erase(O, Weights.end()); + return; +} +static void combineWeightsByHashing(WeightList &Weights) { + // Collect weights into a DenseMap. + typedef DenseMap<BlockNode::IndexType, Weight> HashTable; + HashTable Combined(NextPowerOf2(2 * Weights.size())); + for (const Weight &W : Weights) + combineWeight(Combined[W.TargetNode.Index], W); + + // Check whether anything changed. + if (Weights.size() == Combined.size()) + return; + + // Fill in the new weights. + Weights.clear(); + Weights.reserve(Combined.size()); + for (const auto &I : Combined) + Weights.push_back(I.second); +} +static void combineWeights(WeightList &Weights) { + // Use a hash table for many successors to keep this linear. + if (Weights.size() > 128) { + combineWeightsByHashing(Weights); + return; + } + + combineWeightsBySorting(Weights); +} +static uint64_t shiftRightAndRound(uint64_t N, int Shift) { + assert(Shift >= 0); + assert(Shift < 64); + if (!Shift) + return N; + return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1)); +} +void Distribution::normalize() { + // Early exit for termination nodes. + if (Weights.empty()) + return; + + // Only bother if there are multiple successors. + if (Weights.size() > 1) + combineWeights(Weights); + + // Early exit when combined into a single successor. + if (Weights.size() == 1) { + Total = 1; + Weights.front().Amount = 1; + return; + } + + // Determine how much to shift right so that the total fits into 32-bits. + // + // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1 + // for each weight can cause a 32-bit overflow. + int Shift = 0; + if (DidOverflow) + Shift = 33; + else if (Total > UINT32_MAX) + Shift = 33 - countLeadingZeros(Total); + + // Early exit if nothing needs to be scaled. + if (!Shift) + return; + + // Recompute the total through accumulation (rather than shifting it) so that + // it's accurate after shifting. + Total = 0; + + // Sum the weights to each node and shift right if necessary. + for (Weight &W : Weights) { + // Scale down below UINT32_MAX. Since Shift is larger than necessary, we + // can round here without concern about overflow. + assert(W.TargetNode.isValid()); + W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift)); + assert(W.Amount <= UINT32_MAX); + + // Update the total. + Total += W.Amount; + } + assert(Total <= UINT32_MAX); +} + +void BlockFrequencyInfoImplBase::clear() { + // Swap with a default-constructed std::vector, since std::vector<>::clear() + // does not actually clear heap storage. + std::vector<FrequencyData>().swap(Freqs); + std::vector<WorkingData>().swap(Working); + Loops.clear(); +} + +/// \brief Clear all memory not needed downstream. +/// +/// Releases all memory not used downstream. In particular, saves Freqs. +static void cleanup(BlockFrequencyInfoImplBase &BFI) { + std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs)); + BFI.clear(); + BFI.Freqs = std::move(SavedFreqs); +} + +bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, + const LoopData *OuterLoop, + const BlockNode &Pred, + const BlockNode &Succ, + uint64_t Weight) { + if (!Weight) + Weight = 1; + + auto isLoopHeader = [&OuterLoop](const BlockNode &Node) { + return OuterLoop && OuterLoop->isHeader(Node); + }; + + BlockNode Resolved = Working[Succ.Index].getResolvedNode(); + +#ifndef NDEBUG + auto debugSuccessor = [&](const char *Type) { + dbgs() << " =>" + << " [" << Type << "] weight = " << Weight; + if (!isLoopHeader(Resolved)) + dbgs() << ", succ = " << getBlockName(Succ); + if (Resolved != Succ) + dbgs() << ", resolved = " << getBlockName(Resolved); + dbgs() << "\n"; + }; + (void)debugSuccessor; +#endif + + if (isLoopHeader(Resolved)) { + DEBUG(debugSuccessor("backedge")); + Dist.addBackedge(OuterLoop->getHeader(), Weight); + return true; + } + + if (Working[Resolved.Index].getContainingLoop() != OuterLoop) { + DEBUG(debugSuccessor(" exit ")); + Dist.addExit(Resolved, Weight); + return true; + } + + if (Resolved < Pred) { + if (!isLoopHeader(Pred)) { + // If OuterLoop is an irreducible loop, we can't actually handle this. + assert((!OuterLoop || !OuterLoop->isIrreducible()) && + "unhandled irreducible control flow"); + + // Irreducible backedge. Abort. + DEBUG(debugSuccessor("abort!!!")); + return false; + } + + // If "Pred" is a loop header, then this isn't really a backedge; rather, + // OuterLoop must be irreducible. These false backedges can come only from + // secondary loop headers. + assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) && + "unhandled irreducible control flow"); + } + + DEBUG(debugSuccessor(" local ")); + Dist.addLocal(Resolved, Weight); + return true; +} + +bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( + const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) { + // Copy the exit map into Dist. + for (const auto &I : Loop.Exits) + if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, + I.second.getMass())) + // Irreducible backedge. + return false; + + return true; +} + +/// \brief Get the maximum allowed loop scale. +/// +/// Gives the maximum number of estimated iterations allowed for a loop. Very +/// large numbers cause problems downstream (even within 64-bits). +static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); } + +/// \brief Compute the loop scale for a loop. +void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { + // Compute loop scale. + DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); + + // LoopScale == 1 / ExitMass + // ExitMass == HeadMass - BackedgeMass + BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass; + + // Block scale stores the inverse of the scale. + Loop.Scale = ExitMass.toScaled().inverse(); + + DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() + << " - " << Loop.BackedgeMass << ")\n" + << " - scale = " << Loop.Scale << "\n"); + + if (Loop.Scale > getMaxLoopScale()) { + Loop.Scale = getMaxLoopScale(); + DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n"); + } +} + +/// \brief Package up a loop. +void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) { + DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n"); + + // Clear the subloop exits to prevent quadratic memory usage. + for (const BlockNode &M : Loop.Nodes) { + if (auto *Loop = Working[M.Index].getPackagedLoop()) + Loop->Exits.clear(); + DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n"); + } + Loop.IsPackaged = true; +} + +void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, + LoopData *OuterLoop, + Distribution &Dist) { + BlockMass Mass = Working[Source.Index].getMass(); + DEBUG(dbgs() << " => mass: " << Mass << "\n"); + + // Distribute mass to successors as laid out in Dist. + DitheringDistributer D(Dist, Mass); + +#ifndef NDEBUG + auto debugAssign = [&](const BlockNode &T, const BlockMass &M, + const char *Desc) { + dbgs() << " => assign " << M << " (" << D.RemMass << ")"; + if (Desc) + dbgs() << " [" << Desc << "]"; + if (T.isValid()) + dbgs() << " to " << getBlockName(T); + dbgs() << "\n"; + }; + (void)debugAssign; +#endif + + for (const Weight &W : Dist.Weights) { + // Check for a local edge (non-backedge and non-exit). + BlockMass Taken = D.takeMass(W.Amount); + if (W.Type == Weight::Local) { + Working[W.TargetNode.Index].getMass() += Taken; + DEBUG(debugAssign(W.TargetNode, Taken, nullptr)); + continue; + } + + // Backedges and exits only make sense if we're processing a loop. + assert(OuterLoop && "backedge or exit outside of loop"); + + // Check for a backedge. + if (W.Type == Weight::Backedge) { + OuterLoop->BackedgeMass += Taken; + DEBUG(debugAssign(BlockNode(), Taken, "back")); + continue; + } + + // This must be an exit. + assert(W.Type == Weight::Exit); + OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken)); + DEBUG(debugAssign(W.TargetNode, Taken, "exit")); + } +} + +static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, + const Scaled64 &Min, const Scaled64 &Max) { + // Scale the Factor to a size that creates integers. Ideally, integers would + // be scaled so that Max == UINT64_MAX so that they can be best + // differentiated. However, the register allocator currently deals poorly + // with large numbers. Instead, push Min up a little from 1 to give some + // room to differentiate small, unequal numbers. + // + // TODO: fix issues downstream so that ScalingFactor can be + // Scaled64(1,64)/Max. + Scaled64 ScalingFactor = Min.inverse(); + if ((Max / Min).lg() < 60) + ScalingFactor <<= 3; + + // Translate the floats to integers. + DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max + << ", factor = " << ScalingFactor << "\n"); + for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) { + Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor; + BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>()); + DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = " + << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled + << ", int = " << BFI.Freqs[Index].Integer << "\n"); + } +} + +/// \brief Unwrap a loop package. +/// +/// Visits all the members of a loop, adjusting their BlockData according to +/// the loop's pseudo-node. +static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { + DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop) + << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale + << "\n"); + Loop.Scale *= Loop.Mass.toScaled(); + Loop.IsPackaged = false; + DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n"); + + // Propagate the head scale through the loop. Since members are visited in + // RPO, the head scale will be updated by the loop scale first, and then the + // final head scale will be used for updated the rest of the members. + for (const BlockNode &N : Loop.Nodes) { + const auto &Working = BFI.Working[N.Index]; + Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale + : BFI.Freqs[N.Index].Scaled; + Scaled64 New = Loop.Scale * F; + DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New + << "\n"); + F = New; + } +} + +void BlockFrequencyInfoImplBase::unwrapLoops() { + // Set initial frequencies from loop-local masses. + for (size_t Index = 0; Index < Working.size(); ++Index) + Freqs[Index].Scaled = Working[Index].Mass.toScaled(); + + for (LoopData &Loop : Loops) + unwrapLoop(*this, Loop); +} + +void BlockFrequencyInfoImplBase::finalizeMetrics() { + // Unwrap loop packages in reverse post-order, tracking min and max + // frequencies. + auto Min = Scaled64::getLargest(); + auto Max = Scaled64::getZero(); + for (size_t Index = 0; Index < Working.size(); ++Index) { + // Update min/max scale. + Min = std::min(Min, Freqs[Index].Scaled); + Max = std::max(Max, Freqs[Index].Scaled); + } + + // Convert to integers. + convertFloatingToInteger(*this, Min, Max); + + // Clean up data structures. + cleanup(*this); + + // Print out the final stats. + DEBUG(dump()); +} + +BlockFrequency +BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { + if (!Node.isValid()) + return 0; + return Freqs[Node.Index].Integer; +} +Scaled64 +BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { + if (!Node.isValid()) + return Scaled64::getZero(); + return Freqs[Node.Index].Scaled; +} + +std::string +BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { + return std::string(); +} +std::string +BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const { + return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*"); +} + +raw_ostream & +BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, + const BlockNode &Node) const { + return OS << getFloatingBlockFreq(Node); +} + +raw_ostream & +BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, + const BlockFrequency &Freq) const { + Scaled64 Block(Freq.getFrequency(), 0); + Scaled64 Entry(getEntryFreq(), 0); + + return OS << Block / Entry; +} + +void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) { + Start = OuterLoop.getHeader(); + Nodes.reserve(OuterLoop.Nodes.size()); + for (auto N : OuterLoop.Nodes) + addNode(N); + indexNodes(); +} +void IrreducibleGraph::addNodesInFunction() { + Start = 0; + for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index) + if (!BFI.Working[Index].isPackaged()) + addNode(Index); + indexNodes(); +} +void IrreducibleGraph::indexNodes() { + for (auto &I : Nodes) + Lookup[I.Node.Index] = &I; +} +void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ, + const BFIBase::LoopData *OuterLoop) { + if (OuterLoop && OuterLoop->isHeader(Succ)) + return; + auto L = Lookup.find(Succ.Index); + if (L == Lookup.end()) + return; + IrrNode &SuccIrr = *L->second; + Irr.Edges.push_back(&SuccIrr); + SuccIrr.Edges.push_front(&Irr); + ++SuccIrr.NumIn; +} + +namespace llvm { +template <> struct GraphTraits<IrreducibleGraph> { + typedef bfi_detail::IrreducibleGraph GraphT; + + typedef const GraphT::IrrNode NodeType; + typedef GraphT::IrrNode::iterator ChildIteratorType; + + static const NodeType *getEntryNode(const GraphT &G) { + return G.StartIrr; + } + static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); } +}; +} + +/// \brief Find extra irreducible headers. +/// +/// Find entry blocks and other blocks with backedges, which exist when \c G +/// contains irreducible sub-SCCs. +static void findIrreducibleHeaders( + const BlockFrequencyInfoImplBase &BFI, + const IrreducibleGraph &G, + const std::vector<const IrreducibleGraph::IrrNode *> &SCC, + LoopData::NodeList &Headers, LoopData::NodeList &Others) { + // Map from nodes in the SCC to whether it's an entry block. + SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC; + + // InSCC also acts the set of nodes in the graph. Seed it. + for (const auto *I : SCC) + InSCC[I] = false; + + for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) { + auto &Irr = *I->first; + for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { + if (InSCC.count(P)) + continue; + + // This is an entry block. + I->second = true; + Headers.push_back(Irr.Node); + DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n"); + break; + } + } + assert(Headers.size() >= 2 && "Should be irreducible"); + if (Headers.size() == InSCC.size()) { + // Every block is a header. + std::sort(Headers.begin(), Headers.end()); + return; + } + + // Look for extra headers from irreducible sub-SCCs. + for (const auto &I : InSCC) { + // Entry blocks are already headers. + if (I.second) + continue; + + auto &Irr = *I.first; + for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { + // Skip forward edges. + if (P->Node < Irr.Node) + continue; + + // Skip predecessors from entry blocks. These can have inverted + // ordering. + if (InSCC.lookup(P)) + continue; + + // Store the extra header. + Headers.push_back(Irr.Node); + DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n"); + break; + } + if (Headers.back() == Irr.Node) + // Added this as a header. + continue; + + // This is not a header. + Others.push_back(Irr.Node); + DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n"); + } + std::sort(Headers.begin(), Headers.end()); + std::sort(Others.begin(), Others.end()); +} + +static void createIrreducibleLoop( + BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, + LoopData *OuterLoop, std::list<LoopData>::iterator Insert, + const std::vector<const IrreducibleGraph::IrrNode *> &SCC) { + // Translate the SCC into RPO. + DEBUG(dbgs() << " - found-scc\n"); + + LoopData::NodeList Headers; + LoopData::NodeList Others; + findIrreducibleHeaders(BFI, G, SCC, Headers, Others); + + auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(), + Headers.end(), Others.begin(), Others.end()); + + // Update loop hierarchy. + for (const auto &N : Loop->Nodes) + if (BFI.Working[N.Index].isLoopHeader()) + BFI.Working[N.Index].Loop->Parent = &*Loop; + else + BFI.Working[N.Index].Loop = &*Loop; +} + +iterator_range<std::list<LoopData>::iterator> +BlockFrequencyInfoImplBase::analyzeIrreducible( + const IrreducibleGraph &G, LoopData *OuterLoop, + std::list<LoopData>::iterator Insert) { + assert((OuterLoop == nullptr) == (Insert == Loops.begin())); + auto Prev = OuterLoop ? std::prev(Insert) : Loops.end(); + + for (auto I = scc_begin(G); !I.isAtEnd(); ++I) { + if (I->size() < 2) + continue; + + // Translate the SCC into RPO. + createIrreducibleLoop(*this, G, OuterLoop, Insert, *I); + } + + if (OuterLoop) + return make_range(std::next(Prev), Insert); + return make_range(Loops.begin(), Insert); +} + +void +BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) { + OuterLoop.Exits.clear(); + OuterLoop.BackedgeMass = BlockMass::getEmpty(); + auto O = OuterLoop.Nodes.begin() + 1; + for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I) + if (!Working[I->Index].isPackaged()) + *O++ = *I; + OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end()); +} diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 86560ca33d0c..bbd875059522 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -14,16 +14,18 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "branch-prob" + INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) @@ -321,6 +323,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { InEdges.push_back(I.getSuccessorIndex()); } + if (BackEdges.empty() && ExitingEdges.empty()) + return false; + if (uint32_t numBackEdges = BackEdges.size()) { uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; if (backWeight < NORMAL_WEIGHT) @@ -483,6 +488,8 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { } bool BranchProbabilityInfo::runOnFunction(Function &F) { + DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() + << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. LI = &getAnalysis<LoopInfo>(); assert(PostDominatedByUnreachable.empty()); @@ -554,7 +561,7 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { uint32_t Sum = 0; uint32_t MaxWeight = 0; - BasicBlock *MaxSucc = 0; + BasicBlock *MaxSucc = nullptr; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; @@ -574,7 +581,7 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) return MaxSucc; - return 0; + return nullptr; } /// Get the raw edge weight for the edge. If can't find it, return @@ -591,6 +598,11 @@ getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { return DEFAULT_WEIGHT; } +uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src, + succ_const_iterator Dst) const { + return getEdgeWeight(Src, Dst.getSuccessorIndex()); +} + /// Get the raw edge weight calculated for the block pair. This returns the sum /// of all raw edge weights from Src to Dst. uint32_t BranchProbabilityInfo:: diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index c3f32d3a840c..8ef5302717f8 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -13,10 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFG.h" - #include "llvm/ADT/SmallSet.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Dominators.h" using namespace llvm; @@ -102,15 +101,9 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, // If AllowIdenticalEdges is true, then we allow this edge to be considered // non-critical iff all preds come from TI's block. - while (I != E) { - const BasicBlock *P = *I; - if (P != FirstPred) + for (; I != E; ++I) + if (*I != FirstPred) return true; - // Note: leave this as is until no one ever compiles with either gcc 4.0.1 - // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 - E = pred_end(P); - ++I; - } return false; } @@ -130,7 +123,7 @@ static bool loopContainsBoth(const LoopInfo *LI, const BasicBlock *BB1, const BasicBlock *BB2) { const Loop *L1 = getOutermostLoop(LI, BB1); const Loop *L2 = getOutermostLoop(LI, BB2); - return L1 != NULL && L1 == L2; + return L1 != nullptr && L1 == L2; } static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist, @@ -140,7 +133,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist, // When the stop block is unreachable, it's dominated from everywhere, // regardless of whether there's a path between the two blocks. if (DT && !DT->isReachableFromEntry(StopBB)) - DT = 0; + DT = nullptr; // Limit the number of blocks we visit. The goal is to avoid run-away compile // times on large CFGs without hampering sensible code. Arbitrarily chosen. @@ -163,14 +156,13 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist, return true; } - if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) { + if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) { // All blocks in a single loop are reachable from all other blocks. From // any of these blocks, we can skip directly to the exits of the loop, // ignoring any other blocks inside the loop body. Outer->getExitBlocks(Worklist); } else { - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - Worklist.push_back(*I); + Worklist.append(succ_begin(BB), succ_end(BB)); } } while (!Worklist.empty()); @@ -208,7 +200,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, // If the block is in a loop then we can reach any instruction in the block // from any other instruction in the block by going around a backedge. - if (LI && LI->getLoopFor(BB) != 0) + if (LI && LI->getLoopFor(BB) != nullptr) return true; // Linear scan, start at 'A', see whether we hit 'B' or the end first. @@ -223,8 +215,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, return false; // Otherwise, continue doing the normal per-BB CFG walk. - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - Worklist.push_back(*I); + Worklist.append(succ_begin(BB), succ_end(BB)); if (Worklist.empty()) { // We've proven that there's no path! diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 9b6879a42ed4..c2c19d6a69ef 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Pass.h" +#include "llvm/Support/FileSystem.h" using namespace llvm; namespace { @@ -28,14 +29,14 @@ namespace { initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F) { + bool runOnFunction(Function &F) override { F.viewCFG(); return false; } - void print(raw_ostream &OS, const Module* = 0) const {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; @@ -51,14 +52,14 @@ namespace { initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F) { + bool runOnFunction(Function &F) override { F.viewCFGOnly(); return false; } - void print(raw_ostream &OS, const Module* = 0) const {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; @@ -75,12 +76,12 @@ namespace { initializeCFGPrinterPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F) { + bool runOnFunction(Function &F) override { std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; - raw_fd_ostream File(Filename.c_str(), ErrorInfo); + raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text); if (ErrorInfo.empty()) WriteGraph(File, (const Function*)&F); @@ -90,9 +91,9 @@ namespace { return false; } - void print(raw_ostream &OS, const Module* = 0) const {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; @@ -108,13 +109,13 @@ namespace { CFGOnlyPrinter() : FunctionPass(ID) { initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry()); } - - virtual bool runOnFunction(Function &F) { + + bool runOnFunction(Function &F) override { std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; - raw_fd_ostream File(Filename.c_str(), ErrorInfo); + raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text); if (ErrorInfo.empty()) WriteGraph(File, (const Function*)&F, true); @@ -123,9 +124,9 @@ namespace { errs() << "\n"; return false; } - void print(raw_ostream &OS, const Module* = 0) const {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; @@ -147,8 +148,8 @@ void Function::viewCFG() const { /// viewCFGOnly - This function is meant for use from the debugger. It works /// just like viewCFG, but it does not include the contents of basic blocks -/// into the nodes, just the label. If you are only interested in the CFG t -/// his can make the graph smaller. +/// into the nodes, just the label. If you are only interested in the CFG +/// this can make the graph smaller. /// void Function::viewCFGOnly() const { ViewGraph(this, "cfg" + getName(), true); diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp new file mode 100644 index 000000000000..5d1d8a9c6e06 --- /dev/null +++ b/lib/Analysis/CGSCCPassManager.cpp @@ -0,0 +1,167 @@ +//===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +static cl::opt<bool> +DebugPM("debug-cgscc-pass-manager", cl::Hidden, + cl::desc("Print CGSCC pass management debugging information")); + +PreservedAnalyses CGSCCPassManager::run(LazyCallGraph::SCC *C, + CGSCCAnalysisManager *AM) { + PreservedAnalyses PA = PreservedAnalyses::all(); + + if (DebugPM) + dbgs() << "Starting CGSCC pass manager run.\n"; + + for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) { + if (DebugPM) + dbgs() << "Running CGSCC pass: " << Passes[Idx]->name() << "\n"; + + PreservedAnalyses PassPA = Passes[Idx]->run(C, AM); + if (AM) + AM->invalidate(C, PassPA); + PA.intersect(std::move(PassPA)); + } + + if (DebugPM) + dbgs() << "Finished CGSCC pass manager run.\n"; + + return PA; +} + +bool CGSCCAnalysisManager::empty() const { + assert(CGSCCAnalysisResults.empty() == CGSCCAnalysisResultLists.empty() && + "The storage and index of analysis results disagree on how many there " + "are!"); + return CGSCCAnalysisResults.empty(); +} + +void CGSCCAnalysisManager::clear() { + CGSCCAnalysisResults.clear(); + CGSCCAnalysisResultLists.clear(); +} + +CGSCCAnalysisManager::ResultConceptT & +CGSCCAnalysisManager::getResultImpl(void *PassID, LazyCallGraph::SCC *C) { + CGSCCAnalysisResultMapT::iterator RI; + bool Inserted; + std::tie(RI, Inserted) = CGSCCAnalysisResults.insert(std::make_pair( + std::make_pair(PassID, C), CGSCCAnalysisResultListT::iterator())); + + // If we don't have a cached result for this function, look up the pass and + // run it to produce a result, which we then add to the cache. + if (Inserted) { + CGSCCAnalysisResultListT &ResultList = CGSCCAnalysisResultLists[C]; + ResultList.emplace_back(PassID, lookupPass(PassID).run(C, this)); + RI->second = std::prev(ResultList.end()); + } + + return *RI->second->second; +} + +CGSCCAnalysisManager::ResultConceptT * +CGSCCAnalysisManager::getCachedResultImpl(void *PassID, + LazyCallGraph::SCC *C) const { + CGSCCAnalysisResultMapT::const_iterator RI = + CGSCCAnalysisResults.find(std::make_pair(PassID, C)); + return RI == CGSCCAnalysisResults.end() ? nullptr : &*RI->second->second; +} + +void CGSCCAnalysisManager::invalidateImpl(void *PassID, LazyCallGraph::SCC *C) { + CGSCCAnalysisResultMapT::iterator RI = + CGSCCAnalysisResults.find(std::make_pair(PassID, C)); + if (RI == CGSCCAnalysisResults.end()) + return; + + CGSCCAnalysisResultLists[C].erase(RI->second); +} + +void CGSCCAnalysisManager::invalidateImpl(LazyCallGraph::SCC *C, + const PreservedAnalyses &PA) { + // Clear all the invalidated results associated specifically with this + // function. + SmallVector<void *, 8> InvalidatedPassIDs; + CGSCCAnalysisResultListT &ResultsList = CGSCCAnalysisResultLists[C]; + for (CGSCCAnalysisResultListT::iterator I = ResultsList.begin(), + E = ResultsList.end(); + I != E;) + if (I->second->invalidate(C, PA)) { + InvalidatedPassIDs.push_back(I->first); + I = ResultsList.erase(I); + } else { + ++I; + } + while (!InvalidatedPassIDs.empty()) + CGSCCAnalysisResults.erase( + std::make_pair(InvalidatedPassIDs.pop_back_val(), C)); + CGSCCAnalysisResultLists.erase(C); +} + +char CGSCCAnalysisManagerModuleProxy::PassID; + +CGSCCAnalysisManagerModuleProxy::Result +CGSCCAnalysisManagerModuleProxy::run(Module *M) { + assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!"); + return Result(*CGAM); +} + +CGSCCAnalysisManagerModuleProxy::Result::~Result() { + // Clear out the analysis manager if we're being destroyed -- it means we + // didn't even see an invalidate call when we got invalidated. + CGAM->clear(); +} + +bool CGSCCAnalysisManagerModuleProxy::Result::invalidate( + Module *M, const PreservedAnalyses &PA) { + // If this proxy isn't marked as preserved, then we can't even invalidate + // individual CGSCC analyses, there may be an invalid set of SCC objects in + // the cache making it impossible to incrementally preserve them. + // Just clear the entire manager. + if (!PA.preserved(ID())) + CGAM->clear(); + + // Return false to indicate that this result is still a valid proxy. + return false; +} + +char ModuleAnalysisManagerCGSCCProxy::PassID; + +char FunctionAnalysisManagerCGSCCProxy::PassID; + +FunctionAnalysisManagerCGSCCProxy::Result +FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC *C) { + assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!"); + return Result(*FAM); +} + +FunctionAnalysisManagerCGSCCProxy::Result::~Result() { + // Clear out the analysis manager if we're being destroyed -- it means we + // didn't even see an invalidate call when we got invalidated. + FAM->clear(); +} + +bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( + LazyCallGraph::SCC *C, const PreservedAnalyses &PA) { + // If this proxy isn't marked as preserved, then we can't even invalidate + // individual function analyses, there may be an invalid set of Function + // objects in the cache making it impossible to incrementally preserve them. + // Just clear the entire manager. + if (!PA.preserved(ID())) + FAM->clear(); + + // Return false to indicate that this result is still a valid proxy. + return false; +} + +char CGSCCAnalysisManagerFunctionProxy::PassID; diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 3624aac4502e..d1632fd26ace 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -7,9 +7,11 @@ add_llvm_library(LLVMAnalysis Analysis.cpp BasicAliasAnalysis.cpp BlockFrequencyInfo.cpp + BlockFrequencyInfoImpl.cpp BranchProbabilityInfo.cpp CFG.cpp CFGPrinter.cpp + CGSCCPassManager.cpp CaptureTracking.cpp CostModel.cpp CodeMetrics.cpp @@ -23,6 +25,8 @@ add_llvm_library(LLVMAnalysis InstructionSimplify.cpp Interval.cpp IntervalPartition.cpp + JumpInstrTableInfo.cpp + LazyCallGraph.cpp LazyValueInfo.cpp LibCallAliasAnalysis.cpp LibCallSemantics.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 79fab1be4413..f2f8877af1a2 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -20,24 +20,26 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/CallSite.h" using namespace llvm; CaptureTracker::~CaptureTracker() {} -bool CaptureTracker::shouldExplore(Use *U) { return true; } +bool CaptureTracker::shouldExplore(const Use *U) { return true; } namespace { struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) : ReturnCaptures(ReturnCaptures), Captured(false) {} - void tooManyUses() { Captured = true; } + void tooManyUses() override { Captured = true; } - bool captured(Use *U) { + bool captured(const Use *U) override { if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures) return false; @@ -49,6 +51,65 @@ namespace { bool Captured; }; + + /// Only find pointer captures which happen before the given instruction. Uses + /// the dominator tree to determine whether one instruction is before another. + /// Only support the case where the Value is defined in the same basic block + /// as the given instruction and the use. + struct CapturesBefore : public CaptureTracker { + CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT, + bool IncludeI) + : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), + IncludeI(IncludeI), Captured(false) {} + + void tooManyUses() override { Captured = true; } + + bool shouldExplore(const Use *U) override { + Instruction *I = cast<Instruction>(U->getUser()); + if (BeforeHere == I && !IncludeI) + return false; + + BasicBlock *BB = I->getParent(); + // We explore this usage only if the usage can reach "BeforeHere". + // If use is not reachable from entry, there is no need to explore. + if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + // If the value is defined in the same basic block as use and BeforeHere, + // there is no need to explore the use if BeforeHere dominates use. + // Check whether there is a path from I to BeforeHere. + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !isPotentiallyReachable(I, BeforeHere, DT)) + return false; + return true; + } + + bool captured(const Use *U) override { + if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures) + return false; + + Instruction *I = cast<Instruction>(U->getUser()); + if (BeforeHere == I && !IncludeI) + return false; + + BasicBlock *BB = I->getParent(); + // Same logic as in shouldExplore. + if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !isPotentiallyReachable(I, BeforeHere, DT)) + return false; + Captured = true; + return true; + } + + const Instruction *BeforeHere; + DominatorTree *DT; + + bool ReturnCaptures; + bool IncludeI; + + bool Captured; + }; } /// PointerMayBeCaptured - Return true if this pointer value may be captured @@ -74,6 +135,32 @@ bool llvm::PointerMayBeCaptured(const Value *V, return SCT.Captured; } +/// PointerMayBeCapturedBefore - Return true if this pointer value may be +/// captured by the enclosing function (which is required to exist). If a +/// DominatorTree is provided, only captures which happen before the given +/// instruction are considered. This routine can be expensive, so consider +/// caching the results. The boolean ReturnCaptures specifies whether +/// returning the value (or part of it) from the function counts as capturing +/// it or not. The boolean StoreCaptures specified whether storing the value +/// (or part of it) into memory anywhere automatically counts as capturing it +/// or not. +bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, + bool StoreCaptures, const Instruction *I, + DominatorTree *DT, bool IncludeI) { + assert(!isa<GlobalValue>(V) && + "It doesn't make sense to ask whether a global is captured."); + + if (!DT) + return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures); + + // TODO: See comment in PointerMayBeCaptured regarding what could be done + // with StoreCaptures. + + CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); + PointerMayBeCaptured(V, &CB); + return CB.Captured; +} + /// TODO: Write a new FunctionPass AliasAnalysis so that it can keep /// a cache. Then we can move the code from BasicAliasAnalysis into /// that path, and remove this threshold. @@ -81,25 +168,23 @@ static int const Threshold = 20; void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); - SmallVector<Use*, Threshold> Worklist; - SmallSet<Use*, Threshold> Visited; + SmallVector<const Use *, Threshold> Worklist; + SmallSet<const Use *, Threshold> Visited; int Count = 0; - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { + for (const Use &U : V->uses()) { // If there are lots of uses, conservatively say that the value // is captured to avoid taking too much compile time. if (Count++ >= Threshold) return Tracker->tooManyUses(); - Use *U = &UI.getUse(); - if (!Tracker->shouldExplore(U)) continue; - Visited.insert(U); - Worklist.push_back(U); + if (!Tracker->shouldExplore(&U)) continue; + Visited.insert(&U); + Worklist.push_back(&U); } while (!Worklist.empty()) { - Use *U = Worklist.pop_back_val(); + const Use *U = Worklist.pop_back_val(); Instruction *I = cast<Instruction>(U->getUser()); V = U->get(); @@ -145,19 +230,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { case Instruction::GetElementPtr: case Instruction::PHI: case Instruction::Select: + case Instruction::AddrSpaceCast: // The original value is not captured via this if the new value isn't. Count = 0; - for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { + for (Use &UU : I->uses()) { // If there are lots of uses, conservatively say that the value // is captured to avoid taking too much compile time. if (Count++ >= Threshold) return Tracker->tooManyUses(); - Use *U = &UI.getUse(); - if (Visited.insert(U)) - if (Tracker->shouldExplore(U)) - Worklist.push_back(U); + if (Visited.insert(&UU)) + if (Tracker->shouldExplore(&UU)) + Worklist.push_back(&UU); } break; case Instruction::ICmp: diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 8cda01a24c0d..4c8a093684f6 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -13,10 +13,10 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CallSite.h" using namespace llvm; @@ -65,11 +65,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, ++NumVectorInsts; if (const CallInst *CI = dyn_cast<CallInst>(II)) - if (CI->hasFnAttr(Attribute::NoDuplicate)) + if (CI->cannotDuplicate()) notDuplicatable = true; if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II)) - if (InvI->hasFnAttr(Attribute::NoDuplicate)) + if (InvI->cannotDuplicate()) notDuplicatable = true; NumInsts += TTI.getUserCost(&*II); diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 3d32232dacf9..8dc94219027f 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -21,21 +21,26 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Config/config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FEnv.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLibraryInfo.h" #include <cerrno> #include <cmath> + +#ifdef HAVE_FENV_H +#include <fenv.h> +#endif + using namespace llvm; //===----------------------------------------------------------------------===// @@ -56,7 +61,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle a vector->integer cast. if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) { VectorType *VTy = dyn_cast<VectorType>(C->getType()); - if (VTy == 0) + if (!VTy) return ConstantExpr::getBitCast(C, DestTy); unsigned NumSrcElts = VTy->getNumElements(); @@ -73,7 +78,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); - if (CDV == 0) + if (!CDV) return ConstantExpr::getBitCast(C, DestTy); // Now that we know that the input value is a vector of integers, just shift @@ -93,7 +98,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // The code below only handles casts to vectors currently. VectorType *DestVTy = dyn_cast<VectorType>(DestTy); - if (DestVTy == 0) + if (!DestVTy) return ConstantExpr::getBitCast(C, DestTy); // If this is a scalar -> vector cast, convert the input into a <1 x scalar> @@ -235,7 +240,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || - CE->getOpcode() == Instruction::BitCast) + CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::AddrSpaceCast) return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) @@ -411,32 +417,32 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, TD.getTypeAllocSizeInBits(LoadTy), AS); } else - return 0; + return nullptr; C = FoldBitCast(C, MapTy, TD); if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) return FoldBitCast(Res, LoadTy, TD); - return 0; + return nullptr; } unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; if (BytesLoaded > 32 || BytesLoaded == 0) - return 0; + return nullptr; GlobalValue *GVal; APInt Offset; if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) - return 0; + return nullptr; GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || !GV->getInitializer()->getType()->isSized()) - return 0; + return nullptr; // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. if (Offset.isNegative()) - return 0; + return nullptr; // If we're not accessing anything in this constant, the result is undefined. if (Offset.getZExtValue() >= @@ -446,7 +452,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, unsigned char RawBytes[32] = {0}; if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, BytesLoaded, TD)) - return 0; + return nullptr; APInt ResultVal = APInt(IntType->getBitWidth(), 0); if (TD.isLittleEndian()) { @@ -466,6 +472,52 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, return ConstantInt::get(IntType->getContext(), ResultVal); } +static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, + const DataLayout *DL) { + if (!DL) + return nullptr; + auto *DestPtrTy = dyn_cast<PointerType>(CE->getType()); + if (!DestPtrTy) + return nullptr; + Type *DestTy = DestPtrTy->getElementType(); + + Constant *C = ConstantFoldLoadFromConstPtr(CE->getOperand(0), DL); + if (!C) + return nullptr; + + do { + Type *SrcTy = C->getType(); + + // If the type sizes are the same and a cast is legal, just directly + // cast the constant. + if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) { + Instruction::CastOps Cast = Instruction::BitCast; + // If we are going from a pointer to int or vice versa, we spell the cast + // differently. + if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) + Cast = Instruction::IntToPtr; + else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) + Cast = Instruction::PtrToInt; + + if (CastInst::castIsValid(Cast, C, DestTy)) + return ConstantExpr::getCast(Cast, C, DestTy); + } + + // If this isn't an aggregate type, there is nothing we can do to drill down + // and find a bitcastable constant. + if (!SrcTy->isAggregateType()) + return nullptr; + + // We're simulating a load through a pointer that was bitcast to point to + // a different type, so we can try to walk down through the initial + // elements of an aggregate to see if some part of th e aggregate is + // castable to implement the "load" semantic model. + C = C->getAggregateElement(0u); + } while (C); + + return nullptr; +} + /// ConstantFoldLoadFromConstPtr - Return the value that a load from C would /// produce if it is constant and determinable. If this is not determinable, /// return null. @@ -479,7 +531,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If the loaded value isn't a constant expr, we can't handle it. ConstantExpr *CE = dyn_cast<ConstantExpr>(C); if (!CE) - return 0; + return nullptr; if (CE->getOpcode() == Instruction::GetElementPtr) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) { @@ -491,6 +543,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, } } + if (CE->getOpcode() == Instruction::BitCast) + if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD)) + return LoadedC; + // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. StringRef Str; @@ -542,16 +598,16 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // Try hard to fold loads from bitcasted strange and non-type-safe things. if (TD) return FoldReinterpretLoadFromConstPtr(CE, *TD); - return 0; + return nullptr; } static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ - if (LI->isVolatile()) return 0; + if (LI->isVolatile()) return nullptr; if (Constant *C = dyn_cast<Constant>(LI->getOperand(0))) return ConstantFoldLoadFromConstPtr(C, TD); - return 0; + return nullptr; } /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. @@ -571,8 +627,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType()); APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); - ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); - ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + computeKnownBits(Op0, KnownZero0, KnownOne0, DL); + computeKnownBits(Op1, KnownZero1, KnownOne1, DL); if ((KnownOne1 | KnownZero0).isAllOnesValue()) { // All the bits of Op0 that the 'and' could be masking are already zero. return Op0; @@ -608,7 +664,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, } } - return 0; + return nullptr; } /// CastGEPIndices - If array indices are not pointer-sized integers, @@ -618,7 +674,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TD) - return 0; + return nullptr; Type *IntPtrTy = TD->getIntPtrType(ResultTy); @@ -641,7 +697,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, } if (!Any) - return 0; + return nullptr; Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -656,7 +712,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, static Constant* StripPtrCastKeepAS(Constant* Ptr) { assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); PointerType *OldPtrTy = cast<PointerType>(Ptr->getType()); - Ptr = cast<Constant>(Ptr->stripPointerCasts()); + Ptr = Ptr->stripPointerCasts(); PointerType *NewPtrTy = cast<PointerType>(Ptr->getType()); // Preserve the address space number of the pointer. @@ -676,7 +732,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, Constant *Ptr = Ops[0]; if (!TD || !Ptr->getType()->getPointerElementType()->isSized() || !Ptr->getType()->isPointerTy()) - return 0; + return nullptr; Type *IntPtrTy = TD->getIntPtrType(Ptr->getType()); Type *ResultElementTy = ResultTy->getPointerElementType(); @@ -690,7 +746,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // "inttoptr (sub (ptrtoint Ptr), V)" if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) { ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]); - assert((CE == 0 || CE->getType() == IntPtrTy) && + assert((!CE || CE->getType() == IntPtrTy) && "CastGEPIndices didn't canonicalize index types!"); if (CE && CE->getOpcode() == Instruction::Sub && CE->getOperand(0)->isNullValue()) { @@ -702,7 +758,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, return Res; } } - return 0; + return nullptr; } unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); @@ -765,7 +821,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // Only handle pointers to sized types, not pointers to functions. if (!ATy->getElementType()->isSized()) - return 0; + return nullptr; } // Determine which element of the array the offset points into. @@ -810,7 +866,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // type, then the offset is pointing into the middle of an indivisible // member, so we can't simplify it. if (Offset != 0) - return 0; + return nullptr; // Create a GEP. Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); @@ -841,7 +897,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (PHINode *PN = dyn_cast<PHINode>(I)) { - Constant *CommonValue = 0; + Constant *CommonValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PN->getIncomingValue(i); @@ -854,14 +910,14 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, // If the incoming value is not a constant, then give up. Constant *C = dyn_cast<Constant>(Incoming); if (!C) - return 0; + return nullptr; // Fold the PHI's operands. if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C)) C = ConstantFoldConstantExpression(NewC, TD, TLI); // If the incoming value is a different constant to // the one we saw previously, then give up. if (CommonValue && C != CommonValue) - return 0; + return nullptr; CommonValue = C; } @@ -876,7 +932,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { Constant *Op = dyn_cast<Constant>(*i); if (!Op) - return 0; // All operands not constant! + return nullptr; // All operands not constant! // Fold the Instruction's operands. if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op)) @@ -966,14 +1022,14 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, } switch (Opcode) { - default: return 0; + default: return nullptr; case Instruction::ICmp: case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: if (Function *F = dyn_cast<Function>(Ops.back())) if (canConstantFoldCallTo(F)) return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); - return 0; + return nullptr; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing // the width of a pointer, so it can't be done in ConstantExpr::getCast. @@ -1142,14 +1198,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { if (!CE->getOperand(1)->isNullValue()) - return 0; // Do not allow stepping over the value! + return nullptr; // Do not allow stepping over the value! // Loop over all of the operands, tracking down which value we are // addressing. for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { C = C->getAggregateElement(CE->getOperand(i)); - if (C == 0) - return 0; + if (!C) + return nullptr; } return C; } @@ -1164,8 +1220,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // addressing. for (unsigned i = 0, e = Indices.size(); i != e; ++i) { C = C->getAggregateElement(Indices[i]); - if (C == 0) - return 0; + if (!C) + return nullptr; } return C; } @@ -1186,6 +1242,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::floor: + case Intrinsic::ceil: case Intrinsic::sqrt: case Intrinsic::pow: case Intrinsic::powi: @@ -1193,6 +1250,10 @@ bool llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::fma: + case Intrinsic::fmuladd: + case Intrinsic::copysign: + case Intrinsic::round: case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: @@ -1244,15 +1305,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) { } } -static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, - Type *Ty) { - sys::llvm_fenv_clearexcept(); - V = NativeFP(V); - if (sys::llvm_fenv_testexcept()) { - sys::llvm_fenv_clearexcept(); - return 0; - } - +static Constant *GetConstantFoldFPValue(double V, Type *Ty) { if (Ty->isHalfTy()) { APFloat APF(V); bool unused; @@ -1264,28 +1317,53 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold half/float/double"); + +} + +namespace { +/// llvm_fenv_clearexcept - Clear the floating-point exception state. +static inline void llvm_fenv_clearexcept() { +#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT + feclearexcept(FE_ALL_EXCEPT); +#endif + errno = 0; +} + +/// llvm_fenv_testexcept - Test if a floating-point exception was raised. +static inline bool llvm_fenv_testexcept() { + int errno_val = errno; + if (errno_val == ERANGE || errno_val == EDOM) + return true; +#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT + if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT)) + return true; +#endif + return false; +} +} // End namespace + +static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, + Type *Ty) { + llvm_fenv_clearexcept(); + V = NativeFP(V); + if (llvm_fenv_testexcept()) { + llvm_fenv_clearexcept(); + return nullptr; + } + + return GetConstantFoldFPValue(V, Ty); } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V, double W, Type *Ty) { - sys::llvm_fenv_clearexcept(); + llvm_fenv_clearexcept(); V = NativeFP(V, W); - if (sys::llvm_fenv_testexcept()) { - sys::llvm_fenv_clearexcept(); - return 0; + if (llvm_fenv_testexcept()) { + llvm_fenv_clearexcept(); + return nullptr; } - if (Ty->isHalfTy()) { - APFloat APF(V); - bool unused; - APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); - return ConstantFP::get(Ty->getContext(), APF); - } - if (Ty->isFloatTy()) - return ConstantFP::get(Ty->getContext(), APFloat((float)V)); - if (Ty->isDoubleTy()) - return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold half/float/double"); + return GetConstantFoldFPValue(V, Ty); } /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer @@ -1311,59 +1389,61 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val, /*isSigned=*/true, mode, &isExact); if (status != APFloat::opOK && status != APFloat::opInexact) - return 0; + return nullptr; return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true); } -/// ConstantFoldCall - Attempt to constant fold a call to the specified function -/// with the specified arguments, returning null if unsuccessful. -Constant * -llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, - const TargetLibraryInfo *TLI) { - if (!F->hasName()) - return 0; - StringRef Name = F->getName(); +static double getValueAsDouble(ConstantFP *Op) { + Type *Ty = Op->getType(); - Type *Ty = F->getReturnType(); + if (Ty->isFloatTy()) + return Op->getValueAPF().convertToFloat(); + + if (Ty->isDoubleTy()) + return Op->getValueAPF().convertToDouble(); + + bool unused; + APFloat APF = Op->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + return APF.convertToDouble(); +} + +static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, + Type *Ty, ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI) { if (Operands.size() == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { - if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) { + if (IntrinsicID == Intrinsic::convert_to_fp16) { APFloat Val(Op->getValueAPF()); bool lost = false; Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost); - return ConstantInt::get(F->getContext(), Val.bitcastToAPInt()); + return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); } - if (!TLI) - return 0; if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) - return 0; + return nullptr; + + if (IntrinsicID == Intrinsic::round) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), V); + } /// We only fold functions with finite arguments. Folding NaN and inf is /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) - return 0; + return nullptr; /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V; - if (Ty->isFloatTy()) - V = Op->getValueAPF().convertToFloat(); - else if (Ty->isDoubleTy()) - V = Op->getValueAPF().convertToDouble(); - else { - bool unused; - APFloat APF = Op->getValueAPF(); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); - V = APF.convertToDouble(); - } + double V = getValueAsDouble(Op); - switch (F->getIntrinsicID()) { + switch (IntrinsicID) { default: break; case Intrinsic::fabs: return ConstantFoldFP(fabs, V, Ty); @@ -1389,8 +1469,13 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, #endif case Intrinsic::floor: return ConstantFoldFP(floor, V, Ty); + case Intrinsic::ceil: + return ConstantFoldFP(ceil, V, Ty); } + if (!TLI) + return nullptr; + switch (Name[0]) { case 'a': if (Name == "acos" && TLI->has(LibFunc::acos)) @@ -1431,7 +1516,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, return ConstantFoldFP(log, V, Ty); else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) return ConstantFoldFP(log10, V, Ty); - else if (F->getIntrinsicID() == Intrinsic::sqrt && + else if (IntrinsicID == Intrinsic::sqrt && (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { if (V >= -0.0) return ConstantFoldFP(sqrt, V, Ty); @@ -1460,13 +1545,13 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, default: break; } - return 0; + return nullptr; } if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { - switch (F->getIntrinsicID()) { + switch (IntrinsicID) { case Intrinsic::bswap: - return ConstantInt::get(F->getContext(), Op->getValue().byteSwap()); + return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap()); case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().countPopulation()); case Intrinsic::convert_from_fp16: { @@ -1481,10 +1566,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, assert(status == APFloat::opOK && !lost && "Precision lost during fp16 constfolding"); - return ConstantFP::get(F->getContext(), Val); + return ConstantFP::get(Ty->getContext(), Val); } default: - return 0; + return nullptr; } } @@ -1492,7 +1577,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (isa<ConstantVector>(Operands[0]) || isa<ConstantDataVector>(Operands[0])) { Constant *Op = cast<Constant>(Operands[0]); - switch (F->getIntrinsicID()) { + switch (IntrinsicID) { default: break; case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: @@ -1514,51 +1599,36 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, } if (isa<UndefValue>(Operands[0])) { - if (F->getIntrinsicID() == Intrinsic::bswap) + if (IntrinsicID == Intrinsic::bswap) return Operands[0]; - return 0; + return nullptr; } - return 0; + return nullptr; } if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) - return 0; - double Op1V; - if (Ty->isFloatTy()) - Op1V = Op1->getValueAPF().convertToFloat(); - else if (Ty->isDoubleTy()) - Op1V = Op1->getValueAPF().convertToDouble(); - else { - bool unused; - APFloat APF = Op1->getValueAPF(); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); - Op1V = APF.convertToDouble(); - } + return nullptr; + double Op1V = getValueAsDouble(Op1); if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { if (Op2->getType() != Op1->getType()) - return 0; - - double Op2V; - if (Ty->isFloatTy()) - Op2V = Op2->getValueAPF().convertToFloat(); - else if (Ty->isDoubleTy()) - Op2V = Op2->getValueAPF().convertToDouble(); - else { - bool unused; - APFloat APF = Op2->getValueAPF(); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); - Op2V = APF.convertToDouble(); - } + return nullptr; - if (F->getIntrinsicID() == Intrinsic::pow) { + double Op2V = getValueAsDouble(Op2); + if (IntrinsicID == Intrinsic::pow) { return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); } + if (IntrinsicID == Intrinsic::copysign) { + APFloat V1 = Op1->getValueAPF(); + APFloat V2 = Op2->getValueAPF(); + V1.copySign(V2); + return ConstantFP::get(Ty->getContext(), V1); + } if (!TLI) - return 0; + return nullptr; if (Name == "pow" && TLI->has(LibFunc::pow)) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); if (Name == "fmod" && TLI->has(LibFunc::fmod)) @@ -1566,25 +1636,25 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (Name == "atan2" && TLI->has(LibFunc::atan2)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { - if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy()) - return ConstantFP::get(F->getContext(), + if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)std::pow((float)Op1V, (int)Op2C->getZExtValue()))); - if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) - return ConstantFP::get(F->getContext(), + if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)std::pow((float)Op1V, (int)Op2C->getZExtValue()))); - if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy()) - return ConstantFP::get(F->getContext(), + if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); } - return 0; + return nullptr; } if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { - switch (F->getIntrinsicID()) { + switch (IntrinsicID) { default: break; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: @@ -1594,7 +1664,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, case Intrinsic::umul_with_overflow: { APInt Res; bool Overflow; - switch (F->getIntrinsicID()) { + switch (IntrinsicID) { default: llvm_unreachable("Invalid case"); case Intrinsic::sadd_with_overflow: Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow); @@ -1616,10 +1686,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, break; } Constant *Ops[] = { - ConstantInt::get(F->getContext(), Res), - ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow) + ConstantInt::get(Ty->getContext(), Res), + ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow) }; - return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops); + return ConstantStruct::get(cast<StructType>(Ty), Ops); } case Intrinsic::cttz: if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef. @@ -1632,9 +1702,79 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, } } - return 0; + return nullptr; } - return 0; + return nullptr; } - return 0; + + if (Operands.size() != 3) + return nullptr; + + if (const ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { + if (const ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { + if (const ConstantFP *Op3 = dyn_cast<ConstantFP>(Operands[2])) { + switch (IntrinsicID) { + default: break; + case Intrinsic::fma: + case Intrinsic::fmuladd: { + APFloat V = Op1->getValueAPF(); + APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(), + Op3->getValueAPF(), + APFloat::rmNearestTiesToEven); + if (s != APFloat::opInvalidOp) + return ConstantFP::get(Ty->getContext(), V); + + return nullptr; + } + } + } + } + } + + return nullptr; +} + +static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, + VectorType *VTy, + ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI) { + SmallVector<Constant *, 4> Result(VTy->getNumElements()); + SmallVector<Constant *, 4> Lane(Operands.size()); + Type *Ty = VTy->getElementType(); + + for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { + // Gather a column of constants. + for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { + Constant *Agg = Operands[J]->getAggregateElement(I); + if (!Agg) + return nullptr; + + Lane[J] = Agg; + } + + // Use the regular scalar folding to simplify this column. + Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI); + if (!Folded) + return nullptr; + Result[I] = Folded; + } + + return ConstantVector::get(Result); +} + +/// ConstantFoldCall - Attempt to constant fold a call to the specified function +/// with the specified arguments, returning null if unsuccessful. +Constant * +llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI) { + if (!F->hasName()) + return nullptr; + StringRef Name = F->getName(); + + Type *Ty = F->getReturnType(); + + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) + return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands, TLI); + + return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI); } diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index f9432584691d..1b74f8c19c51 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -17,8 +17,6 @@ // //===----------------------------------------------------------------------===// -#define CM_NAME "cost-model" -#define DEBUG_TYPE CM_NAME #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -32,6 +30,9 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define CM_NAME "cost-model" +#define DEBUG_TYPE CM_NAME + static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); @@ -41,7 +42,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) { + CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -53,9 +54,9 @@ namespace { unsigned getInstructionCost(const Instruction *I) const; private: - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnFunction(Function &F); - virtual void print(raw_ostream &OS, const Module*) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + void print(raw_ostream &OS, const Module*) const override; /// The function that we analyze. Function *F; @@ -94,34 +95,45 @@ static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { return true; } +static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) { + bool isAlternate = true; + unsigned MaskSize = Mask.size(); + + // Example: shufflevector A, B, <0,5,2,7> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); + } + + if (isAlternate) + return true; + + isAlternate = true; + // Example: shufflevector A, B, <4,1,6,3> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); + } + + return isAlternate; +} + static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { TargetTransformInfo::OperandValueKind OpInfo = TargetTransformInfo::OK_AnyValue; - // Check for a splat of a constant. - ConstantDataVector *CDV = 0; - if ((CDV = dyn_cast<ConstantDataVector>(V))) - if (CDV->getSplatValue() != NULL) - OpInfo = TargetTransformInfo::OK_UniformConstantValue; - ConstantVector *CV = 0; - if ((CV = dyn_cast<ConstantVector>(V))) - if (CV->getSplatValue() != NULL) + // Check for a splat of a constant or for a non uniform vector of constants. + if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { + OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; + if (cast<Constant>(V)->getSplatValue() != nullptr) OpInfo = TargetTransformInfo::OK_UniformConstantValue; + } return OpInfo; } -static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) { - if (M1.size() != M2.size()) - return false; - - for (unsigned i = 0, e = M1.size(); i != e; ++i) - if (M1[i] != M2[i]) - return false; - - return true; -} - static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level) { // We don't need a shuffle if we just want to have element 0 in position 0 of @@ -139,7 +151,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, Mask[i] = val; SmallVector<int, 16> ActualMask = SI->getShuffleMask(); - if (!matchMask(Mask, ActualMask)) + if (Mask != ActualMask) return false; return true; @@ -153,7 +165,7 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 - if (BinOp == 0) + if (BinOp == nullptr) return false; assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); @@ -174,9 +186,9 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, return false; // Shuffle inputs must match. - Value *NextLevelOpL = LS ? LS->getOperand(0) : 0; - Value *NextLevelOpR = RS ? RS->getOperand(0) : 0; - Value *NextLevelOp = 0; + Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; + Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; + Value *NextLevelOp = nullptr; if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) @@ -201,7 +213,7 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, // Check that the next levels binary operation exists and matches with the // current one. - BinaryOperator *NextLevelBinOp = 0; + BinaryOperator *NextLevelBinOp = nullptr; if (Level + 1 != NumLevels) { if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) return false; @@ -280,7 +292,7 @@ getShuffleAndOtherOprd(BinaryOperator *B) { Value *L = B->getOperand(0); Value *R = B->getOperand(1); - ShuffleVectorInst *S = 0; + ShuffleVectorInst *S = nullptr; if ((S = dyn_cast<ShuffleVectorInst>(L))) return std::make_pair(R, S); @@ -337,10 +349,10 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, Value *NextRdxOp; ShuffleVectorInst *Shuffle; - tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); + std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); // Check the current reduction operation and the shuffle use the same value. - if (Shuffle == 0) + if (Shuffle == nullptr) return false; if (Shuffle->getOperand(0) != NextRdxOp) return false; @@ -352,7 +364,7 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); - if (!matchMask(ShuffleMask, Mask)) + if (ShuffleMask != Mask) return false; RdxOp = NextRdxOp; @@ -439,7 +451,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: - case Instruction::BitCast: { + case Instruction::BitCast: + case Instruction::AddrSpaceCast: { Type *SrcTy = I->getOperand(0)->getType(); return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); } @@ -478,9 +491,15 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { unsigned NumVecElems = VecTypOp0->getVectorNumElements(); SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); - if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, - 0); + if (NumVecElems == Mask.size()) { + if (isReverseVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, + 0, nullptr); + if (isAlternateVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, + VecTypOp0, 0, nullptr); + } + return -1; } case Instruction::Call: diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp index 3ed0609cf38d..9334cebe1802 100644 --- a/lib/Analysis/Delinearization.cpp +++ b/lib/Analysis/Delinearization.cpp @@ -14,26 +14,27 @@ // //===----------------------------------------------------------------------===// -#define DL_NAME "delinearize" -#define DEBUG_TYPE DL_NAME #include "llvm/IR/Constants.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/Pass.h" #include "llvm/IR/Type.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DL_NAME "delinearize" +#define DEBUG_TYPE DL_NAME + namespace { class Delinearization : public FunctionPass { @@ -49,9 +50,9 @@ public: Delinearization() : FunctionPass(ID) { initializeDelinearizationPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void print(raw_ostream &O, const Module *M = 0) const; + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &O, const Module *M = nullptr) const override; }; } // end anonymous namespace @@ -76,7 +77,7 @@ static Value *getPointerOperand(Instruction &Inst) { return Store->getPointerOperand(); else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst)) return Gep->getPointerOperand(); - return NULL; + return nullptr; } void Delinearization::print(raw_ostream &O, const Module *) const { @@ -92,25 +93,38 @@ void Delinearization::print(raw_ostream &O, const Module *) const { const BasicBlock *BB = Inst->getParent(); // Delinearize the memory access as analyzed in all the surrounding loops. // Do not analyze memory accesses outside loops. - for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) { + for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + + const SCEVUnknown *BasePointer = + dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn)); + // Do not delinearize if we cannot find the base pointer. + if (!BasePointer) + break; + AccessFn = SE->getMinusSCEV(AccessFn, BasePointer); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn); // Do not try to delinearize memory accesses that are not AddRecs. if (!AR) break; + + O << "\n"; + O << "Inst:" << *Inst << "\n"; + O << "In Loop with Header: " << L->getHeader()->getName() << "\n"; O << "AddRec: " << *AR << "\n"; SmallVector<const SCEV *, 3> Subscripts, Sizes; - const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes); - int Size = Subscripts.size(); - if (Res == AR || Size == 0) { + AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst)); + if (Subscripts.size() == 0 || Sizes.size() == 0 || + Subscripts.size() != Sizes.size()) { O << "failed to delinearize\n"; continue; } - O << "Base offset: " << *Res << "\n"; + + O << "Base offset: " << *BasePointer << "\n"; O << "ArrayDecl[UnknownSize]"; + int Size = Subscripts.size(); for (int i = 0; i < Size - 1; i++) O << "[" << *Sizes[i] << "]"; O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 3b3e2ef155a0..d0784f1e678d 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -51,8 +51,6 @@ // // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "da" - #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -60,15 +58,17 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "da" + //===----------------------------------------------------------------------===// // statistics @@ -234,7 +234,7 @@ FullDependence::FullDependence(Instruction *Source, Levels(CommonLevels), LoopIndependent(PossiblyLoopIndependent) { Consistent = true; - DV = CommonLevels ? new DVEntry[CommonLevels] : NULL; + DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; } // The rest are simple getters that hide the implementation. @@ -658,7 +658,7 @@ Value *getPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); llvm_unreachable("Value is not load or store instruction"); - return 0; + return nullptr; } @@ -932,7 +932,7 @@ const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, const SCEV *UB = SE->getBackedgeTakenCount(L); return SE->getNoopOrZeroExtend(UB, T); } - return NULL; + return nullptr; } @@ -943,7 +943,7 @@ const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, ) const { if (const SCEV *UB = collectUpperBound(L, T)) return dyn_cast<SCEVConstant>(UB); - return NULL; + return nullptr; } @@ -1275,8 +1275,8 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, // // Program 2.1, page 29. // Computes the GCD of AM and BM. -// Also finds a solution to the equation ax - by = gdc(a, b). -// Returns true iff the gcd divides Delta. +// Also finds a solution to the equation ax - by = gcd(a, b). +// Returns true if dependence disproved; i.e., gcd does not divide Delta. static bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, APInt &G, APInt &X, APInt &Y) { @@ -2194,7 +2194,7 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) return Constant; } - return NULL; + return nullptr; } @@ -2646,8 +2646,8 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), @@ -2687,8 +2687,8 @@ void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); const SCEV *NegativePart = getNegativePart(Delta); @@ -2729,8 +2729,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Iter_1 = SE->getMinusSCEV(Bound[K].Iterations, @@ -2776,8 +2776,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Iter_1 = SE->getMinusSCEV(Bound[K].Iterations, @@ -2829,7 +2829,7 @@ DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, CI[K].Coeff = Zero; CI[K].PosPart = Zero; CI[K].NegPart = Zero; - CI[K].Iterations = NULL; + CI[K].Iterations = nullptr; } while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { const Loop *L = AddRec->getLoop(); @@ -2872,7 +2872,7 @@ const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { if (Bound[K].Lower[Bound[K].Direction]) Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); else - Sum = NULL; + Sum = nullptr; } return Sum; } @@ -2888,7 +2888,7 @@ const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { if (Bound[K].Upper[Bound[K].Direction]) Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); else - Sum = NULL; + Sum = nullptr; } return Sum; } @@ -3148,12 +3148,12 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, } else if (CurConstraint.isLine()) { Level.Scalar = false; - Level.Distance = NULL; + Level.Distance = nullptr; // direction should be accurate } else if (CurConstraint.isPoint()) { Level.Scalar = false; - Level.Distance = NULL; + Level.Distance = nullptr; unsigned NewDirection = Dependence::DVEntry::NONE; if (!isKnownPredicate(CmpInst::ICMP_NE, CurConstraint.getY(), @@ -3178,33 +3178,57 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, /// Check if we can delinearize the subscripts. If the SCEVs representing the /// source and destination array references are recurrences on a nested loop, -/// this function flattens the nested recurrences into seperate recurrences +/// this function flattens the nested recurrences into separate recurrences /// for each loop level. -bool -DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, - SmallVectorImpl<Subscript> &Pair) const { +bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, + const SCEV *DstSCEV, + SmallVectorImpl<Subscript> &Pair, + const SCEV *ElementSize) const { + const SCEVUnknown *SrcBase = + dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); + const SCEVUnknown *DstBase = + dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); + + if (!SrcBase || !DstBase || SrcBase != DstBase) + return false; + + SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); + DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); + const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) return false; - SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes; - SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes); - DstAR->delinearize(*SE, DstSubscripts, DstSizes); + // First step: collect parametric terms in both array references. + SmallVector<const SCEV *, 4> Terms; + SrcAR->collectParametricTerms(*SE, Terms); + DstAR->collectParametricTerms(*SE, Terms); - int size = SrcSubscripts.size(); - int dstSize = DstSubscripts.size(); - if (size != dstSize || size < 2) + // Second step: find subscript sizes. + SmallVector<const SCEV *, 4> Sizes; + SE->findArrayDimensions(Terms, Sizes, ElementSize); + + // Third step: compute the access functions for each subscript. + SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; + SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); + DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); + + // Fail when there is only a subscript: that's a linearized access function. + if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || + SrcSubscripts.size() != DstSubscripts.size()) return false; -#ifndef NDEBUG - DEBUG(errs() << "\nSrcSubscripts: "); - for (int i = 0; i < size; i++) - DEBUG(errs() << *SrcSubscripts[i]); - DEBUG(errs() << "\nDstSubscripts: "); - for (int i = 0; i < size; i++) - DEBUG(errs() << *DstSubscripts[i]); -#endif + int size = SrcSubscripts.size(); + + DEBUG({ + dbgs() << "\nSrcSubscripts: "; + for (int i = 0; i < size; i++) + dbgs() << *SrcSubscripts[i]; + dbgs() << "\nDstSubscripts: "; + for (int i = 0; i < size; i++) + dbgs() << *DstSubscripts[i]; + }); // The delinearization transforms a single-subscript MIV dependence test into // a multi-subscript SIV dependence test that is easier to compute. So we @@ -3262,7 +3286,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) // if both instructions don't reference memory, there's no dependence - return NULL; + return nullptr; if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. @@ -3282,7 +3306,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case AliasAnalysis::NoAlias: // If the objects noalias, they are distinct, accesses are independent. DEBUG(dbgs() << "no alias\n"); - return NULL; + return nullptr; case AliasAnalysis::MustAlias: break; // The underlying objects alias; test accesses for dependence. } @@ -3335,7 +3359,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, } if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { DEBUG(dbgs() << " delinerized GEP\n"); Pairs = Pair.size(); } @@ -3477,26 +3501,26 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case Subscript::ZIV: DEBUG(dbgs() << ", ZIV\n"); if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) - return NULL; + return nullptr; break; case Subscript::SIV: { DEBUG(dbgs() << ", SIV\n"); unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, SplitIter)) - return NULL; + return nullptr; break; } case Subscript::RDIV: DEBUG(dbgs() << ", RDIV\n"); if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) - return NULL; + return nullptr; break; case Subscript::MIV: DEBUG(dbgs() << ", MIV\n"); if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) - return NULL; + return nullptr; break; default: llvm_unreachable("subscript has unexpected classification"); @@ -3530,16 +3554,16 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); // SJ is an SIV subscript that's part of the current coupled group unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; DEBUG(dbgs() << "SIV\n"); if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, SplitIter)) - return NULL; + return nullptr; ConstrainedLevels.set(Level); if (intersectConstraints(&Constraints[Level], &NewConstraint)) { if (Constraints[Level].isEmpty()) { ++DeltaIndependence; - return NULL; + return nullptr; } Changed = true; } @@ -3565,7 +3589,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case Subscript::ZIV: DEBUG(dbgs() << "ZIV\n"); if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) - return NULL; + return nullptr; Mivs.reset(SJ); break; case Subscript::SIV: @@ -3588,7 +3612,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if (Pair[SJ].Classification == Subscript::RDIV) { DEBUG(dbgs() << "RDIV test\n"); if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) - return NULL; + return nullptr; // I don't yet understand how to propagate RDIV results Mivs.reset(SJ); } @@ -3601,7 +3625,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if (Pair[SJ].Classification == Subscript::MIV) { DEBUG(dbgs() << "MIV test\n"); if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) - return NULL; + return nullptr; } else llvm_unreachable("expected only MIV subscripts at this point"); @@ -3613,7 +3637,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { updateDirection(Result.DV[SJ - 1], Constraints[SJ]); if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) - return NULL; + return nullptr; } } } @@ -3648,11 +3672,11 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, } } if (AllEqual) - return NULL; + return nullptr; } FullDependence *Final = new FullDependence(Result); - Result.DV = NULL; + Result.DV = nullptr; return Final; } @@ -3759,7 +3783,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, } if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { DEBUG(dbgs() << " delinerized GEP\n"); Pairs = Pair.size(); } @@ -3825,11 +3849,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, switch (Pair[SI].Classification) { case Subscript::SIV: { unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, SplitIter); if (Level == SplitLevel) { - assert(SplitIter != NULL); + assert(SplitIter != nullptr); return SplitIter; } break; @@ -3864,7 +3888,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { // SJ is an SIV subscript that's part of the current coupled group unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, SplitIter); if (Level == SplitLevel && SplitIter) @@ -3905,5 +3929,5 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, } } llvm_unreachable("somehow reached end of routine"); - return NULL; + return nullptr; } diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index cde431459d50..0c880df54f8e 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -81,18 +81,32 @@ struct DOTGraphTraits<PostDominatorTree*> } namespace { -struct DomViewer - : public DOTGraphTraitsViewer<DominatorTree, false> { +struct DominatorTreeWrapperPassAnalysisGraphTraits { + static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) { + return &DTWP->getDomTree(); + } +}; + +struct DomViewer : public DOTGraphTraitsViewer< + DominatorTreeWrapperPass, false, DominatorTree *, + DominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){ + DomViewer() + : DOTGraphTraitsViewer<DominatorTreeWrapperPass, false, DominatorTree *, + DominatorTreeWrapperPassAnalysisGraphTraits>( + "dom", ID) { initializeDomViewerPass(*PassRegistry::getPassRegistry()); } }; -struct DomOnlyViewer - : public DOTGraphTraitsViewer<DominatorTree, true> { +struct DomOnlyViewer : public DOTGraphTraitsViewer< + DominatorTreeWrapperPass, true, DominatorTree *, + DominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){ + DomOnlyViewer() + : DOTGraphTraitsViewer<DominatorTreeWrapperPass, true, DominatorTree *, + DominatorTreeWrapperPassAnalysisGraphTraits>( + "domonly", ID) { initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry()); } }; @@ -136,18 +150,26 @@ INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only", false, false) namespace { -struct DomPrinter - : public DOTGraphTraitsPrinter<DominatorTree, false> { +struct DomPrinter : public DOTGraphTraitsPrinter< + DominatorTreeWrapperPass, false, DominatorTree *, + DominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) { + DomPrinter() + : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, false, DominatorTree *, + DominatorTreeWrapperPassAnalysisGraphTraits>( + "dom", ID) { initializeDomPrinterPass(*PassRegistry::getPassRegistry()); } }; -struct DomOnlyPrinter - : public DOTGraphTraitsPrinter<DominatorTree, true> { +struct DomOnlyPrinter : public DOTGraphTraitsPrinter< + DominatorTreeWrapperPass, true, DominatorTree *, + DominatorTreeWrapperPassAnalysisGraphTraits> { static char ID; - DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) { + DomOnlyPrinter() + : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, true, DominatorTree *, + DominatorTreeWrapperPassAnalysisGraphTraits>( + "domonly", ID) { initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry()); } }; diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index 7e4a89f1bd57..7ba91bc90dfc 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -8,134 +8,50 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/DominanceFrontierImpl.h" + using namespace llvm; +namespace llvm { +template class DominanceFrontierBase<BasicBlock>; +template class ForwardDominanceFrontierBase<BasicBlock>; +} + char DominanceFrontier::ID = 0; + INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier", "Dominance Frontier Construction", true, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(DominanceFrontier, "domfrontier", "Dominance Frontier Construction", true, true) -namespace { - class DFCalculateWorkObject { - public: - DFCalculateWorkObject(BasicBlock *B, BasicBlock *P, - const DomTreeNode *N, - const DomTreeNode *PN) - : currentBB(B), parentBB(P), Node(N), parentNode(PN) {} - BasicBlock *currentBB; - BasicBlock *parentBB; - const DomTreeNode *Node; - const DomTreeNode *parentNode; - }; +DominanceFrontier::DominanceFrontier() + : FunctionPass(ID), + Base() { + initializeDominanceFrontierPass(*PassRegistry::getPassRegistry()); } -void DominanceFrontier::anchor() { } - -const DominanceFrontier::DomSetType & -DominanceFrontier::calculate(const DominatorTree &DT, - const DomTreeNode *Node) { - BasicBlock *BB = Node->getBlock(); - DomSetType *Result = NULL; - - std::vector<DFCalculateWorkObject> workList; - SmallPtrSet<BasicBlock *, 32> visited; - - workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL)); - do { - DFCalculateWorkObject *currentW = &workList.back(); - assert (currentW && "Missing work object."); - - BasicBlock *currentBB = currentW->currentBB; - BasicBlock *parentBB = currentW->parentBB; - const DomTreeNode *currentNode = currentW->Node; - const DomTreeNode *parentNode = currentW->parentNode; - assert (currentBB && "Invalid work object. Missing current Basic Block"); - assert (currentNode && "Invalid work object. Missing current Node"); - DomSetType &S = Frontiers[currentBB]; - - // Visit each block only once. - if (visited.count(currentBB) == 0) { - visited.insert(currentBB); - - // Loop over CFG successors to calculate DFlocal[currentNode] - for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB); - SI != SE; ++SI) { - // Does Node immediately dominate this successor? - if (DT[*SI]->getIDom() != currentNode) - S.insert(*SI); - } - } - - // At this point, S is DFlocal. Now we union in DFup's of our children... - // Loop through and visit the nodes that Node immediately dominates (Node's - // children in the IDomTree) - bool visitChild = false; - for (DomTreeNode::const_iterator NI = currentNode->begin(), - NE = currentNode->end(); NI != NE; ++NI) { - DomTreeNode *IDominee = *NI; - BasicBlock *childBB = IDominee->getBlock(); - if (visited.count(childBB) == 0) { - workList.push_back(DFCalculateWorkObject(childBB, currentBB, - IDominee, currentNode)); - visitChild = true; - } - } - - // If all children are visited or there is any child then pop this block - // from the workList. - if (!visitChild) { - - if (!parentBB) { - Result = &S; - break; - } - - DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end(); - DomSetType &parentSet = Frontiers[parentBB]; - for (; CDFI != CDFE; ++CDFI) { - if (!DT.properlyDominates(parentNode, DT[*CDFI])) - parentSet.insert(*CDFI); - } - workList.pop_back(); - } +void DominanceFrontier::releaseMemory() { + Base.releaseMemory(); +} - } while (!workList.empty()); +bool DominanceFrontier::runOnFunction(Function &) { + releaseMemory(); + Base.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); + return false; +} - return *Result; +void DominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<DominatorTreeWrapperPass>(); } -void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const { - for (const_iterator I = begin(), E = end(); I != E; ++I) { - OS << " DomFrontier for BB "; - if (I->first) - WriteAsOperand(OS, I->first, false); - else - OS << " <<exit node>>"; - OS << " is:\t"; - - const std::set<BasicBlock*> &BBs = I->second; - - for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end(); - I != E; ++I) { - OS << ' '; - if (*I) - WriteAsOperand(OS, *I, false); - else - OS << "<<exit node>>"; - } - OS << "\n"; - } +void DominanceFrontier::print(raw_ostream &OS, const Module *) const { + Base.print(OS); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void DominanceFrontierBase::dump() const { +void DominanceFrontier::dump() const { print(dbgs()); } #endif - diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index f042964c21d9..dfabb0ad063d 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -8,17 +8,45 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -CallGraph::CallGraph() - : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) { - initializeCallGraphPass(*PassRegistry::getPassRegistry()); +//===----------------------------------------------------------------------===// +// Implementations of the CallGraph class methods. +// + +CallGraph::CallGraph(Module &M) + : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), + CallsExternalNode(new CallGraphNode(nullptr)) { + // Add every function to the call graph. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + addToCallGraph(I); + + // If we didn't find a main function, use the external call graph node + if (!Root) + Root = ExternalCallingNode; +} + +CallGraph::~CallGraph() { + // CallsExternalNode is not in the function map, delete it explicitly. + CallsExternalNode->allReferencesDropped(); + delete CallsExternalNode; + +// Reset all node's use counts to zero before deleting them to prevent an +// assertion from firing. +#ifndef NDEBUG + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + I->second->allReferencesDropped(); +#endif + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + delete I->second; } void CallGraph::addToCallGraph(Function *F) { @@ -62,59 +90,7 @@ void CallGraph::addToCallGraph(Function *F) { } } -void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); -} - -bool CallGraph::runOnModule(Module &M) { - Mod = &M; - - ExternalCallingNode = getOrInsertFunction(0); - assert(!CallsExternalNode); - CallsExternalNode = new CallGraphNode(0); - Root = 0; - - // Add every function to the call graph. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - addToCallGraph(I); - - // If we didn't find a main function, use the external call graph node - if (Root == 0) - Root = ExternalCallingNode; - - return false; -} - -INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true) - -char CallGraph::ID = 0; - -void CallGraph::releaseMemory() { - /// CallsExternalNode is not in the function map, delete it explicitly. - if (CallsExternalNode) { - CallsExternalNode->allReferencesDropped(); - delete CallsExternalNode; - CallsExternalNode = 0; - } - - if (FunctionMap.empty()) - return; - -// Reset all node's use counts to zero before deleting them to prevent an -// assertion from firing. -#ifndef NDEBUG - for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); - I != E; ++I) - I->second->allReferencesDropped(); -#endif - - for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); - I != E; ++I) - delete I->second; - FunctionMap.clear(); -} - -void CallGraph::print(raw_ostream &OS, const Module*) const { +void CallGraph::print(raw_ostream &OS) const { OS << "CallGraph Root is: "; if (Function *F = Root->getFunction()) OS << F->getName() << "\n"; @@ -125,16 +101,11 @@ void CallGraph::print(raw_ostream &OS, const Module*) const { for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) I->second->print(OS); } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void CallGraph::dump() const { - print(dbgs(), 0); -} +void CallGraph::dump() const { print(dbgs()); } #endif -//===----------------------------------------------------------------------===// -// Implementations of public modification methods -// - // removeFunctionFromModule - Unlink the function from this module, returning // it. Because this removes the function from the module, the call graph node // is destroyed. This is only valid if the function does not call any other @@ -148,7 +119,7 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { delete CGN; // Delete the call graph node for this func FunctionMap.erase(F); // Remove the call graph node from the map - Mod->getFunctionList().remove(F); + M.getFunctionList().remove(F); return F; } @@ -172,12 +143,17 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) { // not already exist. CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { CallGraphNode *&CGN = FunctionMap[F]; - if (CGN) return CGN; - - assert((!F || F->getParent() == Mod) && "Function not in current module!"); + if (CGN) + return CGN; + + assert((!F || F->getParent() == &M) && "Function not in current module!"); return CGN = new CallGraphNode(const_cast<Function*>(F)); } +//===----------------------------------------------------------------------===// +// Implementations of the CallGraphNode class methods. +// + void CallGraphNode::print(raw_ostream &OS) const { if (Function *F = getFunction()) OS << "Call graph node for function: '" << F->getName() << "'"; @@ -234,7 +210,7 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callee to remove!"); CallRecord &CR = *I; - if (CR.second == Callee && CR.first == 0) { + if (CR.second == Callee && CR.first == nullptr) { Callee->DropRef(); *I = CalledFunctions.back(); CalledFunctions.pop_back(); @@ -260,5 +236,52 @@ void CallGraphNode::replaceCallEdge(CallSite CS, } } +//===----------------------------------------------------------------------===// +// Out-of-line definitions of CallGraphAnalysis class members. +// + +char CallGraphAnalysis::PassID; + +//===----------------------------------------------------------------------===// +// Implementations of the CallGraphWrapperPass class methods. +// + +CallGraphWrapperPass::CallGraphWrapperPass() : ModulePass(ID) { + initializeCallGraphWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +CallGraphWrapperPass::~CallGraphWrapperPass() {} + +void CallGraphWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +bool CallGraphWrapperPass::runOnModule(Module &M) { + // All the real work is done in the constructor for the CallGraph. + G.reset(new CallGraph(M)); + return false; +} + +INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction", + false, true) + +char CallGraphWrapperPass::ID = 0; + +void CallGraphWrapperPass::releaseMemory() { G.reset(); } + +void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { + if (!G) { + OS << "No call graph has been built!\n"; + return; + } + + // Just delegate. + G->print(OS); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } +#endif + // Enuse that users of CallGraph.h also link with this file DEFINING_FILE_FOR(CallGraph) diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 182beca3643e..c27edbfa2ff5 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "cgscc-passmgr" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" @@ -23,12 +22,15 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManagers.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "cgscc-passmgr" + static cl::opt<unsigned> MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4)); @@ -49,7 +51,7 @@ public: /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; using ModulePass::doInitialization; using ModulePass::doFinalization; @@ -58,21 +60,21 @@ public: bool doFinalization(CallGraph &CG); /// Pass Manager itself does not invalidate any analysis info. - void getAnalysisUsage(AnalysisUsage &Info) const { + void getAnalysisUsage(AnalysisUsage &Info) const override { // CGPassManager walks SCC and it needs CallGraph. - Info.addRequired<CallGraph>(); + Info.addRequired<CallGraphWrapperPass>(); Info.setPreservesAll(); } - virtual const char *getPassName() const { + const char *getPassName() const override { return "CallGraph Pass Manager"; } - virtual PMDataManager *getAsPMDataManager() { return this; } - virtual Pass *getAsPass() { return this; } + PMDataManager *getAsPMDataManager() override { return this; } + Pass *getAsPass() override { return this; } // Print passes managed by this manager - void dumpPassStructure(unsigned Offset) { + void dumpPassStructure(unsigned Offset) override { errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); @@ -86,7 +88,7 @@ public: return static_cast<Pass *>(PassVector[N]); } - virtual PassManagerType getPassManagerType() const { + PassManagerType getPassManagerType() const override { return PMT_CallGraphPassManager; } @@ -112,7 +114,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, bool Changed = false; PMDataManager *PM = P->getAsPMDataManager(); - if (PM == 0) { + if (!PM) { CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; if (!CallGraphUpToDate) { DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); @@ -144,8 +146,11 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, I != E; ++I) { if (Function *F = (*I)->getFunction()) { dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); - TimeRegion PassTimer(getPassTimer(FPP)); - Changed |= FPP->runOnFunction(*F); + { + TimeRegion PassTimer(getPassTimer(FPP)); + Changed |= FPP->runOnFunction(*F); + } + F->getContext().yield(); } } @@ -190,7 +195,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, SCCIdx != E; ++SCCIdx, ++FunctionNo) { CallGraphNode *CGN = *SCCIdx; Function *F = CGN->getFunction(); - if (F == 0 || F->isDeclaration()) continue; + if (!F || F->isDeclaration()) continue; // Walk the function body looking for call sites. Sync up the call sites in // CGN with those actually in the function. @@ -203,7 +208,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { // If this call site is null, then the function pass deleted the call // entirely and the WeakVH nulled it out. - if (I->first == 0 || + if (!I->first || // If we've already seen this call site, then the FunctionPass RAUW'd // one call with another, which resulted in two "uses" in the edge // list of the same call. @@ -217,7 +222,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, "CallGraphSCCPass did not update the CallGraph correctly!"); // If this was an indirect call site, count it. - if (I->second->getFunction() == 0) + if (!I->second->getFunction()) ++NumIndirectRemoved; else ++NumDirectRemoved; @@ -273,7 +278,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, // site could be turned direct), don't reject it in checking mode, and // don't tweak it to be more precise. if (CheckingMode && CS.getCalledFunction() && - ExistingNode->getFunction() == 0) + ExistingNode->getFunction() == nullptr) continue; assert(!CheckingMode && @@ -286,7 +291,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, CalleeNode = CG.getOrInsertFunction(Callee); // Keep track of whether we turned an indirect call into a direct // one. - if (ExistingNode->getFunction() == 0) { + if (!ExistingNode->getFunction()) { DevirtualizedCall = true; DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" << Callee->getName() << "'\n"); @@ -424,7 +429,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. bool CGPassManager::runOnModule(Module &M) { - CallGraph &CG = getAnalysis<CallGraph>(); + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); bool Changed = doInitialization(CG); // Walk the callgraph in bottom-up SCC order. @@ -434,8 +439,8 @@ bool CGPassManager::runOnModule(Module &M) { while (!CGI.isAtEnd()) { // Copy the current SCC and increment past it so that the pass can hack // on the SCC if it wants to without invalidating our iterator. - std::vector<CallGraphNode*> &NodeVec = *CGI; - CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size()); + const std::vector<CallGraphNode *> &NodeVec = *CGI; + CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size()); ++CGI; // At the top level, we run all the passes in this pass manager on the @@ -570,8 +575,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS, /// the call graph. If the derived class implements this method, it should /// always explicitly call the implementation here. void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<CallGraph>(); - AU.addPreserved<CallGraph>(); + AU.addRequired<CallGraphWrapperPass>(); + AU.addPreserved<CallGraphWrapperPass>(); } @@ -590,15 +595,19 @@ namespace { static char ID; PrintCallGraphPass(const std::string &B, raw_ostream &o) : CallGraphSCCPass(ID), Banner(B), Out(o) {} - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } - - bool runOnSCC(CallGraphSCC &SCC) { + + bool runOnSCC(CallGraphSCC &SCC) override { Out << Banner; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - (*I)->getFunction()->print(Out); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + if ((*I)->getFunction()) + (*I)->getFunction()->print(Out); + else + Out << "\nPrinting <null> Function\n"; + } return false; } }; diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/IPA/CallPrinter.cpp index 306ae7a4dbfb..68dcd3c06427 100644 --- a/lib/Analysis/IPA/CallPrinter.cpp +++ b/lib/Analysis/IPA/CallPrinter.cpp @@ -22,13 +22,10 @@ using namespace llvm; namespace llvm { -template<> -struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} +template <> struct DOTGraphTraits<CallGraph *> : public DefaultDOTGraphTraits { + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - static std::string getGraphName(CallGraph *Graph) { - return "Call graph"; - } + static std::string getGraphName(CallGraph *Graph) { return "Call graph"; } std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) { if (Function *Func = Node->getFunction()) @@ -38,49 +35,57 @@ struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits { } }; +struct AnalysisCallGraphWrapperPassTraits { + static CallGraph *getGraph(CallGraphWrapperPass *P) { + return &P->getCallGraph(); + } +}; + } // end llvm namespace namespace { struct CallGraphViewer - : public DOTGraphTraitsModuleViewer<CallGraph, true> { + : public DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *, + AnalysisCallGraphWrapperPassTraits> { static char ID; CallGraphViewer() - : DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) { + : DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *, + AnalysisCallGraphWrapperPassTraits>( + "callgraph", ID) { initializeCallGraphViewerPass(*PassRegistry::getPassRegistry()); } }; -struct CallGraphPrinter - : public DOTGraphTraitsModulePrinter<CallGraph, true> { +struct CallGraphPrinter : public DOTGraphTraitsModulePrinter< + CallGraphWrapperPass, true, CallGraph *, + AnalysisCallGraphWrapperPassTraits> { static char ID; CallGraphPrinter() - : DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) { - initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); + : DOTGraphTraitsModulePrinter<CallGraphWrapperPass, true, CallGraph *, + AnalysisCallGraphWrapperPassTraits>( + "callgraph", ID) { + initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); } }; } // end anonymous namespace char CallGraphViewer::ID = 0; -INITIALIZE_PASS(CallGraphViewer, "view-callgraph", - "View call graph", - false, false) +INITIALIZE_PASS(CallGraphViewer, "view-callgraph", "View call graph", false, + false) char CallGraphPrinter::ID = 0; INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph", - "Print call graph to 'dot' file", - false, false) + "Print call graph to 'dot' file", false, false) // Create methods available outside of this file, to use them // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by // the link time optimization. -ModulePass *llvm::createCallGraphViewerPass() { - return new CallGraphViewer(); -} +ModulePass *llvm::createCallGraphViewerPass() { return new CallGraphViewer(); } ModulePass *llvm::createCallGraphPrinterPass() { return new CallGraphPrinter(); diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index 1c4f17d3819a..b37344b044ce 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -14,11 +14,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Assembly/Writer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 7ec46442bf4c..607c06810123 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globalsmodref-aa" #include "llvm/Analysis/Passes.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" @@ -24,15 +23,17 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/InstIterator.h" #include <set> using namespace llvm; +#define DEBUG_TYPE "globalsmodref-aa" + STATISTIC(NumNonAddrTakenGlobalVars, "Number of global vars without address taken"); STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); @@ -94,34 +95,38 @@ namespace { initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M) { - InitializeAliasAnalysis(this); // set up super class - AnalyzeGlobals(M); // find non-addr taken globals - AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG + bool runOnModule(Module &M) override { + InitializeAliasAnalysis(this); + + // Find non-addr taken globals. + AnalyzeGlobals(M); + + // Propagate on CG. + AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); return false; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired<CallGraph>(); + AU.addRequired<CallGraphWrapperPass>(); AU.setPreservesAll(); // Does not transform code } //------------------------------------------------ // Implement the AliasAnalysis API // - AliasResult alias(const Location &LocA, const Location &LocB); + AliasResult alias(const Location &LocA, const Location &LocB) override; ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); + const Location &Loc) override; ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { + ImmutableCallSite CS2) override { return AliasAnalysis::getModRefInfo(CS1, CS2); } /// getModRefBehavior - Return the behavior of the specified function if /// called from the specified call site. The call site may be null in which /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(const Function *F) { + ModRefBehavior getModRefBehavior(const Function *F) override { ModRefBehavior Min = UnknownModRefBehavior; if (FunctionRecord *FR = getFunctionInfo(F)) { @@ -137,7 +142,7 @@ namespace { /// getModRefBehavior - Return the behavior of the specified function if /// called from the specified call site. The call site may be null in which /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { ModRefBehavior Min = UnknownModRefBehavior; if (const Function* F = CS.getCalledFunction()) @@ -151,15 +156,15 @@ namespace { return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); } - virtual void deleteValue(Value *V); - virtual void copyValue(Value *From, Value *To); - virtual void addEscapingUse(Use &U); + void deleteValue(Value *V) override; + void copyValue(Value *From, Value *To) override; + void addEscapingUse(Use &U) override; /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + void *getAdjustedAnalysisPointer(AnalysisID PI) override { if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; @@ -173,14 +178,14 @@ namespace { FunctionInfo.find(F); if (I != FunctionInfo.end()) return &I->second; - return 0; + return nullptr; } void AnalyzeGlobals(Module &M); void AnalyzeCallGraph(CallGraph &CG, Module &M); bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, std::vector<Function*> &Writers, - GlobalValue *OkayStoreDest = 0); + GlobalValue *OkayStoreDest = nullptr); bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); }; } @@ -189,7 +194,7 @@ char GlobalsModRef::ID = 0; INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) -INITIALIZE_PASS_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) @@ -248,42 +253,33 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, GlobalValue *OkayStoreDest) { if (!V->getType()->isPointerTy()) return true; - for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) { - User *U = *UI; - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + for (Use &U : V->uses()) { + User *I = U.getUser(); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { Readers.push_back(LI->getParent()->getParent()); - } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { if (V == SI->getOperand(1)) { Writers.push_back(SI->getParent()->getParent()); } else if (SI->getOperand(1) != OkayStoreDest) { return true; // Storing the pointer } - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { - if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true; - } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) + } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { + if (AnalyzeUsesOfPointer(I, Readers, Writers)) return true; - } else if (isFreeCall(U, TLI)) { - Writers.push_back(cast<Instruction>(U)->getParent()->getParent()); - } else if (CallInst *CI = dyn_cast<CallInst>(U)) { - // Make sure that this is just the function being called, not that it is - // passing into the function. - for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) - if (CI->getArgOperand(i) == V) return true; - } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { + } else if (Operator::getOpcode(I) == Instruction::BitCast) { + if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) + return true; + } else if (CallSite CS = I) { // Make sure that this is just the function being called, not that it is // passing into the function. - for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) - if (II->getArgOperand(i) == V) return true; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { - if (CE->getOpcode() == Instruction::GetElementPtr || - CE->getOpcode() == Instruction::BitCast) { - if (AnalyzeUsesOfPointer(CE, Readers, Writers)) - return true; - } else { - return true; + if (!CS.isCallee(&U)) { + // Detect calls to free. + if (isFreeCall(I, TLI)) + Writers.push_back(CS->getParent()->getParent()); + else + return true; // Argument of an unknown call. } - } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { + } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) return true; // Allow comparison against null. } else { @@ -308,8 +304,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Walk the user list of the global. If we find anything other than a direct // load or store, bail out. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ - User *U = *I; + for (User *U : GV->users()) { if (LoadInst *LI = dyn_cast<LoadInst>(U)) { // The pointer loaded from the global can only be used in simple ways: // we allow addressing of it and loading storing to it. We do *not* allow @@ -363,9 +358,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We do a bottom-up SCC traversal of the call graph. In other words, we // visit all callees before callers (leaf-first). - for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E; - ++I) { - std::vector<CallGraphNode *> &SCC = *I; + for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + const std::vector<CallGraphNode *> &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); if (!SCC[0]->getFunction()) { @@ -417,10 +411,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { FunctionEffect |= CalleeFR->FunctionEffect; // Incorporate callee's effects on globals into our info. - for (std::map<const GlobalValue*, unsigned>::iterator GI = - CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); - GI != E; ++GI) - FR.GlobalInfo[GI->first] |= GI->second; + for (const auto &G : CalleeFR->GlobalInfo) + FR.GlobalInfo[G.first] |= G.second; FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; } else { // Can't say anything about it. However, if it is inside our SCC, @@ -499,8 +491,8 @@ GlobalsModRef::alias(const Location &LocA, if (GV1 || GV2) { // If the global's address is taken, pretend we don't know it's a pointer to // the global. - if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0; - if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0; + if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr; // If the two pointers are derived from two different non-addr-taken // globals, or if one is and the other isn't, we know these can't alias. @@ -514,7 +506,7 @@ GlobalsModRef::alias(const Location &LocA, // These pointers may be based on the memory owned by an indirect global. If // so, we may be able to handle this. First check to see if the base pointer // is a direct load from an indirect global. - GV1 = GV2 = 0; + GV1 = GV2 = nullptr; if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) if (IndirectGlobals.count(GV)) diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp index 47357cf92127..b26c052de67e 100644 --- a/lib/Analysis/IPA/IPA.cpp +++ b/lib/Analysis/IPA/IPA.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" -#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" +#include "llvm/PassRegistry.h" using namespace llvm; /// initializeIPA - Initialize all passes linked into the IPA library. void llvm::initializeIPA(PassRegistry &Registry) { - initializeCallGraphPass(Registry); + initializeCallGraphWrapperPassPass(Registry); initializeCallGraphPrinterPass(Registry); initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 3bc796e53f90..8807529cabac 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline-cost" #include "llvm/Analysis/InlineCost.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -21,19 +20,21 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" -#include "llvm/InstVisitor.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "inline-cost" + STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); namespace { @@ -43,7 +44,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { friend class InstVisitor<CallAnalyzer, bool>; // DataLayout if available, or null. - const DataLayout *const TD; + const DataLayout *const DL; /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; @@ -97,9 +98,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { void disableSROA(Value *V); void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, int InstructionCost); - bool handleSROACandidate(bool IsSROAValid, - DenseMap<Value *, int>::iterator CostIt, - int InstructionCost); bool isGEPOffsetConstant(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); @@ -142,9 +140,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, + CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI, Function &Callee, int Threshold) - : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + : DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), @@ -225,21 +223,6 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, SROACostSavings += InstructionCost; } -/// \brief Helper for the common pattern of handling a SROA candidate. -/// Either accumulates the cost savings if the SROA remains valid, or disables -/// SROA for the candidate. -bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, - DenseMap<Value *, int>::iterator CostIt, - int InstructionCost) { - if (IsSROAValid) { - accumulateSROACost(CostIt, InstructionCost); - return true; - } - - disableSROA(CostIt); - return false; -} - /// \brief Check whether a GEP's indices are all constant. /// /// Respects any simplified values known during the analysis of this callsite. @@ -256,10 +239,10 @@ bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - if (!TD) + if (!DL) return false; - unsigned IntPtrWidth = TD->getPointerSizeInBits(); + unsigned IntPtrWidth = DL->getPointerSizeInBits(); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); @@ -275,25 +258,34 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { // Handle a struct index, which adds its field offset to the pointer. if (StructType *STy = dyn_cast<StructType>(*GTI)) { unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD->getStructLayout(STy); + const StructLayout *SL = DL->getStructLayout(STy); Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); continue; } - APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); + APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType())); Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; } return true; } bool CallAnalyzer::visitAlloca(AllocaInst &I) { - // FIXME: Check whether inlining will turn a dynamic alloca into a static + // Check whether inlining will turn a dynamic alloca into a static // alloca, and handle that case. + if (I.isArrayAllocation()) { + if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) { + ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size); + assert(AllocSize && "Allocation size not a constant int?"); + Type *Ty = I.getAllocatedType(); + AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue(); + return Base::visitAlloca(I); + } + } // Accumulate the allocated size. if (I.isStaticAlloca()) { Type *Ty = I.getAllocatedType(); - AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : + AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) : Ty->getPrimitiveSizeInBits()); } @@ -330,7 +322,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { // Try to fold GEPs of constant-offset call site argument pointers. This // requires target data and inbounds GEPs. - if (TD && I.isInBounds()) { + if (DL && I.isInBounds()) { // Check if we have a base + offset for the pointer. Value *Ptr = I.getPointerOperand(); std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); @@ -399,6 +391,7 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) { } bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { + const DataLayout *DL = I.getDataLayout(); // Propagate constants through ptrtoint. Constant *COp = dyn_cast<Constant>(I.getOperand(0)); if (!COp) @@ -412,7 +405,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - if (TD && IntegerSize >= TD->getPointerSizeInBits()) { + if (DL && IntegerSize >= DL->getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) @@ -435,6 +428,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { } bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { + const DataLayout *DL = I.getDataLayout(); // Propagate constants through ptrtoint. Constant *COp = dyn_cast<Constant>(I.getOperand(0)); if (!COp) @@ -449,7 +443,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // modifications provided the integer is not too large. Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (TD && IntegerSize <= TD->getPointerSizeInBits()) { + if (DL && IntegerSize <= DL->getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; @@ -488,7 +482,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { COp = SimplifiedValues.lookup(Operand); if (COp) if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), - COp, TD)) { + COp, DL)) { SimplifiedValues[&I] = C; return true; } @@ -523,9 +517,9 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) { // a common base. Value *LHSBase, *RHSBase; APInt LHSOffset, RHSOffset; - llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); if (LHSBase) { - llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); if (RHSBase && LHSBase == RHSBase) { // We have common bases, fold the icmp to a constant based on the // offsets. @@ -573,9 +567,9 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); Value *LHSBase, *RHSBase; APInt LHSOffset, RHSOffset; - llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); if (LHSBase) { - llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); if (RHSBase && LHSBase == RHSBase) { // We have common bases, fold the subtract to a constant based on the // offsets. @@ -602,7 +596,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { if (!isa<Constant>(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; - Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { SimplifiedValues[&I] = C; return true; @@ -721,7 +715,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return false; } if (CS.isCall() && - cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) + cast<CallInst>(CS.getInstruction())->cannotDuplicate()) ContainsNoDuplicateCall = true; if (Function *F = CS.getCalledFunction()) { @@ -784,7 +778,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); + CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -814,9 +808,29 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) { bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // We model unconditional switches as free, see the comments on handling // branches. - return isa<ConstantInt>(SI.getCondition()) || - dyn_cast_or_null<ConstantInt>( - SimplifiedValues.lookup(SI.getCondition())); + if (isa<ConstantInt>(SI.getCondition())) + return true; + if (Value *V = SimplifiedValues.lookup(SI.getCondition())) + if (isa<ConstantInt>(V)) + return true; + + // Otherwise, we need to accumulate a cost proportional to the number of + // distinct successor blocks. This fan-out in the CFG cannot be represented + // for free even if we can represent the core switch as a jumptable that + // takes a single instruction. + // + // NB: We convert large switches which are just used to initialize large phi + // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent + // inlining those. It will prevent inlining in cases where the optimization + // does not (yet) fire. + SmallPtrSet<BasicBlock *, 8> SuccessorBlocks; + SuccessorBlocks.insert(SI.getDefaultDest()); + for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I) + SuccessorBlocks.insert(I.getCaseSuccessor()); + // Add cost corresponding to the number of distinct destinations. The first + // we model as free because of fallthrough. + Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + return false; } bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { @@ -827,10 +841,7 @@ bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { // original function which is extremely undefined behavior. // FIXME: This logic isn't really right; we can safely inline functions with // indirectbr's as long as no other function or global references the - // blockaddress of a block within the current function. And as a QOI issue, - // if someone is using a blockaddress without an indirectbr, and that - // reference somehow ends up in another function or global, we probably don't - // want to inline this function. + // blockaddress of a block within the current function. HasIndirectBr = true; return false; } @@ -872,6 +883,16 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { /// viable, and true if inlining remains viable. bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // FIXME: Currently, the number of instructions in a function regardless of + // our ability to simplify them during inline to constants or dead code, + // are actually used by the vector bonus heuristic. As long as that's true, + // we have to special case debug intrinsics here to prevent differences in + // inlining due to debug symbols. Eventually, the number of unsimplified + // instructions shouldn't factor into the cost computation, but until then, + // hack around it here. + if (isa<DbgInfoIntrinsic>(I)) + continue; + ++NumInstructions; if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; @@ -921,10 +942,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { - if (!TD || !V->getType()->isPointerTy()) - return 0; + if (!DL || !V->getType()->isPointerTy()) + return nullptr; - unsigned IntPtrWidth = TD->getPointerSizeInBits(); + unsigned IntPtrWidth = DL->getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -934,7 +955,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) - return 0; + return nullptr; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast<Operator>(V)->getOperand(0); @@ -948,7 +969,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + Type *IntPtrTy = DL->getIntPtrType(V->getContext()); return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); } @@ -983,12 +1004,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // Give out bonuses per argument, as the instructions setting them up will // be gone after inlining. for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (TD && CS.isByValArgument(I)) { + if (DL && CS.isByValArgument(I)) { // We approximate the number of loads and stores needed by dividing the // size of the byval type by the target's pointer size. PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); - unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = TD->getPointerSizeInBits(); + unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = DL->getPointerSizeInBits(); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; @@ -1040,9 +1061,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Function *Caller = CS.getInstruction()->getParent()->getParent(); // Check if the caller function is recursive itself. - for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); - U != E; ++U) { - CallSite Site(cast<Value>(*U)); + for (User *U : Caller->users()) { + CallSite Site(U); if (!Site) continue; Instruction *I = Site.getInstruction(); @@ -1098,6 +1118,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { if (BB->empty()) continue; + // Disallow inlining a blockaddress. A blockaddress only has defined + // behavior for an indirect branch in the same function, and we do not + // currently support inlining indirect branches. But, the inliner may not + // see an indirect branch that ends up being dead code at a particular call + // site. If the blockaddress escapes the function, e.g., via a global + // variable, inlining may lead to an invalid cross-function reference. + if (BB->hasAddressTaken()) + return false; + // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. if (!analyzeBlock(BB)) { @@ -1154,7 +1183,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } - // If this is a noduplicate call, we can still inline as long as + // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) @@ -1168,7 +1197,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Dump stats about this call's analysis. void CallAnalyzer::dump() { -#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" +#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); DEBUG_PRINT_STAT(NumAllocaArgs); @@ -1178,6 +1207,9 @@ void CallAnalyzer::dump() { DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); DEBUG_PRINT_STAT(ContainsNoDuplicateCall); + DEBUG_PRINT_STAT(Cost); + DEBUG_PRINT_STAT(Threshold); + DEBUG_PRINT_STAT(VectorBonus); #undef DEBUG_PRINT_STAT } #endif @@ -1190,7 +1222,7 @@ INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", char InlineCostAnalysis::ID = 0; -InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} +InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {} InlineCostAnalysis::~InlineCostAnalysis() {} @@ -1201,7 +1233,6 @@ void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { } bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { - TD = getAnalysisIfAvailable<DataLayout>(); TTI = &getAnalysis<TargetTransformInfo>(); return false; } @@ -1234,7 +1265,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, // Calls to functions with always-inline attributes should be inlined // whenever possible. - if (Callee->hasFnAttribute(Attribute::AlwaysInline)) { + if (CS.hasFnAttr(Attribute::AlwaysInline)) { if (isInlineViable(*Callee)) return llvm::InlineCost::getAlways(); return llvm::InlineCost::getNever(); @@ -1259,7 +1290,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(TD, *TTI, *Callee, Threshold); + CallAnalyzer CA(Callee->getDataLayout(), *TTI, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1278,8 +1309,9 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { - // Disallow inlining of functions which contain an indirect branch. - if (isa<IndirectBrInst>(BI->getTerminator())) + // Disallow inlining of functions which contain indirect branches or + // blockaddresses. + if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken()) return false; for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 5a06cdce3085..24655aa002c8 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -12,17 +12,15 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "iv-users" #include "llvm/Analysis/IVUsers.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" @@ -30,11 +28,13 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "iv-users" + char IVUsers::ID = 0; INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", "Induction Variable Users", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(IVUsers, "iv-users", "Induction Variable Users", false, true) @@ -85,7 +85,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, const LoopInfo *LI, SmallPtrSet<Loop*,16> &SimpleLoopNests) { - Loop *NearestLoop = 0; + Loop *NearestLoop = nullptr; for (DomTreeNode *Rung = DT->getNode(BB); Rung; Rung = Rung->getIDom()) { BasicBlock *DomBB = Rung->getBlock(); @@ -124,14 +124,14 @@ bool IVUsers::AddUsersImpl(Instruction *I, // IVUsers is used by LSR which assumes that all SCEV expressions are safe to // pass to SCEVExpander. Expressions are not safe to expand if they represent // operations that are not safe to speculate, namely integer division. - if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, TD)) + if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, DL)) return false; // LSR is not APInt clean, do not touch integers bigger than 64-bits. // Also avoid creating IVs of non-native types. For example, we don't want a // 64-bit IV in 32-bit code just because the loop has one 64-bit cast. uint64_t Width = SE->getTypeSizeInBits(I->getType()); - if (Width > 64 || (TD && !TD->isLegalInteger(Width))) + if (Width > 64 || (DL && !DL->isLegalInteger(Width))) return false; // Get the symbolic expression for this instruction. @@ -143,9 +143,8 @@ bool IVUsers::AddUsersImpl(Instruction *I, return false; SmallPtrSet<Instruction *, 4> UniqueUsers; - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); + for (Use &U : I->uses()) { + Instruction *User = cast<Instruction>(U.getUser()); if (!UniqueUsers.insert(User)) continue; @@ -158,7 +157,7 @@ bool IVUsers::AddUsersImpl(Instruction *I, BasicBlock *UseBB = User->getParent(); // A phi's use is live out of its predecessor block. if (PHINode *PHI = dyn_cast<PHINode>(User)) { - unsigned OperandNo = UI.getOperandNo(); + unsigned OperandNo = U.getOperandNo(); unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); UseBB = PHI->getIncomingBlock(ValNo); } @@ -243,7 +242,7 @@ IVUsers::IVUsers() void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LoopInfo>(); - AU.addRequired<DominatorTree>(); + AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolution>(); AU.setPreservesAll(); } @@ -252,9 +251,10 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { L = l; LI = &getAnalysis<LoopInfo>(); - DT = &getAnalysis<DominatorTree>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); - TD = getAnalysisIfAvailable<DataLayout>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for @@ -267,7 +267,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << "IV Users for loop "; - WriteAsOperand(OS, L->getHeader(), false); + L->getHeader()->printAsOperand(OS, false); if (SE->hasLoopInvariantBackedgeTakenCount(L)) { OS << " with backedge-taken count " << *SE->getBackedgeTakenCount(L); @@ -277,17 +277,20 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(), E = IVUses.end(); UI != E; ++UI) { OS << " "; - WriteAsOperand(OS, UI->getOperandValToReplace(), false); + UI->getOperandValToReplace()->printAsOperand(OS, false); OS << " = " << *getReplacementExpr(*UI); for (PostIncLoopSet::const_iterator I = UI->PostIncLoops.begin(), E = UI->PostIncLoops.end(); I != E; ++I) { OS << " (post-inc with loop "; - WriteAsOperand(OS, (*I)->getHeader(), false); + (*I)->getHeader()->printAsOperand(OS, false); OS << ")"; } OS << " in "; - UI->getUser()->print(OS); + if (UI->getUser()) + UI->getUser()->print(OS); + else + OS << "Printing <null> User"; OS << '\n'; } } @@ -330,16 +333,16 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { I != E; ++I) if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) return AR; - return 0; + return nullptr; } - return 0; + return nullptr; } const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const { if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L)) return AR->getStepRecurrence(*SE); - return 0; + return nullptr; } void IVStrideUse::transformToPostInc(const Loop *L) { diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 75a49eb90a88..de2b9c0c56db 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -11,17 +11,18 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instcount" #include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Function.h" -#include "llvm/InstVisitor.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "instcount" + STATISTIC(TotalInsts , "Number of instructions (of all types)"); STATISTIC(TotalBlocks, "Number of basic blocks"); STATISTIC(TotalFuncs , "Number of non-external functions"); @@ -47,7 +48,7 @@ namespace { void visitInstruction(Instruction &I) { errs() << "Instruction Count does not know about " << I; - llvm_unreachable(0); + llvm_unreachable(nullptr); } public: static char ID; // Pass identification, replacement for typeid @@ -55,12 +56,12 @@ namespace { initializeInstCountPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } - virtual void print(raw_ostream &O, const Module *M) const {} + void print(raw_ostream &O, const Module *M) const override {} }; } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index b867af1dc3de..7a820a58f6f0 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -17,37 +17,37 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instsimplify" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/ConstantRange.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Support/ValueHandle.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instsimplify" + enum { RecursionLimit = 3 }; STATISTIC(NumExpand, "Number of expansions"); -STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc, "Number of reassociations"); struct Query { - const DataLayout *TD; + const DataLayout *DL; const TargetLibraryInfo *TLI; const DominatorTree *DT; - Query(const DataLayout *td, const TargetLibraryInfo *tli, - const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {} + Query(const DataLayout *DL, const TargetLibraryInfo *tli, + const DominatorTree *dt) : DL(DL), TLI(tli), DT(dt) {} }; static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); @@ -131,7 +131,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Check whether the expression has the form "(A op' B) op C". if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS)) @@ -179,79 +179,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } - return 0; -} - -/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term -/// using the operation OpCodeToExtract. For example, when Opcode is Add and -/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)". -/// Returns the simplified value, or null if no simplification was performed. -static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, - unsigned OpcToExtract, const Query &Q, - unsigned MaxRecurse) { - Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; - // Recursion is always used, so bail out at once if we already hit the limit. - if (!MaxRecurse--) - return 0; - - BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); - BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); - - if (!Op0 || Op0->getOpcode() != OpcodeToExtract || - !Op1 || Op1->getOpcode() != OpcodeToExtract) - return 0; - - // The expression has the form "(A op' B) op (C op' D)". - Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); - Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); - - // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)". - // Does the instruction have the form "(A op' B) op (A op' D)" or, in the - // commutative case, "(A op' B) op (C op' A)"? - if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) { - Value *DD = A == C ? D : C; - // Form "A op' (B op DD)" if it simplifies completely. - // Does "B op DD" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) { - // It does! Return "A op' V" if it simplifies or is already available. - // If V equals B then "A op' V" is just the LHS. If V equals DD then - // "A op' V" is just the RHS. - if (V == B || V == DD) { - ++NumFactor; - return V == B ? LHS : RHS; - } - // Otherwise return "A op' V" if it simplifies. - if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) { - ++NumFactor; - return W; - } - } - } - - // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)". - // Does the instruction have the form "(A op' B) op (C op' B)" or, in the - // commutative case, "(A op' B) op (B op' D)"? - if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) { - Value *CC = B == D ? C : D; - // Form "(A op CC) op' B" if it simplifies completely.. - // Does "A op CC" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) { - // It does! Return "V op' B" if it simplifies or is already available. - // If V equals A then "V op' B" is just the LHS. If V equals CC then - // "V op' B" is just the RHS. - if (V == A || V == CC) { - ++NumFactor; - return V == A ? LHS : RHS; - } - // Otherwise return "V op' B" if it simplifies. - if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) { - ++NumFactor; - return W; - } - } - } - - return 0; + return nullptr; } /// SimplifyAssociativeBinOp - Generic simplifications for associative binary @@ -263,7 +191,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); @@ -308,7 +236,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, // The remaining transforms require commutativity as well as associativity. if (!Instruction::isCommutative(Opcode)) - return 0; + return nullptr; // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely. if (Op0 && Op0->getOpcode() == Opcode) { @@ -348,7 +276,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// ThreadBinOpOverSelect - In the case of a binary operation with a select @@ -359,7 +287,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; SelectInst *SI; if (isa<SelectInst>(LHS)) { @@ -420,7 +348,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// ThreadCmpOverSelect - In the case of a comparison with a select instruction, @@ -432,7 +360,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Make sure the select is on the LHS. if (!isa<SelectInst>(LHS)) { @@ -456,7 +384,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // It didn't simplify. However if "cmp TV, RHS" is equal to the select // condition then we can replace it with 'true'. Otherwise give up. if (!isSameCompare(Cond, Pred, TV, RHS)) - return 0; + return nullptr; TCmp = getTrue(Cond->getType()); } @@ -470,7 +398,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // It didn't simplify. However if "cmp FV, RHS" is equal to the select // condition then we can replace it with 'false'. Otherwise give up. if (!isSameCompare(Cond, Pred, FV, RHS)) - return 0; + return nullptr; FCmp = getFalse(Cond->getType()); } @@ -482,7 +410,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // The remaining cases only make sense if the select condition has the same // type as the result of the comparison, so bail out if this is not so. if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) - return 0; + return nullptr; // If the false value simplified to false, then the result of the compare // is equal to "Cond && TCmp". This also catches the case when the false // value simplified to false and the true value to true, returning "Cond". @@ -502,7 +430,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// ThreadBinOpOverPHI - In the case of a binary operation with an operand that @@ -513,24 +441,24 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; PHINode *PI; if (isa<PHINode>(LHS)) { PI = cast<PHINode>(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(RHS, PI, Q.DT)) - return 0; + return nullptr; } else { assert(isa<PHINode>(RHS) && "No PHI instruction operand!"); PI = cast<PHINode>(RHS); // Bail out if LHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(LHS, PI, Q.DT)) - return 0; + return nullptr; } // Evaluate the BinOp on the incoming phi values. - Value *CommonValue = 0; + Value *CommonValue = nullptr; for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. @@ -541,7 +469,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) - return 0; + return nullptr; CommonValue = V; } @@ -556,7 +484,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Make sure the phi is on the LHS. if (!isa<PHINode>(LHS)) { @@ -568,10 +496,10 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // Bail out if RHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(RHS, PI, Q.DT)) - return 0; + return nullptr; // Evaluate the BinOp on the incoming phi values. - Value *CommonValue = 0; + Value *CommonValue = nullptr; for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. @@ -580,7 +508,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) - return 0; + return nullptr; CommonValue = V; } @@ -595,7 +523,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops, - Q.TD, Q.TLI); + Q.DL, Q.TLI); } // Canonicalize the constant to the RHS. @@ -613,7 +541,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // X + (Y - X) -> Y // (Y - X) + X -> Y // Eg: X + -X -> 0 - Value *Y = 0; + Value *Y = nullptr; if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) || match(Op0, m_Sub(m_Value(Y), m_Specific(Op1)))) return Y; @@ -633,11 +561,6 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, MaxRecurse)) return V; - // Mul distributes over Add. Try some generic simplifications based on this. - if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul, - Q, MaxRecurse)) - return V; - // Threading Add over selects and phi nodes is pointless, so don't bother. // Threading over the select in "A + select(cond, B, C)" means evaluating // "A+B" and "A+C" and seeing if they are equal; but they are equal if and @@ -647,13 +570,13 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *TD, const TargetLibraryInfo *TLI, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT), RecursionLimit); } @@ -667,17 +590,17 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// This is very similar to GetPointerBaseWithConstantOffset except it doesn't /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. /// folding. -static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, +static Constant *stripAndComputeConstantOffsets(const DataLayout *DL, Value *&V, bool AllowNonInbounds = false) { assert(V->getType()->getScalarType()->isPointerTy()); // Without DataLayout, just be conservative for now. Theoretically, more could // be done in this case. - if (!TD) + if (!DL) return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); - Type *IntPtrTy = TD->getIntPtrType(V->getType())->getScalarType(); + Type *IntPtrTy = DL->getIntPtrType(V->getType())->getScalarType(); APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); // Even though we don't look through PHI nodes, we could be called on an @@ -687,7 +610,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { if ((!AllowNonInbounds && !GEP->isInBounds()) || - !GEP->accumulateConstantOffset(*TD, Offset)) + !GEP->accumulateConstantOffset(*DL, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -712,15 +635,15 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, /// \brief Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. -static Constant *computePointerDifference(const DataLayout *TD, +static Constant *computePointerDifference(const DataLayout *DL, Value *LHS, Value *RHS) { - Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); + Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. if (LHS != RHS) - return 0; + return nullptr; // Otherwise, the difference of LHS - RHS can be computed as: // LHS - RHS @@ -737,7 +660,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), - Ops, Q.TD, Q.TLI); + Ops, Q.DL, Q.TLI); } // X - undef -> undef @@ -753,16 +676,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Op0 == Op1) return Constant::getNullValue(Op0->getType()); - // (X*2) - X -> X - // (X<<1) - X -> X - Value *X = 0; - if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) || - match(Op0, m_Shl(m_Specific(Op1), m_One()))) - return Op1; - // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. // For example, (X + Y) - Y -> X; (Y + X) - Y -> X - Value *Y = 0, *Z = Op1; + Value *X = nullptr, *Y = nullptr, *Z = Op1; if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z // See if "V === Y - Z" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) @@ -831,14 +747,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y)))) - if (Constant *Result = computePointerDifference(Q.TD, X, Y)) + if (Constant *Result = computePointerDifference(Q.DL, X, Y)) return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); - // Mul distributes over Sub. Try some generic simplifications based on this. - if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul, - Q, MaxRecurse)) - return V; - // i1 sub -> xor. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) @@ -853,13 +764,13 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *TD, const TargetLibraryInfo *TLI, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT), RecursionLimit); } @@ -871,7 +782,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(), - Ops, Q.TD, Q.TLI); + Ops, Q.DL, Q.TLI); } // Canonicalize the constant to the RHS. @@ -890,7 +801,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 // where nnan and ninf have to occur at least once somewhere in this // expression - Value *SubOp = 0; + Value *SubOp = nullptr; if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) SubOp = Op1; else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) @@ -902,7 +813,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, return Constant::getNullValue(Op0->getType()); } - return 0; + return nullptr; } /// Given operands for an FSub, see if we can fold the result. If not, this @@ -913,7 +824,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(), - Ops, Q.TD, Q.TLI); + Ops, Q.DL, Q.TLI); } } @@ -939,7 +850,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1) return Constant::getNullValue(Op0->getType()); - return 0; + return nullptr; } /// Given the operands for an FMul, see if we can fold the result @@ -951,7 +862,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(), - Ops, Q.TD, Q.TLI); + Ops, Q.DL, Q.TLI); } // Canonicalize the constant to the RHS. @@ -966,7 +877,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) return Op1; - return 0; + return nullptr; } /// SimplifyMulInst - Given operands for a Mul, see if we can @@ -977,7 +888,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), - Ops, Q.TD, Q.TLI); + Ops, Q.DL, Q.TLI); } // Canonicalize the constant to the RHS. @@ -997,7 +908,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, return Op0; // (X / Y) * Y -> X if the division is exact. - Value *X = 0; + Value *X = nullptr; if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y) return X; @@ -1031,33 +942,33 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *TD, const TargetLibraryInfo *TLI, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit); } Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *TD, const TargetLibraryInfo *TLI, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit); } Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit); } -Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can @@ -1067,7 +978,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI); } } @@ -1098,7 +1009,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, return ConstantInt::get(Op0->getType(), 1); // (X * Y) / Y -> X if the multiplication does not overflow. - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0); @@ -1129,7 +1040,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifySDivInst - Given operands for an SDiv, see if we can @@ -1139,13 +1050,13 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } -Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } /// SimplifyUDivInst - Given operands for a UDiv, see if we can @@ -1155,13 +1066,13 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } -Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, @@ -1174,13 +1085,13 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, if (match(Op1, m_Undef())) return Op1; - return 0; + return nullptr; } -Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } /// SimplifyRem - Given operands for an SRem or URem, see if we can @@ -1190,7 +1101,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI); } } @@ -1234,7 +1145,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifySRemInst - Given operands for an SRem, see if we can @@ -1244,13 +1155,13 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } -Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } /// SimplifyURemInst - Given operands for a URem, see if we can @@ -1260,13 +1171,13 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } -Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, @@ -1279,13 +1190,40 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, if (match(Op1, m_Undef())) return Op1; - return 0; + return nullptr; } -Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); +} + +/// isUndefShift - Returns true if a shift by \c Amount always yields undef. +static bool isUndefShift(Value *Amount) { + Constant *C = dyn_cast<Constant>(Amount); + if (!C) + return false; + + // X shift by undef -> undef because it may shift by the bitwidth. + if (isa<UndefValue>(C)) + return true; + + // Shifting by the bitwidth or more is undefined. + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + if (CI->getValue().getLimitedValue() >= + CI->getType()->getScalarSizeInBits()) + return true; + + // If all lanes of a vector shift are undefined the whole shift is. + if (isa<ConstantVector>(C) || isa<ConstantDataVector>(C)) { + for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I) + if (!isUndefShift(C->getAggregateElement(I))) + return false; + return true; + } + + return false; } /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can @@ -1295,7 +1233,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI); } } @@ -1307,15 +1245,9 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, if (match(Op1, m_Zero())) return Op0; - // X shift by undef -> undef because it may shift by the bitwidth. - if (match(Op1, m_Undef())) - return Op1; - - // Shifting by the bitwidth or more is undefined. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) - if (CI->getValue().getLimitedValue() >= - Op0->getType()->getScalarSizeInBits()) - return UndefValue::get(Op0->getType()); + // Fold undefined shifts. + if (isUndefShift(Op1)) + return UndefValue::get(Op0->getType()); // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. @@ -1329,7 +1261,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifyShlInst - Given operands for an Shl, see if we can @@ -1347,13 +1279,13 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *X; if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) return X; - return 0; + return nullptr; } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *TD, const TargetLibraryInfo *TLI, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT), RecursionLimit); } @@ -1378,14 +1310,14 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap()) return X; - return 0; + return nullptr; } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT), RecursionLimit); } @@ -1414,14 +1346,19 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) return X; - return 0; + // Arithmetic shifting an all-sign-bit value is a no-op. + unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL); + if (NumSignBits == Op0->getType()->getScalarSizeInBits()) + return Op0; + + return nullptr; } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT), RecursionLimit); } @@ -1433,7 +1370,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), - Ops, Q.TD, Q.TLI); + Ops, Q.DL, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1462,7 +1399,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, return Constant::getNullValue(Op0->getType()); // (A | ?) & A = A - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(Op0, m_Or(m_Value(A), m_Value(B))) && (A == Op1 || B == Op1)) return Op1; @@ -1496,11 +1433,6 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, Q, MaxRecurse)) return V; - // Or distributes over And. Try some generic simplifications based on this. - if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or, - Q, MaxRecurse)) - return V; - // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) @@ -1515,13 +1447,13 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - return 0; + return nullptr; } -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } /// SimplifyOrInst - Given operands for an Or, see if we can @@ -1532,7 +1464,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), - Ops, Q.TD, Q.TLI); + Ops, Q.DL, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1561,7 +1493,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, return Constant::getAllOnesValue(Op0->getType()); // (A & ?) | A = A - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(B))) && (A == Op1 || B == Op1)) return Op1; @@ -1591,11 +1523,6 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - // And distributes over Or. Try some generic simplifications based on this. - if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And, - Q, MaxRecurse)) - return V; - // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) @@ -1603,19 +1530,51 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; + // (A & C)|(B & D) + Value *C = nullptr, *D = nullptr; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + ConstantInt *C1 = dyn_cast<ConstantInt>(C); + ConstantInt *C2 = dyn_cast<ConstantInt>(D); + if (C1 && C2 && (C1->getValue() == ~C2->getValue())) { + // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + Value *V1, *V2; + if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return A; + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return A; + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue() + 1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return B; + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return B; + } + } + } + // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } /// SimplifyXorInst - Given operands for a Xor, see if we can @@ -1626,7 +1585,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), - Ops, Q.TD, Q.TLI); + Ops, Q.DL, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1655,11 +1614,6 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - // And distributes over Xor. Try some generic simplifications based on this. - if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And, - Q, MaxRecurse)) - return V; - // Threading Xor over selects and phi nodes is pointless, so don't bother. // Threading over the select in "A ^ select(cond, B, C)" means evaluating // "A^B" and "A^C" and seeing if they are equal; but they are equal if and @@ -1669,13 +1623,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } -Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *TD, +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); } static Type *GetCompareTy(Value *Op) { @@ -1689,17 +1643,17 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); if (!SI) - return 0; + return nullptr; CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); if (!Cmp) - return 0; + return nullptr; Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS) return Cmp; if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && LHS == CmpRHS && RHS == CmpLHS) return Cmp; - return 0; + return nullptr; } // A significant optimization not implemented here is assuming that alloca @@ -1730,7 +1684,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, // If the C and C++ standards are ever made sufficiently restrictive in this // area, it may be possible to update LLVM's semantics accordingly and reinstate // this optimization. -static Constant *computePointerICmp(const DataLayout *TD, +static Constant *computePointerICmp(const DataLayout *DL, const TargetLibraryInfo *TLI, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { @@ -1747,7 +1701,7 @@ static Constant *computePointerICmp(const DataLayout *TD, // We can only fold certain predicates on pointer comparisons. switch (Pred) { default: - return 0; + return nullptr; // Equality comaprisons are easy to fold. case CmpInst::ICMP_EQ: @@ -1772,8 +1726,8 @@ static Constant *computePointerICmp(const DataLayout *TD, // numerous hazards. AliasAnalysis and its utilities rely on special rules // governing loads and stores which don't apply to icmps. Also, AliasAnalysis // doesn't need to guarantee pointer inequality when it says NoAlias. - Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); + Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); // If LHS and RHS are related via constant offsets to the same base // value, we can replace it with an icmp which just compares the offsets. @@ -1817,8 +1771,8 @@ static Constant *computePointerICmp(const DataLayout *TD, ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset); uint64_t LHSSize, RHSSize; if (LHSOffsetCI && RHSOffsetCI && - getObjectSize(LHS, LHSSize, TD, TLI) && - getObjectSize(RHS, RHSSize, TD, TLI)) { + getObjectSize(LHS, LHSSize, DL, TLI) && + getObjectSize(RHS, RHSSize, DL, TLI)) { const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); if (!LHSOffsetValue.isNegative() && @@ -1844,8 +1798,8 @@ static Constant *computePointerICmp(const DataLayout *TD, // equality comparisons concerning the result. We avoid walking the whole // chain again by starting where the last calls to // stripAndComputeConstantOffsets left off and accumulate the offsets. - Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true); - Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true); + Constant *LHSNoBound = stripAndComputeConstantOffsets(DL, LHS, true); + Constant *RHSNoBound = stripAndComputeConstantOffsets(DL, RHS, true); if (LHS == RHS) return ConstantExpr::getICmp(Pred, ConstantExpr::getAdd(LHSOffset, LHSNoBound), @@ -1853,7 +1807,7 @@ static Constant *computePointerICmp(const DataLayout *TD, } // Otherwise, fail. - return 0; + return nullptr; } /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can @@ -1865,7 +1819,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -1929,40 +1883,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, Q.TD)) + if (isKnownNonZero(LHS, Q.DL)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, Q.TD)) + if (isKnownNonZero(LHS, Q.DL)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL); if (LHSKnownNegative) return getTrue(ITy); if (LHSKnownNonNegative) return getFalse(ITy); break; case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL); if (LHSKnownNegative) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL)) return getFalse(ITy); break; case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL); if (LHSKnownNegative) return getFalse(ITy); if (LHSKnownNonNegative) return getTrue(ITy); break; case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL); if (LHSKnownNegative) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL)) return getTrue(ITy); break; } @@ -1979,7 +1933,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Many binary operators with constant RHS have easy to compute constant // range. Use them to check whether the comparison is a tautology. - uint32_t Width = CI->getBitWidth(); + unsigned Width = CI->getBitWidth(); APInt Lower = APInt(Width, 0); APInt Upper = APInt(Width, 0); ConstantInt *CI2; @@ -1998,20 +1952,47 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, APInt NegOne = APInt::getAllOnesValue(Width); if (!CI2->isZero()) Upper = NegOne.udiv(CI2->getValue()) + 1; + } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) { + if (CI2->isMinSignedValue()) { + // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. + Lower = CI2->getValue(); + Upper = Lower.lshr(1) + 1; + } else { + // 'sdiv CI2, x' produces [-|CI2|, |CI2|]. + Upper = CI2->getValue().abs() + 1; + Lower = (-Upper) + 1; + } } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { - // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]. APInt IntMin = APInt::getSignedMinValue(Width); APInt IntMax = APInt::getSignedMaxValue(Width); - APInt Val = CI2->getValue().abs(); - if (!Val.isMinValue()) { + APInt Val = CI2->getValue(); + if (Val.isAllOnesValue()) { + // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] + // where CI2 != -1 and CI2 != 0 and CI2 != 1 + Lower = IntMin + 1; + Upper = IntMax + 1; + } else if (Val.countLeadingZeros() < Width - 1) { + // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2] + // where CI2 != -1 and CI2 != 0 and CI2 != 1 Lower = IntMin.sdiv(Val); - Upper = IntMax.sdiv(Val) + 1; + Upper = IntMax.sdiv(Val); + if (Lower.sgt(Upper)) + std::swap(Lower, Upper); + Upper = Upper + 1; + assert(Upper != Lower && "Upper part of range has wrapped!"); } } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) { // 'lshr x, CI2' produces [0, UINT_MAX >> CI2]. APInt NegOne = APInt::getAllOnesValue(Width); if (CI2->getValue().ult(Width)) Upper = NegOne.lshr(CI2->getValue()) + 1; + } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) { + // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2]. + unsigned ShiftAmount = Width - 1; + if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact()) + ShiftAmount = CI2->getValue().countTrailingZeros(); + Lower = CI2->getValue().lshr(ShiftAmount); + Upper = CI2->getValue() + 1; } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) { // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2]. APInt IntMin = APInt::getSignedMinValue(Width); @@ -2020,6 +2001,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Lower = IntMin.ashr(CI2->getValue()); Upper = IntMax.ashr(CI2->getValue()) + 1; } + } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) { + unsigned ShiftAmount = Width - 1; + if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact()) + ShiftAmount = CI2->getValue().countTrailingZeros(); + if (CI2->isNegative()) { + // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)] + Lower = CI2->getValue(); + Upper = CI2->getValue().ashr(ShiftAmount) + 1; + } else { + // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2] + Lower = CI2->getValue().ashr(ShiftAmount); + Upper = CI2->getValue() + 1; + } } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) { // 'or x, CI2' produces [CI2, UINT_MAX]. Lower = CI2->getValue(); @@ -2045,8 +2039,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. - if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) && - Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { + if (MaxRecurse && Q.DL && isa<PtrToIntInst>(LI) && + Q.DL->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant. if (Value *V = SimplifyICmpInst(Pred, SrcOp, @@ -2195,12 +2189,31 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // If a bit is known to be zero for A and known to be one for B, + // then A and B cannot be equal. + if (ICmpInst::isEquality(Pred)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + uint32_t BitWidth = CI->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); + if (((LHSKnownOne & RHSKnownZero) != 0) || + ((LHSKnownZero & RHSKnownOne) != 0)) + return (Pred == ICmpInst::ICMP_EQ) + ? ConstantInt::getFalse(CI->getContext()) + : ConstantInt::getTrue(CI->getContext()); + } + } + // Special logic for binary operators. BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS); BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); if (MaxRecurse && (LBO || RBO)) { // Analyze the case when either LHS or RHS is an add instruction. - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; if (LBO && LBO->getOpcode() == Instruction::Add) { @@ -2258,6 +2271,28 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // 0 - (zext X) pred C + if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) { + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { + if (RHSC->getValue().isStrictlyPositive()) { + if (Pred == ICmpInst::ICMP_SLT) + return ConstantInt::getTrue(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_SGE) + return ConstantInt::getFalse(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_EQ) + return ConstantInt::getFalse(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_NE) + return ConstantInt::getTrue(RHSC->getContext()); + } + if (RHSC->getValue().isNonNegative()) { + if (Pred == ICmpInst::ICMP_SLE) + return ConstantInt::getTrue(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_SGT) + return ConstantInt::getFalse(RHSC->getContext()); + } + } + } + // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { bool KnownNonNegative, KnownNegative; @@ -2266,7 +2301,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL); if (!KnownNonNegative) break; // fall-through @@ -2276,7 +2311,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL); if (!KnownNonNegative) break; // fall-through @@ -2295,7 +2330,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL); if (!KnownNonNegative) break; // fall-through @@ -2305,7 +2340,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL); if (!KnownNonNegative) break; // fall-through @@ -2548,7 +2583,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. if (LHS->getType()->isPointerTy()) - if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS)) + if (Constant *C = computePointerICmp(Q.DL, Q.TLI, Pred, LHS, RHS)) return C; if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) { @@ -2584,14 +2619,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT), RecursionLimit); } @@ -2604,7 +2639,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -2681,14 +2716,14 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT), RecursionLimit); } @@ -2699,8 +2734,12 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, unsigned MaxRecurse) { // select true, X, Y -> X // select false, X, Y -> Y - if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) - return CB->getZExtValue() ? TrueVal : FalseVal; + if (Constant *CB = dyn_cast<Constant>(CondVal)) { + if (CB->isAllOnesValue()) + return TrueVal; + if (CB->isNullValue()) + return FalseVal; + } // select C, X, X -> X if (TrueVal == FalseVal) @@ -2716,14 +2755,14 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X return TrueVal; - return 0; + return nullptr; } Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT), + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (DL, TLI, DT), RecursionLimit); } @@ -2731,10 +2770,7 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, /// fold the result. If not, this returns null. static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { // The type of the GEP pointer operand. - PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType()); - // The GEP pointer operand is not a pointer, it's a vector of pointers. - if (!PtrTy) - return 0; + PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType()); // getelementptr P -> P. if (Ops.size() == 1) @@ -2744,18 +2780,19 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { // Compute the (pointer) type returned by the GEP instruction. Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); + if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType())) + GEPTy = VectorType::get(GEPTy, VT->getNumElements()); return UndefValue::get(GEPTy); } if (Ops.size() == 2) { // getelementptr P, 0 -> P. - if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1])) - if (C->isZero()) - return Ops[0]; + if (match(Ops[1], m_Zero())) + return Ops[0]; // getelementptr P, N -> P if P points to a type of zero size. - if (Q.TD) { + if (Q.DL) { Type *Ty = PtrTy->getElementType(); - if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0) + if (Ty->isSized() && Q.DL->getTypeAllocSize(Ty) == 0) return Ops[0]; } } @@ -2763,15 +2800,15 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { // Check to see if this is constant foldable. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (!isa<Constant>(Ops[i])) - return 0; + return nullptr; return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); } -Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *TD, +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT), RecursionLimit); } /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we @@ -2800,15 +2837,15 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, return Agg; } - return 0; + return nullptr; } Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT), + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (DL, TLI, DT), RecursionLimit); } @@ -2816,7 +2853,7 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. - Value *CommonValue = 0; + Value *CommonValue = nullptr; bool HasUndefInput = false; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PN->getIncomingValue(i); @@ -2828,7 +2865,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { continue; } if (CommonValue && Incoming != CommonValue) - return 0; // Not the same, bail out. + return nullptr; // Not the same, bail out. CommonValue = Incoming; } @@ -2841,22 +2878,22 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) - return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0; + return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr; return CommonValue; } static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { if (Constant *C = dyn_cast<Constant>(Op)) - return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI); + return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.DL, Q.TLI); - return 0; + return nullptr; } -Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *TD, +Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT), RecursionLimit); } //=== Helper functions for higher up the class hierarchy. @@ -2901,7 +2938,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Constant *COps[] = {CLHS, CRHS}; - return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD, + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.DL, Q.TLI); } @@ -2922,14 +2959,14 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const DataLayout *TD, const TargetLibraryInfo *TLI, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit); + return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT), RecursionLimit); } /// SimplifyCmpInst - Given operands for a CmpInst, see if we can @@ -2942,9 +2979,9 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *TD, const TargetLibraryInfo *TLI, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT), RecursionLimit); } @@ -2969,7 +3006,7 @@ static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEn const Query &Q, unsigned MaxRecurse) { // Perform idempotent optimizations if (!IsIdempotent(IID)) - return 0; + return nullptr; // Unary Ops if (std::distance(ArgBegin, ArgEnd) == 1) @@ -2977,7 +3014,7 @@ static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEn if (II->getIntrinsicID() == IID) return II; - return 0; + return nullptr; } template <typename IterTy> @@ -2994,7 +3031,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, Function *F = dyn_cast<Function>(V); if (!F) - return 0; + return nullptr; if (unsigned IID = F->getIntrinsicID()) if (Value *Ret = @@ -3002,14 +3039,14 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, return Ret; if (!canConstantFoldCallTo(F)) - return 0; + return nullptr; SmallVector<Constant *, 4> ConstantArgs; ConstantArgs.reserve(ArgEnd - ArgBegin); for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) { Constant *C = dyn_cast<Constant>(*I); if (!C) - return 0; + return nullptr; ConstantArgs.push_back(C); } @@ -3017,136 +3054,136 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, } Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const DataLayout *TD, + User::op_iterator ArgEnd, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(TD, TLI, DT), + return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT), RecursionLimit); } Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, - const DataLayout *TD, const TargetLibraryInfo *TLI, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyCall(V, Args.begin(), Args.end(), Query(TD, TLI, DT), + return ::SimplifyCall(V, Args.begin(), Args.end(), Query(DL, TLI, DT), RecursionLimit); } /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. -Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, +Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { Value *Result; switch (I->getOpcode()) { default: - Result = ConstantFoldInstruction(I, TD, TLI); + Result = ConstantFoldInstruction(I, DL, TLI); break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), TD, TLI, DT); + I->getFastMathFlags(), DL, TLI, DT); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, TLI, DT); + DL, TLI, DT); break; case Instruction::FSub: Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), TD, TLI, DT); + I->getFastMathFlags(), DL, TLI, DT); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, TLI, DT); + DL, TLI, DT); break; case Instruction::FMul: Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), TD, TLI, DT); + I->getFastMathFlags(), DL, TLI, DT); break; case Instruction::Mul: - Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::FDiv: - Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::FRem: - Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, TLI, DT); + DL, TLI, DT); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->isExact(), - TD, TLI, DT); + DL, TLI, DT); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->isExact(), - TD, TLI, DT); + DL, TLI, DT); break; case Instruction::And: - Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::Or: - Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::Xor: - Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::ICmp: Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD, TLI, DT); + I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::FCmp: Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD, TLI, DT); + I->getOperand(0), I->getOperand(1), DL, TLI, DT); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), TD, TLI, DT); + I->getOperand(2), DL, TLI, DT); break; case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); - Result = SimplifyGEPInst(Ops, TD, TLI, DT); + Result = SimplifyGEPInst(Ops, DL, TLI, DT); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast<InsertValueInst>(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), TD, TLI, DT); + IV->getIndices(), DL, TLI, DT); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT)); + Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT)); break; case Instruction::Call: { CallSite CS(cast<CallInst>(I)); Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), - TD, TLI, DT); + DL, TLI, DT); break; } case Instruction::Trunc: - Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT); + Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT); break; } @@ -3168,7 +3205,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, /// This routine returns 'true' only when *it* simplifies something. The passed /// in simplified value does not count toward this. static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { bool Simplified = false; @@ -3177,10 +3214,9 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, // If we have an explicit value to collapse to, do that round of the // simplification loop by hand initially. if (SimpleV) { - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; - ++UI) - if (*UI != I) - Worklist.insert(cast<Instruction>(*UI)); + for (User *U : I->users()) + if (U != I) + Worklist.insert(cast<Instruction>(U)); // Replace the instruction with its simplified value. I->replaceAllUsesWith(SimpleV); @@ -3198,7 +3234,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, I = Worklist[Idx]; // See if this instruction simplifies. - SimpleV = SimplifyInstruction(I, TD, TLI, DT); + SimpleV = SimplifyInstruction(I, DL, TLI, DT); if (!SimpleV) continue; @@ -3207,9 +3243,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, // Stash away all the uses of the old instruction so we can check them for // recursive simplifications after a RAUW. This is cheaper than checking all // uses of To on the recursive step in most cases. - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; - ++UI) - Worklist.insert(cast<Instruction>(*UI)); + for (User *U : I->users()) + Worklist.insert(cast<Instruction>(U)); // Replace the instruction with its simplified value. I->replaceAllUsesWith(SimpleV); @@ -3223,17 +3258,17 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, } bool llvm::recursivelySimplifyInstruction(Instruction *I, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT); + return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT); } diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp index 26a0322407ec..e3e785ffc45f 100644 --- a/lib/Analysis/Interval.cpp +++ b/lib/Analysis/Interval.cpp @@ -14,7 +14,7 @@ #include "llvm/Analysis/Interval.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/Support/CFG.h" +#include "llvm/IR/CFG.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp index 2e259b147b8b..a0583e86d185 100644 --- a/lib/Analysis/IntervalPartition.cpp +++ b/lib/Analysis/IntervalPartition.cpp @@ -29,7 +29,7 @@ void IntervalPartition::releaseMemory() { delete Intervals[i]; IntervalMap.clear(); Intervals.clear(); - RootInterval = 0; + RootInterval = nullptr; } void IntervalPartition::print(raw_ostream &O, const Module*) const { diff --git a/lib/Analysis/JumpInstrTableInfo.cpp b/lib/Analysis/JumpInstrTableInfo.cpp new file mode 100644 index 000000000000..b5b426533ff3 --- /dev/null +++ b/lib/Analysis/JumpInstrTableInfo.cpp @@ -0,0 +1,40 @@ +//===-- JumpInstrTableInfo.cpp: Info for Jump-Instruction Tables ----------===// +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Information about jump-instruction tables that have been created by +/// JumpInstrTables pass. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jiti" + +#include "llvm/Analysis/JumpInstrTableInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" + +using namespace llvm; + +INITIALIZE_PASS(JumpInstrTableInfo, "jump-instr-table-info", + "Jump-Instruction Table Info", true, true) +char JumpInstrTableInfo::ID = 0; + +ImmutablePass *llvm::createJumpInstrTableInfoPass() { + return new JumpInstrTableInfo(); +} + +JumpInstrTableInfo::JumpInstrTableInfo() : ImmutablePass(ID), Tables() { + initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry()); +} + +JumpInstrTableInfo::~JumpInstrTableInfo() {} + +void JumpInstrTableInfo::insertEntry(FunctionType *TableFunTy, Function *Target, + Function *Jump) { + Tables[TableFunTy].push_back(JumpPair(Target, Jump)); +} diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp new file mode 100644 index 000000000000..e0736162a77a --- /dev/null +++ b/lib/Analysis/LazyCallGraph.cpp @@ -0,0 +1,728 @@ +//===- LazyCallGraph.cpp - Analysis of a Module's call graph --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "lcg" + +static void findCallees( + SmallVectorImpl<Constant *> &Worklist, SmallPtrSetImpl<Constant *> &Visited, + SmallVectorImpl<PointerUnion<Function *, LazyCallGraph::Node *>> &Callees, + DenseMap<Function *, size_t> &CalleeIndexMap) { + while (!Worklist.empty()) { + Constant *C = Worklist.pop_back_val(); + + if (Function *F = dyn_cast<Function>(C)) { + // Note that we consider *any* function with a definition to be a viable + // edge. Even if the function's definition is subject to replacement by + // some other module (say, a weak definition) there may still be + // optimizations which essentially speculate based on the definition and + // a way to check that the specific definition is in fact the one being + // used. For example, this could be done by moving the weak definition to + // a strong (internal) definition and making the weak definition be an + // alias. Then a test of the address of the weak function against the new + // strong definition's address would be an effective way to determine the + // safety of optimizing a direct call edge. + if (!F->isDeclaration() && + CalleeIndexMap.insert(std::make_pair(F, Callees.size())).second) { + DEBUG(dbgs() << " Added callable function: " << F->getName() + << "\n"); + Callees.push_back(F); + } + continue; + } + + for (Value *Op : C->operand_values()) + if (Visited.insert(cast<Constant>(Op))) + Worklist.push_back(cast<Constant>(Op)); + } +} + +LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) + : G(&G), F(F), DFSNumber(0), LowLink(0) { + DEBUG(dbgs() << " Adding functions called by '" << F.getName() + << "' to the graph.\n"); + + SmallVector<Constant *, 16> Worklist; + SmallPtrSet<Constant *, 16> Visited; + // Find all the potential callees in this function. First walk the + // instructions and add every operand which is a constant to the worklist. + for (BasicBlock &BB : F) + for (Instruction &I : BB) + for (Value *Op : I.operand_values()) + if (Constant *C = dyn_cast<Constant>(Op)) + if (Visited.insert(C)) + Worklist.push_back(C); + + // We've collected all the constant (and thus potentially function or + // function containing) operands to all of the instructions in the function. + // Process them (recursively) collecting every function found. + findCallees(Worklist, Visited, Callees, CalleeIndexMap); +} + +void LazyCallGraph::Node::insertEdgeInternal(Function &Callee) { + if (Node *N = G->lookup(Callee)) + return insertEdgeInternal(*N); + + CalleeIndexMap.insert(std::make_pair(&Callee, Callees.size())); + Callees.push_back(&Callee); +} + +void LazyCallGraph::Node::insertEdgeInternal(Node &CalleeN) { + CalleeIndexMap.insert(std::make_pair(&CalleeN.getFunction(), Callees.size())); + Callees.push_back(&CalleeN); +} + +void LazyCallGraph::Node::removeEdgeInternal(Function &Callee) { + auto IndexMapI = CalleeIndexMap.find(&Callee); + assert(IndexMapI != CalleeIndexMap.end() && + "Callee not in the callee set for this caller?"); + + Callees[IndexMapI->second] = nullptr; + CalleeIndexMap.erase(IndexMapI); +} + +LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) { + DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() + << "\n"); + for (Function &F : M) + if (!F.isDeclaration() && !F.hasLocalLinkage()) + if (EntryIndexMap.insert(std::make_pair(&F, EntryNodes.size())).second) { + DEBUG(dbgs() << " Adding '" << F.getName() + << "' to entry set of the graph.\n"); + EntryNodes.push_back(&F); + } + + // Now add entry nodes for functions reachable via initializers to globals. + SmallVector<Constant *, 16> Worklist; + SmallPtrSet<Constant *, 16> Visited; + for (GlobalVariable &GV : M.globals()) + if (GV.hasInitializer()) + if (Visited.insert(GV.getInitializer())) + Worklist.push_back(GV.getInitializer()); + + DEBUG(dbgs() << " Adding functions referenced by global initializers to the " + "entry set.\n"); + findCallees(Worklist, Visited, EntryNodes, EntryIndexMap); + + for (auto &Entry : EntryNodes) { + assert(!Entry.isNull() && + "We can't have removed edges before we finish the constructor!"); + if (Function *F = Entry.dyn_cast<Function *>()) + SCCEntryNodes.push_back(F); + else + SCCEntryNodes.push_back(&Entry.get<Node *>()->getFunction()); + } +} + +LazyCallGraph::LazyCallGraph(LazyCallGraph &&G) + : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)), + EntryNodes(std::move(G.EntryNodes)), + EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)), + SCCMap(std::move(G.SCCMap)), LeafSCCs(std::move(G.LeafSCCs)), + DFSStack(std::move(G.DFSStack)), + SCCEntryNodes(std::move(G.SCCEntryNodes)), + NextDFSNumber(G.NextDFSNumber) { + updateGraphPtrs(); +} + +LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) { + BPA = std::move(G.BPA); + NodeMap = std::move(G.NodeMap); + EntryNodes = std::move(G.EntryNodes); + EntryIndexMap = std::move(G.EntryIndexMap); + SCCBPA = std::move(G.SCCBPA); + SCCMap = std::move(G.SCCMap); + LeafSCCs = std::move(G.LeafSCCs); + DFSStack = std::move(G.DFSStack); + SCCEntryNodes = std::move(G.SCCEntryNodes); + NextDFSNumber = G.NextDFSNumber; + updateGraphPtrs(); + return *this; +} + +void LazyCallGraph::SCC::insert(Node &N) { + N.DFSNumber = N.LowLink = -1; + Nodes.push_back(&N); + G->SCCMap[&N] = this; +} + +bool LazyCallGraph::SCC::isDescendantOf(const SCC &C) const { + // Walk up the parents of this SCC and verify that we eventually find C. + SmallVector<const SCC *, 4> AncestorWorklist; + AncestorWorklist.push_back(this); + do { + const SCC *AncestorC = AncestorWorklist.pop_back_val(); + if (AncestorC->isChildOf(C)) + return true; + for (const SCC *ParentC : AncestorC->ParentSCCs) + AncestorWorklist.push_back(ParentC); + } while (!AncestorWorklist.empty()); + + return false; +} + +void LazyCallGraph::SCC::insertIntraSCCEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC."); + assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC."); + + // Nothing changes about this SCC or any other. +} + +void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC."); + + SCC &CalleeC = *G->SCCMap.lookup(&CalleeN); + assert(&CalleeC != this && "Callee must not be in this SCC."); + assert(CalleeC.isDescendantOf(*this) && + "Callee must be a descendant of the Caller."); + + // The only change required is to add this SCC to the parent set of the callee. + CalleeC.ParentSCCs.insert(this); +} + +SmallVector<LazyCallGraph::SCC *, 1> +LazyCallGraph::SCC::insertIncomingEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC."); + + SCC &CallerC = *G->SCCMap.lookup(&CallerN); + assert(&CallerC != this && "Caller must not be in this SCC."); + assert(CallerC.isDescendantOf(*this) && + "Caller must be a descendant of the Callee."); + + // The algorithm we use for merging SCCs based on the cycle introduced here + // is to walk the SCC inverted DAG formed by the parent SCC sets. The inverse + // graph has the same cycle properties as the actual DAG of the SCCs, and + // when forming SCCs lazily by a DFS, the bottom of the graph won't exist in + // many cases which should prune the search space. + // + // FIXME: We can get this pruning behavior even after the incremental SCC + // formation by leaving behind (conservative) DFS numberings in the nodes, + // and pruning the search with them. These would need to be cleverly updated + // during the removal of intra-SCC edges, but could be preserved + // conservatively. + + // The set of SCCs that are connected to the caller, and thus will + // participate in the merged connected component. + SmallPtrSet<SCC *, 8> ConnectedSCCs; + ConnectedSCCs.insert(this); + ConnectedSCCs.insert(&CallerC); + + // We build up a DFS stack of the parents chains. + SmallVector<std::pair<SCC *, SCC::parent_iterator>, 8> DFSSCCs; + SmallPtrSet<SCC *, 8> VisitedSCCs; + int ConnectedDepth = -1; + SCC *C = this; + parent_iterator I = parent_begin(), E = parent_end(); + for (;;) { + while (I != E) { + SCC &ParentSCC = *I++; + + // If we have already processed this parent SCC, skip it, and remember + // whether it was connected so we don't have to check the rest of the + // stack. This also handles when we reach a child of the 'this' SCC (the + // callee) which terminates the search. + if (ConnectedSCCs.count(&ParentSCC)) { + ConnectedDepth = std::max<int>(ConnectedDepth, DFSSCCs.size()); + continue; + } + if (VisitedSCCs.count(&ParentSCC)) + continue; + + // We fully explore the depth-first space, adding nodes to the connected + // set only as we pop them off, so "recurse" by rotating to the parent. + DFSSCCs.push_back(std::make_pair(C, I)); + C = &ParentSCC; + I = ParentSCC.parent_begin(); + E = ParentSCC.parent_end(); + } + + // If we've found a connection anywhere below this point on the stack (and + // thus up the parent graph from the caller), the current node needs to be + // added to the connected set now that we've processed all of its parents. + if ((int)DFSSCCs.size() == ConnectedDepth) { + --ConnectedDepth; // We're finished with this connection. + ConnectedSCCs.insert(C); + } else { + // Otherwise remember that its parents don't ever connect. + assert(ConnectedDepth < (int)DFSSCCs.size() && + "Cannot have a connected depth greater than the DFS depth!"); + VisitedSCCs.insert(C); + } + + if (DFSSCCs.empty()) + break; // We've walked all the parents of the caller transitively. + + // Pop off the prior node and position to unwind the depth first recursion. + std::tie(C, I) = DFSSCCs.pop_back_val(); + E = C->parent_end(); + } + + // Now that we have identified all of the SCCs which need to be merged into + // a connected set with the inserted edge, merge all of them into this SCC. + // FIXME: This operation currently creates ordering stability problems + // because we don't use stably ordered containers for the parent SCCs or the + // connected SCCs. + unsigned NewNodeBeginIdx = Nodes.size(); + for (SCC *C : ConnectedSCCs) { + if (C == this) + continue; + for (SCC *ParentC : C->ParentSCCs) + if (!ConnectedSCCs.count(ParentC)) + ParentSCCs.insert(ParentC); + C->ParentSCCs.clear(); + + for (Node *N : *C) { + for (Node &ChildN : *N) { + SCC &ChildC = *G->SCCMap.lookup(&ChildN); + if (&ChildC != C) + ChildC.ParentSCCs.erase(C); + } + G->SCCMap[N] = this; + Nodes.push_back(N); + } + C->Nodes.clear(); + } + for (auto I = Nodes.begin() + NewNodeBeginIdx, E = Nodes.end(); I != E; ++I) + for (Node &ChildN : **I) { + SCC &ChildC = *G->SCCMap.lookup(&ChildN); + if (&ChildC != this) + ChildC.ParentSCCs.insert(this); + } + + // We return the list of SCCs which were merged so that callers can + // invalidate any data they have associated with those SCCs. Note that these + // SCCs are no longer in an interesting state (they are totally empty) but + // the pointers will remain stable for the life of the graph itself. + return SmallVector<SCC *, 1>(ConnectedSCCs.begin(), ConnectedSCCs.end()); +} + +void LazyCallGraph::SCC::removeInterSCCEdge(Node &CallerN, Node &CalleeN) { + // First remove it from the node. + CallerN.removeEdgeInternal(CalleeN.getFunction()); + + assert(G->SCCMap.lookup(&CallerN) == this && + "The caller must be a member of this SCC."); + + SCC &CalleeC = *G->SCCMap.lookup(&CalleeN); + assert(&CalleeC != this && + "This API only supports the rmoval of inter-SCC edges."); + + assert(std::find(G->LeafSCCs.begin(), G->LeafSCCs.end(), this) == + G->LeafSCCs.end() && + "Cannot have a leaf SCC caller with a different SCC callee."); + + bool HasOtherCallToCalleeC = false; + bool HasOtherCallOutsideSCC = false; + for (Node *N : *this) { + for (Node &OtherCalleeN : *N) { + SCC &OtherCalleeC = *G->SCCMap.lookup(&OtherCalleeN); + if (&OtherCalleeC == &CalleeC) { + HasOtherCallToCalleeC = true; + break; + } + if (&OtherCalleeC != this) + HasOtherCallOutsideSCC = true; + } + if (HasOtherCallToCalleeC) + break; + } + // Because the SCCs form a DAG, deleting such an edge cannot change the set + // of SCCs in the graph. However, it may cut an edge of the SCC DAG, making + // the caller no longer a parent of the callee. Walk the other call edges + // in the caller to tell. + if (!HasOtherCallToCalleeC) { + bool Removed = CalleeC.ParentSCCs.erase(this); + (void)Removed; + assert(Removed && + "Did not find the caller SCC in the callee SCC's parent list!"); + + // It may orphan an SCC if it is the last edge reaching it, but that does + // not violate any invariants of the graph. + if (CalleeC.ParentSCCs.empty()) + DEBUG(dbgs() << "LCG: Update removing " << CallerN.getFunction().getName() + << " -> " << CalleeN.getFunction().getName() + << " edge orphaned the callee's SCC!\n"); + } + + // It may make the Caller SCC a leaf SCC. + if (!HasOtherCallOutsideSCC) + G->LeafSCCs.push_back(this); +} + +void LazyCallGraph::SCC::internalDFS( + SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack, + SmallVectorImpl<Node *> &PendingSCCStack, Node *N, + SmallVectorImpl<SCC *> &ResultSCCs) { + Node::iterator I = N->begin(); + N->LowLink = N->DFSNumber = 1; + int NextDFSNumber = 2; + for (;;) { + assert(N->DFSNumber != 0 && "We should always assign a DFS number " + "before processing a node."); + + // We simulate recursion by popping out of the nested loop and continuing. + Node::iterator E = N->end(); + while (I != E) { + Node &ChildN = *I; + if (SCC *ChildSCC = G->SCCMap.lookup(&ChildN)) { + // Check if we have reached a node in the new (known connected) set of + // this SCC. If so, the entire stack is necessarily in that set and we + // can re-start. + if (ChildSCC == this) { + insert(*N); + while (!PendingSCCStack.empty()) + insert(*PendingSCCStack.pop_back_val()); + while (!DFSStack.empty()) + insert(*DFSStack.pop_back_val().first); + return; + } + + // If this child isn't currently in this SCC, no need to process it. + // However, we do need to remove this SCC from its SCC's parent set. + ChildSCC->ParentSCCs.erase(this); + ++I; + continue; + } + + if (ChildN.DFSNumber == 0) { + // Mark that we should start at this child when next this node is the + // top of the stack. We don't start at the next child to ensure this + // child's lowlink is reflected. + DFSStack.push_back(std::make_pair(N, I)); + + // Continue, resetting to the child node. + ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++; + N = &ChildN; + I = ChildN.begin(); + E = ChildN.end(); + continue; + } + + // Track the lowest link of the children, if any are still in the stack. + // Any child not on the stack will have a LowLink of -1. + assert(ChildN.LowLink != 0 && + "Low-link must not be zero with a non-zero DFS number."); + if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink) + N->LowLink = ChildN.LowLink; + ++I; + } + + if (N->LowLink == N->DFSNumber) { + ResultSCCs.push_back(G->formSCC(N, PendingSCCStack)); + if (DFSStack.empty()) + return; + } else { + // At this point we know that N cannot ever be an SCC root. Its low-link + // is not its dfs-number, and we've processed all of its children. It is + // just sitting here waiting until some node further down the stack gets + // low-link == dfs-number and pops it off as well. Move it to the pending + // stack which is pulled into the next SCC to be formed. + PendingSCCStack.push_back(N); + + assert(!DFSStack.empty() && "We shouldn't have an empty stack!"); + } + + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } +} + +SmallVector<LazyCallGraph::SCC *, 1> +LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, + Node &CalleeN) { + // First remove it from the node. + CallerN.removeEdgeInternal(CalleeN.getFunction()); + + // We return a list of the resulting *new* SCCs in postorder. + SmallVector<SCC *, 1> ResultSCCs; + + // Direct recursion doesn't impact the SCC graph at all. + if (&CallerN == &CalleeN) + return ResultSCCs; + + // The worklist is every node in the original SCC. + SmallVector<Node *, 1> Worklist; + Worklist.swap(Nodes); + for (Node *N : Worklist) { + // The nodes formerly in this SCC are no longer in any SCC. + N->DFSNumber = 0; + N->LowLink = 0; + G->SCCMap.erase(N); + } + assert(Worklist.size() > 1 && "We have to have at least two nodes to have an " + "edge between them that is within the SCC."); + + // The callee can already reach every node in this SCC (by definition). It is + // the only node we know will stay inside this SCC. Everything which + // transitively reaches Callee will also remain in the SCC. To model this we + // incrementally add any chain of nodes which reaches something in the new + // node set to the new node set. This short circuits one side of the Tarjan's + // walk. + insert(CalleeN); + + // We're going to do a full mini-Tarjan's walk using a local stack here. + SmallVector<std::pair<Node *, Node::iterator>, 4> DFSStack; + SmallVector<Node *, 4> PendingSCCStack; + do { + Node *N = Worklist.pop_back_val(); + if (N->DFSNumber == 0) + internalDFS(DFSStack, PendingSCCStack, N, ResultSCCs); + + assert(DFSStack.empty() && "Didn't flush the entire DFS stack!"); + assert(PendingSCCStack.empty() && "Didn't flush all pending SCC nodes!"); + } while (!Worklist.empty()); + + // Now we need to reconnect the current SCC to the graph. + bool IsLeafSCC = true; + for (Node *N : Nodes) { + for (Node &ChildN : *N) { + SCC &ChildSCC = *G->SCCMap.lookup(&ChildN); + if (&ChildSCC == this) + continue; + ChildSCC.ParentSCCs.insert(this); + IsLeafSCC = false; + } + } +#ifndef NDEBUG + if (!ResultSCCs.empty()) + assert(!IsLeafSCC && "This SCC cannot be a leaf as we have split out new " + "SCCs by removing this edge."); + if (!std::any_of(G->LeafSCCs.begin(), G->LeafSCCs.end(), + [&](SCC *C) { return C == this; })) + assert(!IsLeafSCC && "This SCC cannot be a leaf as it already had child " + "SCCs before we removed this edge."); +#endif + // If this SCC stopped being a leaf through this edge removal, remove it from + // the leaf SCC list. + if (!IsLeafSCC && !ResultSCCs.empty()) + G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this), + G->LeafSCCs.end()); + + // Return the new list of SCCs. + return ResultSCCs; +} + +void LazyCallGraph::insertEdge(Node &CallerN, Function &Callee) { + assert(SCCMap.empty() && DFSStack.empty() && + "This method cannot be called after SCCs have been formed!"); + + return CallerN.insertEdgeInternal(Callee); +} + +void LazyCallGraph::removeEdge(Node &CallerN, Function &Callee) { + assert(SCCMap.empty() && DFSStack.empty() && + "This method cannot be called after SCCs have been formed!"); + + return CallerN.removeEdgeInternal(Callee); +} + +LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) { + return *new (MappedN = BPA.Allocate()) Node(*this, F); +} + +void LazyCallGraph::updateGraphPtrs() { + // Process all nodes updating the graph pointers. + { + SmallVector<Node *, 16> Worklist; + for (auto &Entry : EntryNodes) + if (Node *EntryN = Entry.dyn_cast<Node *>()) + Worklist.push_back(EntryN); + + while (!Worklist.empty()) { + Node *N = Worklist.pop_back_val(); + N->G = this; + for (auto &Callee : N->Callees) + if (!Callee.isNull()) + if (Node *CalleeN = Callee.dyn_cast<Node *>()) + Worklist.push_back(CalleeN); + } + } + + // Process all SCCs updating the graph pointers. + { + SmallVector<SCC *, 16> Worklist(LeafSCCs.begin(), LeafSCCs.end()); + + while (!Worklist.empty()) { + SCC *C = Worklist.pop_back_val(); + C->G = this; + Worklist.insert(Worklist.end(), C->ParentSCCs.begin(), + C->ParentSCCs.end()); + } + } +} + +LazyCallGraph::SCC *LazyCallGraph::formSCC(Node *RootN, + SmallVectorImpl<Node *> &NodeStack) { + // The tail of the stack is the new SCC. Allocate the SCC and pop the stack + // into it. + SCC *NewSCC = new (SCCBPA.Allocate()) SCC(*this); + + while (!NodeStack.empty() && NodeStack.back()->DFSNumber > RootN->DFSNumber) { + assert(NodeStack.back()->LowLink >= RootN->LowLink && + "We cannot have a low link in an SCC lower than its root on the " + "stack!"); + NewSCC->insert(*NodeStack.pop_back_val()); + } + NewSCC->insert(*RootN); + + // A final pass over all edges in the SCC (this remains linear as we only + // do this once when we build the SCC) to connect it to the parent sets of + // its children. + bool IsLeafSCC = true; + for (Node *SCCN : NewSCC->Nodes) + for (Node &SCCChildN : *SCCN) { + SCC &ChildSCC = *SCCMap.lookup(&SCCChildN); + if (&ChildSCC == NewSCC) + continue; + ChildSCC.ParentSCCs.insert(NewSCC); + IsLeafSCC = false; + } + + // For the SCCs where we fine no child SCCs, add them to the leaf list. + if (IsLeafSCC) + LeafSCCs.push_back(NewSCC); + + return NewSCC; +} + +LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() { + Node *N; + Node::iterator I; + if (!DFSStack.empty()) { + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } else { + // If we've handled all candidate entry nodes to the SCC forest, we're done. + do { + if (SCCEntryNodes.empty()) + return nullptr; + + N = &get(*SCCEntryNodes.pop_back_val()); + } while (N->DFSNumber != 0); + I = N->begin(); + N->LowLink = N->DFSNumber = 1; + NextDFSNumber = 2; + } + + for (;;) { + assert(N->DFSNumber != 0 && "We should always assign a DFS number " + "before placing a node onto the stack."); + + Node::iterator E = N->end(); + while (I != E) { + Node &ChildN = *I; + if (ChildN.DFSNumber == 0) { + // Mark that we should start at this child when next this node is the + // top of the stack. We don't start at the next child to ensure this + // child's lowlink is reflected. + DFSStack.push_back(std::make_pair(N, N->begin())); + + // Recurse onto this node via a tail call. + assert(!SCCMap.count(&ChildN) && + "Found a node with 0 DFS number but already in an SCC!"); + ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++; + N = &ChildN; + I = ChildN.begin(); + E = ChildN.end(); + continue; + } + + // Track the lowest link of the children, if any are still in the stack. + assert(ChildN.LowLink != 0 && + "Low-link must not be zero with a non-zero DFS number."); + if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink) + N->LowLink = ChildN.LowLink; + ++I; + } + + if (N->LowLink == N->DFSNumber) + // Form the new SCC out of the top of the DFS stack. + return formSCC(N, PendingSCCStack); + + // At this point we know that N cannot ever be an SCC root. Its low-link + // is not its dfs-number, and we've processed all of its children. It is + // just sitting here waiting until some node further down the stack gets + // low-link == dfs-number and pops it off as well. Move it to the pending + // stack which is pulled into the next SCC to be formed. + PendingSCCStack.push_back(N); + + assert(!DFSStack.empty() && "We never found a viable root!"); + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } +} + +char LazyCallGraphAnalysis::PassID; + +LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} + +static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N, + SmallPtrSetImpl<LazyCallGraph::Node *> &Printed) { + // Recurse depth first through the nodes. + for (LazyCallGraph::Node &ChildN : N) + if (Printed.insert(&ChildN)) + printNodes(OS, ChildN, Printed); + + OS << " Call edges in function: " << N.getFunction().getName() << "\n"; + for (LazyCallGraph::iterator I = N.begin(), E = N.end(); I != E; ++I) + OS << " -> " << I->getFunction().getName() << "\n"; + + OS << "\n"; +} + +static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) { + ptrdiff_t SCCSize = std::distance(SCC.begin(), SCC.end()); + OS << " SCC with " << SCCSize << " functions:\n"; + + for (LazyCallGraph::Node *N : SCC) + OS << " " << N->getFunction().getName() << "\n"; + + OS << "\n"; +} + +PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M, + ModuleAnalysisManager *AM) { + LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M); + + OS << "Printing the call graph for module: " << M->getModuleIdentifier() + << "\n\n"; + + SmallPtrSet<LazyCallGraph::Node *, 16> Printed; + for (LazyCallGraph::Node &N : G) + if (Printed.insert(&N)) + printNodes(OS, N, Printed); + + for (LazyCallGraph::SCC &SCC : G.postorder_sccs()) + printSCC(OS, SCC); + + return PreservedAnalyses::all(); + +} diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index b6970af4cdec..9f919f7644a3 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -12,21 +12,20 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lazy-value-info" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/ConstantRange.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" #include <map> @@ -34,6 +33,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "lazy-value-info" + char LazyValueInfo::ID = 0; INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) @@ -82,7 +83,7 @@ class LVILatticeVal { ConstantRange Range; public: - LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {} + LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {} static LVILatticeVal get(Constant *C) { LVILatticeVal Res; @@ -302,9 +303,9 @@ namespace { LVIValueHandle(Value *V, LazyValueInfoCache *P) : CallbackVH(V), Parent(P) { } - - void deleted(); - void allUsesReplacedWith(Value *V) { + + void deleted() override; + void allUsesReplacedWith(Value *V) override { deleted(); } }; @@ -516,7 +517,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { BBLV.markOverdefined(); Instruction *BBI = dyn_cast<Instruction>(Val); - if (BBI == 0 || BBI->getParent() != BB) { + if (!BBI || BBI->getParent() != BB) { return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); } @@ -595,7 +596,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *UnderlyingVal = GetUnderlyingObject(Val); // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge // inside InstructionDereferencesPointer either. - if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) { + if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) { for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { if (InstructionDereferencesPointer(BI, UnderlyingVal)) { @@ -813,7 +814,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // Recognize the range checking idiom that InstCombine produces. // (X-C1) u< C2 --> [C1, C1+C2) - ConstantInt *NegOffset = 0; + ConstantInt *NegOffset = nullptr; if (ICI->getPredicate() == ICmpInst::ICMP_ULT) match(ICI->getOperand(0), m_Add(m_Specific(Val), m_ConstantInt(NegOffset))); @@ -1013,7 +1014,8 @@ bool LazyValueInfo::runOnFunction(Function &F) { if (PImpl) getCache(PImpl).clear(); - TD = getAnalysisIfAvailable<DataLayout>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); // Fully lazy. @@ -1029,7 +1031,7 @@ void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { delete &getCache(PImpl); - PImpl = 0; + PImpl = nullptr; } } @@ -1043,7 +1045,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); } - return 0; + return nullptr; } /// getConstantOnEdge - Determine whether the specified value is known to be a @@ -1059,7 +1061,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); } - return 0; + return nullptr; } /// getPredicateOnEdge - Determine whether the specified value comparison @@ -1071,9 +1073,9 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); // If we know the value is a constant, evaluate the conditional. - Constant *Res = 0; + Constant *Res = nullptr; if (Result.isConstant()) { - Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD, + Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL, TLI); if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res)) return ResCI->isZero() ? False : True; @@ -1115,14 +1117,14 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, TD, + Result.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, TD, + Result.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return True; diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index fefa51660f92..016f8c5cc793 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -54,7 +54,7 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, // if we have detailed info and if 'P' is any of the locations we know // about. const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; - if (Details == 0) + if (Details == nullptr) return MRInfo; // If the details array is of the 'DoesNot' kind, we only know something if diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 0592ccb26c12..7d4e254a111d 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -46,11 +46,11 @@ LibCallInfo::getFunctionInfo(const Function *F) const { /// If this is the first time we are querying for this info, lazily construct /// the StringMap to index it. - if (Map == 0) { + if (!Map) { Impl = Map = new StringMap<const LibCallFunctionInfo*>(); const LibCallFunctionInfo *Array = getFunctionInfoArray(); - if (Array == 0) return 0; + if (!Array) return nullptr; // We now have the array of entries. Populate the StringMap. for (unsigned i = 0; Array[i].Name; ++i) diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index ec17f47acb86..b14f3292e900 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -16,7 +16,7 @@ // those aren't comprehensive either. Second, many conditions cannot be // checked statically. This pass does no dynamic instrumentation, so it // can't check for all possible problems. -// +// // Another limitation is that it assumes all code will be executed. A store // through a null pointer in a basic block which is never reached is harmless, // but this pass will warn about it anyway. This is the main reason why most @@ -26,31 +26,30 @@ // less obvious. If an optimization pass appears to be introducing a warning, // it may be that the optimization pass is merely exposing an existing // condition in the code. -// +// // This code may be run before instcombine. In many cases, instcombine checks // for the same kinds of things and turns instructions with undefined behavior // into unreachable (or equivalent). Because of this, this pass makes some // effort to look through bitcasts and so on. -// +// //===----------------------------------------------------------------------===// #include "llvm/Analysis/Lint.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/InstVisitor.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" @@ -103,7 +102,7 @@ namespace { Module *Mod; AliasAnalysis *AA; DominatorTree *DT; - DataLayout *TD; + const DataLayout *DL; TargetLibraryInfo *TLI; std::string Messages; @@ -114,22 +113,22 @@ namespace { initializeLintPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired<AliasAnalysis>(); AU.addRequired<TargetLibraryInfo>(); - AU.addRequired<DominatorTree>(); + AU.addRequired<DominatorTreeWrapperPass>(); } - virtual void print(raw_ostream &O, const Module *M) const {} + void print(raw_ostream &O, const Module *M) const override {} void WriteValue(const Value *V) { if (!V) return; if (isa<Instruction>(V)) { MessagesStr << *V << '\n'; } else { - WriteAsOperand(MessagesStr, V, true, Mod); + V->printAsOperand(MessagesStr, true, Mod); MessagesStr << '\n'; } } @@ -138,8 +137,8 @@ namespace { // that failed. This provides a nice place to put a breakpoint if you want // to see why something is not correct. void CheckFailed(const Twine &Message, - const Value *V1 = 0, const Value *V2 = 0, - const Value *V3 = 0, const Value *V4 = 0) { + const Value *V1 = nullptr, const Value *V2 = nullptr, + const Value *V3 = nullptr, const Value *V4 = nullptr) { MessagesStr << Message.str() << "\n"; WriteValue(V1); WriteValue(V2); @@ -153,7 +152,7 @@ char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", false, true) @@ -176,8 +175,9 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); AA = &getAnalysis<AliasAnalysis>(); - DT = &getAnalysis<DominatorTree>(); - TD = getAnalysisIfAvailable<DataLayout>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); visit(F); dbgs() << MessagesStr.str(); @@ -199,7 +199,7 @@ void Lint::visitCallSite(CallSite CS) { Value *Callee = CS.getCalledValue(); visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, - 0, 0, MemRef::Callee); + 0, nullptr, MemRef::Callee); if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { Assert1(CS.getCallingConv() == F->getCallingConv(), @@ -248,7 +248,7 @@ void Lint::visitCallSite(CallSite CS) { Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), - TD ? TD->getABITypeAlignment(Ty) : 0, + DL ? DL->getABITypeAlignment(Ty) : 0, Ty, MemRef::Read | MemRef::Write); } } @@ -275,10 +275,10 @@ void Lint::visitCallSite(CallSite CS) { MemCpyInst *MCI = cast<MemCpyInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize, - MCI->getAlignment(), 0, + MCI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize, - MCI->getAlignment(), 0, + MCI->getAlignment(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API @@ -299,10 +299,10 @@ void Lint::visitCallSite(CallSite CS) { MemMoveInst *MMI = cast<MemMoveInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize, - MMI->getAlignment(), 0, + MMI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize, - MMI->getAlignment(), 0, + MMI->getAlignment(), nullptr, MemRef::Read); break; } @@ -310,7 +310,7 @@ void Lint::visitCallSite(CallSite CS) { MemSetInst *MSI = cast<MemSetInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize, - MSI->getAlignment(), 0, + MSI->getAlignment(), nullptr, MemRef::Write); break; } @@ -321,17 +321,17 @@ void Lint::visitCallSite(CallSite CS) { &I); visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Write); + 0, nullptr, MemRef::Write); visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read); + 0, nullptr, MemRef::Read); break; case Intrinsic::vaend: visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: @@ -339,7 +339,7 @@ void Lint::visitCallSite(CallSite CS) { // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; } } @@ -415,7 +415,7 @@ void Lint::visitMemoryReference(Instruction &I, // Only handles memory references that read/write something simple like an // alloca instruction or a global variable. int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) { + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { // OK, so the access is to a constant offset from Ptr. Check that Ptr is // something we can handle and if so extract the size of this base object // along with its alignment. @@ -424,21 +424,21 @@ void Lint::visitMemoryReference(Instruction &I, if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { Type *ATy = AI->getAllocatedType(); - if (TD && !AI->isArrayAllocation() && ATy->isSized()) - BaseSize = TD->getTypeAllocSize(ATy); + if (DL && !AI->isArrayAllocation() && ATy->isSized()) + BaseSize = DL->getTypeAllocSize(ATy); BaseAlign = AI->getAlignment(); - if (TD && BaseAlign == 0 && ATy->isSized()) - BaseAlign = TD->getABITypeAlignment(ATy); + if (DL && BaseAlign == 0 && ATy->isSized()) + BaseAlign = DL->getABITypeAlignment(ATy); } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { // If the global may be defined differently in another compilation unit // then don't warn about funky memory accesses. if (GV->hasDefinitiveInitializer()) { Type *GTy = GV->getType()->getElementType(); - if (TD && GTy->isSized()) - BaseSize = TD->getTypeAllocSize(GTy); + if (DL && GTy->isSized()) + BaseSize = DL->getTypeAllocSize(GTy); BaseAlign = GV->getAlignment(); - if (TD && BaseAlign == 0 && GTy->isSized()) - BaseAlign = TD->getABITypeAlignment(GTy); + if (DL && BaseAlign == 0 && GTy->isSized()) + BaseAlign = DL->getABITypeAlignment(GTy); } } @@ -451,8 +451,8 @@ void Lint::visitMemoryReference(Instruction &I, // Accesses that say that the memory is more aligned than it is are not // defined. - if (TD && Align == 0 && Ty && Ty->isSized()) - Align = TD->getABITypeAlignment(Ty); + if (DL && Align == 0 && Ty && Ty->isSized()) + Align = DL->getABITypeAlignment(Ty); Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), "Undefined behavior: Memory reference address is misaligned", &I); } @@ -504,7 +504,7 @@ void Lint::visitShl(BinaryOperator &I) { "Undefined result: Shift count out of range", &I); } -static bool isZero(Value *V, DataLayout *DL) { +static bool isZero(Value *V, const DataLayout *DL) { // Assume undef could be zero. if (isa<UndefValue>(V)) return true; @@ -513,7 +513,7 @@ static bool isZero(Value *V, DataLayout *DL) { if (!VecTy) { unsigned BitWidth = V->getType()->getIntegerBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, DL); + computeKnownBits(V, KnownZero, KnownOne, DL); return KnownZero.isAllOnesValue(); } @@ -534,7 +534,7 @@ static bool isZero(Value *V, DataLayout *DL) { return true; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Elem, KnownZero, KnownOne, DL); + computeKnownBits(Elem, KnownZero, KnownOne, DL); if (KnownZero.isAllOnesValue()) return true; } @@ -543,22 +543,22 @@ static bool isZero(Value *V, DataLayout *DL) { } void Lint::visitSDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), TD), + Assert1(!isZero(I.getOperand(1), DL), "Undefined behavior: Division by zero", &I); } void Lint::visitUDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), TD), + Assert1(!isZero(I.getOperand(1), DL), "Undefined behavior: Division by zero", &I); } void Lint::visitSRem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), TD), + Assert1(!isZero(I.getOperand(1), DL), "Undefined behavior: Division by zero", &I); } void Lint::visitURem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), TD), + Assert1(!isZero(I.getOperand(1), DL), "Undefined behavior: Division by zero", &I); } @@ -572,13 +572,13 @@ void Lint::visitAllocaInst(AllocaInst &I) { } void Lint::visitVAArgInst(VAArgInst &I) { - visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0, - MemRef::Read | MemRef::Write); + visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, + nullptr, MemRef::Read | MemRef::Write); } void Lint::visitIndirectBrInst(IndirectBrInst &I) { - visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0, - MemRef::Branchee); + visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, + nullptr, MemRef::Branchee); Assert1(I.getNumDestinations() != 0, "Undefined behavior: indirectbr with no destinations", &I); @@ -603,7 +603,7 @@ void Lint::visitInsertElementInst(InsertElementInst &I) { void Lint::visitUnreachableInst(UnreachableInst &I) { // This isn't undefined behavior, it's merely suspicious. Assert1(&I == I.getParent()->begin() || - prior(BasicBlock::iterator(&I))->mayHaveSideEffects(), + std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), "Unusual: unreachable immediately preceded by instruction without " "side effects", &I); } @@ -632,7 +632,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // TODO: Look through eliminable cast pairs. // TODO: Look through calls with unique return values. // TODO: Look through vector insert/extract/shuffle. - V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts(); + V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts(); if (LoadInst *L = dyn_cast<LoadInst>(V)) { BasicBlock::iterator BBI = L; BasicBlock *BB = L->getParent(); @@ -652,8 +652,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { - if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : - Type::getInt64Ty(V->getContext()))) + if (CI->isNoopCast(DL)) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), @@ -666,7 +665,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), CE->getOperand(0)->getType(), CE->getType(), - TD ? TD->getIntPtrType(V->getContext()) : + DL ? DL->getIntPtrType(V->getType()) : Type::getInt64Ty(V->getContext()))) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { @@ -679,10 +678,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT)) + if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT)) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI)) + if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) if (W != V) return findValueImpl(W, OffsetOk, Visited); } diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 0902a39a9f81..005d309894c1 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -62,7 +62,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, if (ByteOffset < 0) // out of bounds return false; - Type *BaseType = 0; + Type *BaseType = nullptr; unsigned BaseAlign = 0; if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { // An alloca is safe to load from as load as it is suitably aligned. @@ -161,7 +161,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, ScanFrom++; // Don't scan huge blocks. - if (MaxInstsToScan-- == 0) return 0; + if (MaxInstsToScan-- == 0) return nullptr; --ScanFrom; // If this is a load of Ptr, the loaded value is available. @@ -198,7 +198,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // Otherwise the store that may or may not alias the pointer, bail out. ++ScanFrom; - return 0; + return nullptr; } // If this is some other instruction that may clobber Ptr, bail out. @@ -211,11 +211,11 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // May modify the pointer, bail out. ++ScanFrom; - return 0; + return nullptr; } } // Got to the start of the block, we didn't find it, but are done for this // block. - return 0; + return nullptr; } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index e369633ba291..46c0eaabe1a3 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -17,15 +17,14 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include <algorithm> @@ -47,7 +46,7 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), char LoopInfo::ID = 0; INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true) // Loop identifier metadata name. @@ -142,21 +141,21 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, PHINode *Loop::getCanonicalInductionVariable() const { BasicBlock *H = getHeader(); - BasicBlock *Incoming = 0, *Backedge = 0; + BasicBlock *Incoming = nullptr, *Backedge = nullptr; pred_iterator PI = pred_begin(H); assert(PI != pred_end(H) && "Loop must have at least one backedge!"); Backedge = *PI++; - if (PI == pred_end(H)) return 0; // dead loop + if (PI == pred_end(H)) return nullptr; // dead loop Incoming = *PI++; - if (PI != pred_end(H)) return 0; // multiple backedges? + if (PI != pred_end(H)) return nullptr; // multiple backedges? if (contains(Incoming)) { if (contains(Backedge)) - return 0; + return nullptr; std::swap(Incoming, Backedge); } else if (!contains(Backedge)) - return 0; + return nullptr; // Loop over all of the PHI nodes, looking for a canonical indvar. for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) { @@ -172,7 +171,7 @@ PHINode *Loop::getCanonicalInductionVariable() const { if (CI->equalsInt(1)) return PN; } - return 0; + return nullptr; } /// isLCSSAForm - Return true if the Loop is in LCSSA form @@ -180,12 +179,11 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { BasicBlock *BB = *BI; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; - ++UI) { - User *U = *UI; - BasicBlock *UserBB = cast<Instruction>(U)->getParent(); - if (PHINode *P = dyn_cast<PHINode>(U)) - UserBB = P->getIncomingBlock(UI); + for (Use &U : I->uses()) { + Instruction *UI = cast<Instruction>(U.getUser()); + BasicBlock *UserBB = UI->getParent(); + if (PHINode *P = dyn_cast<PHINode>(UI)) + UserBB = P->getIncomingBlock(U); // Check the current block, as a fast-path, before checking whether // the use is anywhere in the loop. Most values are used in the same @@ -220,12 +218,12 @@ bool Loop::isSafeToClone() const { return false; if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) - if (II->hasFnAttr(Attribute::NoDuplicate)) + if (II->cannotDuplicate()) return false; for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { if (const CallInst *CI = dyn_cast<CallInst>(BI)) { - if (CI->hasFnAttr(Attribute::NoDuplicate)) + if (CI->cannotDuplicate()) return false; } } @@ -234,7 +232,7 @@ bool Loop::isSafeToClone() const { } MDNode *Loop::getLoopID() const { - MDNode *LoopID = 0; + MDNode *LoopID = nullptr; if (isLoopSimplifyForm()) { LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName); } else { @@ -243,7 +241,7 @@ MDNode *Loop::getLoopID() const { BasicBlock *H = getHeader(); for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { TerminatorInst *TI = (*I)->getTerminator(); - MDNode *MD = 0; + MDNode *MD = nullptr; // Check if this terminator branches to the loop header. for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { @@ -253,17 +251,17 @@ MDNode *Loop::getLoopID() const { } } if (!MD) - return 0; + return nullptr; if (!LoopID) LoopID = MD; else if (MD != LoopID) - return 0; + return nullptr; } } if (!LoopID || LoopID->getNumOperands() == 0 || LoopID->getOperand(0) != LoopID) - return 0; + return nullptr; return LoopID; } @@ -404,7 +402,7 @@ BasicBlock *Loop::getUniqueExitBlock() const { getUniqueExitBlocks(UniqueExitBlocks); if (UniqueExitBlocks.size() == 1) return UniqueExitBlocks[0]; - return 0; + return nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -528,8 +526,8 @@ void UnloopUpdater::removeBlocksFromAncestors() { /// nested within unloop. void UnloopUpdater::updateSubloopParents() { while (!Unloop->empty()) { - Loop *Subloop = *llvm::prior(Unloop->end()); - Unloop->removeChildLoop(llvm::prior(Unloop->end())); + Loop *Subloop = *std::prev(Unloop->end()); + Unloop->removeChildLoop(std::prev(Unloop->end())); assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop"); if (Loop *Parent = SubloopParents[Subloop]) @@ -550,7 +548,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { // is considered uninitialized. Loop *NearLoop = BBLoop; - Loop *Subloop = 0; + Loop *Subloop = nullptr; if (NearLoop != Unloop && Unloop->contains(NearLoop)) { Subloop = NearLoop; // Find the subloop ancestor that is directly contained within Unloop. @@ -566,7 +564,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { succ_iterator I = succ_begin(BB), E = succ_end(BB); if (I == E) { assert(!Subloop && "subloop blocks must have a successor"); - NearLoop = 0; // unloop blocks may now exit the function. + NearLoop = nullptr; // unloop blocks may now exit the function. } for (; I != E; ++I) { if (*I == BB) @@ -614,7 +612,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { // bool LoopInfo::runOnFunction(Function &) { releaseMemory(); - LI.Analyze(getAnalysis<DominatorTree>().getBase()); + LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); return false; } @@ -639,7 +637,7 @@ void LoopInfo::updateUnloop(Loop *Unloop) { // Blocks no longer have a parent but are still referenced by Unloop until // the Unloop object is deleted. - LI.changeLoopFor(*I, 0); + LI.changeLoopFor(*I, nullptr); } // Remove the loop from the top-level LoopInfo object. @@ -653,7 +651,7 @@ void LoopInfo::updateUnloop(Loop *Unloop) { // Move all of the subloops to the top-level. while (!Unloop->empty()) - LI.addTopLevelLoop(Unloop->removeChildLoop(llvm::prior(Unloop->end()))); + LI.addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end()))); return; } @@ -705,7 +703,7 @@ void LoopInfo::verifyAnalysis() const { void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<DominatorTree>(); + AU.addRequired<DominatorTreeWrapperPass>(); } void LoopInfo::print(raw_ostream &OS, const Module*) const { diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index acf2ba63bd32..7bd866e73e10 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -14,11 +14,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" -#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" using namespace llvm; +#define DEBUG_TYPE "loop-pass-manager" + namespace { /// PrintLoopPass - Print a Function corresponding to a Loop. @@ -33,16 +36,19 @@ public: PrintLoopPass(const std::string &B, raw_ostream &o) : LoopPass(ID), Banner(B), Out(o) {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } - bool runOnLoop(Loop *L, LPPassManager &) { + bool runOnLoop(Loop *L, LPPassManager &) override { Out << Banner; for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); b != be; ++b) { - (*b)->print(Out); + if (*b) + (*b)->print(Out); + else + Out << "Printing <null> block"; } return false; } @@ -61,8 +67,8 @@ LPPassManager::LPPassManager() : FunctionPass(ID), PMDataManager() { skipThisLoop = false; redoThisLoop = false; - LI = NULL; - CurrentLoop = NULL; + LI = nullptr; + CurrentLoop = nullptr; } /// Delete loop from the loop queue and loop hierarchy (LoopInfo). @@ -251,6 +257,8 @@ bool LPPassManager::runOnFunction(Function &F) { // Then call the regular verifyAnalysis functions. verifyPreservedAnalysis(P); + + F.getContext().yield(); } removeNotPreservedAnalysis(P); @@ -365,3 +373,17 @@ void LoopPass::assignPassManager(PMStack &PMS, LPPM->add(this); } + +// Containing function has Attribute::OptimizeNone and transformation +// passes should skip it. +bool LoopPass::skipOptnoneFunction(const Loop *L) const { + const Function *F = L->getHeader()->getParent(); + if (F && F->hasFnAttribute(Attribute::OptimizeNone)) { + // FIXME: Report this to dbgs() only once per function. + DEBUG(dbgs() << "Skipping pass '" << getPassName() + << "' in function " << F->getName() << "\n"); + // FIXME: Delete loop from pass manager's queue? + return true; + } + return false; +} diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index d26aaf1b9048..10da3d5d6187 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -13,11 +13,10 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Assembly/Writer.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -45,19 +44,19 @@ namespace { initializeMemDepPrinterPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - void print(raw_ostream &OS, const Module * = 0) const; + void print(raw_ostream &OS, const Module * = nullptr) const override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredTransitive<AliasAnalysis>(); AU.addRequiredTransitive<MemoryDependenceAnalysis>(); AU.setPreservesAll(); } - virtual void releaseMemory() { + void releaseMemory() override { Deps.clear(); - F = 0; + F = nullptr; } private: @@ -68,7 +67,7 @@ namespace { return InstTypePair(dep.getInst(), Def); if (dep.isNonFuncLocal()) return InstTypePair(dep.getInst(), NonFuncLocal); - assert(dep.isUnknown() && "unexptected dependence type"); + assert(dep.isUnknown() && "unexpected dependence type"); return InstTypePair(dep.getInst(), Unknown); } static InstTypePair getInstTypePair(const Instruction* inst, DepType type) { @@ -107,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { MemDepResult Res = MDA.getDependency(Inst); if (!Res.isNonLocal()) { Deps[Inst].insert(std::make_pair(getInstTypePair(Res), - static_cast<BasicBlock *>(0))); + static_cast<BasicBlock *>(nullptr))); } else if (CallSite CS = cast<Value>(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = MDA.getNonLocalCallDependency(CS); @@ -123,8 +122,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (!LI->isUnordered()) { // FIXME: Handle atomic/volatile loads. - Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), - static_cast<BasicBlock *>(0))); + Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown), + static_cast<BasicBlock *>(nullptr))); continue; } AliasAnalysis::Location Loc = AA.getLocation(LI); @@ -132,8 +131,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (!SI->isUnordered()) { // FIXME: Handle atomic/volatile stores. - Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), - static_cast<BasicBlock *>(0))); + Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown), + static_cast<BasicBlock *>(nullptr))); continue; } AliasAnalysis::Location Loc = AA.getLocation(SI); @@ -177,7 +176,7 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { OS << DepTypeStr[type]; if (DepBB) { OS << " in block "; - WriteAsOperand(OS, DepBB, /*PrintType=*/false, M); + DepBB->printAsOperand(OS, /*PrintType=*/false, M); } if (DepInst) { OS << " from: "; diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 1db0f634c941..64d339f564d7 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memory-builtins" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -30,6 +29,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "memory-builtins" + enum AllocType { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1 | OpNewLike, // allocates; may return null @@ -76,14 +77,14 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { CallSite CS(const_cast<Value*>(V)); if (!CS.getInstruction()) - return 0; + return nullptr; if (CS.isNoBuiltin()) - return 0; + return nullptr; Function *Callee = CS.getCalledFunction(); if (!Callee || !Callee->isDeclaration()) - return 0; + return nullptr; return Callee; } @@ -94,17 +95,17 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, bool LookThroughBitCast = false) { // Skip intrinsics if (isa<IntrinsicInst>(V)) - return 0; + return nullptr; Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) - return 0; + return nullptr; // Make sure that the function is available. StringRef FnName = Callee->getName(); LibFunc::Func TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) - return 0; + return nullptr; unsigned i = 0; bool found = false; @@ -115,11 +116,11 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, } } if (!found) - return 0; + return nullptr; const AllocFnsTy *FnData = &AllocationFnData[i]; if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) - return 0; + return nullptr; // Check function prototype. int FstParam = FnData->FstParam; @@ -135,7 +136,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, FTy->getParamType(SndParam)->isIntegerTy(32) || FTy->getParamType(SndParam)->isIntegerTy(64))) return FnData; - return 0; + return nullptr; } static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { @@ -202,19 +203,19 @@ bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, /// ignore InvokeInst here. const CallInst *llvm::extractMallocCall(const Value *I, const TargetLibraryInfo *TLI) { - return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0; + return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr; } static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) - return 0; + return nullptr; // The size of the malloc's result type must be known to determine array size. Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !DL) - return 0; + return nullptr; unsigned ElementSize = DL->getTypeAllocSize(T); if (StructType *ST = dyn_cast<StructType>(T)) @@ -223,12 +224,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = 0; + Value *Multiple = nullptr; if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) return Multiple; - return 0; + return nullptr; } /// isArrayMalloc - Returns the corresponding CallInst if the instruction @@ -245,7 +246,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, return CI; // CI is a non-array malloc or we can't figure out that it is an array malloc. - return 0; + return nullptr; } /// getMallocType - Returns the PointerType resulting from the malloc call. @@ -257,12 +258,12 @@ PointerType *llvm::getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI) { assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - PointerType *MallocType = 0; + PointerType *MallocType = nullptr; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. - for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end(); - UI != E; ) + for (Value::const_user_iterator UI = CI->user_begin(), E = CI->user_end(); + UI != E;) if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) { MallocType = cast<PointerType>(BCI->getDestTy()); NumOfBitCastUses++; @@ -277,7 +278,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, return cast<PointerType>(CI->getType()); // Type could not be determined. - return 0; + return nullptr; } /// getMallocAllocatedType - Returns the Type allocated by malloc call. @@ -288,7 +289,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, Type *llvm::getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI) { PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : 0; + return PT ? PT->getElementType() : nullptr; } /// getMallocArraySize - Returns the array size of a malloc call. If the @@ -308,7 +309,7 @@ Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL, /// is a calloc call. const CallInst *llvm::extractCallocCall(const Value *I, const TargetLibraryInfo *TLI) { - return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : 0; + return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr; } @@ -316,15 +317,15 @@ const CallInst *llvm::extractCallocCall(const Value *I, const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast<CallInst>(I); if (!CI || isa<IntrinsicInst>(CI)) - return 0; + return nullptr; Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration()) - return 0; + if (Callee == nullptr || !Callee->isDeclaration()) + return nullptr; StringRef FnName = Callee->getName(); LibFunc::Func TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) - return 0; + return nullptr; unsigned ExpectedNumParams; if (TLIFn == LibFunc::free || @@ -335,18 +336,18 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) ExpectedNumParams = 2; else - return 0; + return nullptr; // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) - return 0; + return nullptr; if (FTy->getNumParams() != ExpectedNumParams) - return 0; + return nullptr; if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) - return 0; + return nullptr; return CI; } @@ -399,12 +400,14 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL, LLVMContext &Context, bool RoundToAlign) : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) { - IntegerType *IntTy = DL->getIntPtrType(Context); - IntTyBits = IntTy->getBitWidth(); - Zero = APInt::getNullValue(IntTyBits); + // Pointer size must be rechecked for each object visited since it could have + // a different address space. } SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { + IntTyBits = DL->getPointerTypeSizeInBits(V->getType()); + Zero = APInt::getNullValue(IntTyBits); + V = V->stripPointerCasts(); if (Instruction *I = dyn_cast<Instruction>(V)) { // If we have already seen this instruction, bail out. Cycles can happen in @@ -456,7 +459,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { // no interprocedural analysis is done at the moment - if (!A.hasByValAttr()) { + if (!A.hasByValOrInAllocaAttr()) { ++ObjectVisitorArgument; return unknown(); } @@ -592,11 +595,15 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL, bool RoundToAlign) : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)), RoundToAlign(RoundToAlign) { - IntTy = DL->getIntPtrType(Context); - Zero = ConstantInt::get(IntTy, 0); + // IntTy and Zero must be set for each compute() since the address space may + // be different for later objects. } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { + // XXX - Are vectors of pointers possible here? + IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType())); + Zero = ConstantInt::get(IntTy, 0); + SizeOffsetEvalType Result = compute_(V); if (!bothKnown(Result)) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 84ff2eed12e4..9eaf10958694 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -14,25 +14,26 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PredIteratorCache.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/PredIteratorCache.h" using namespace llvm; +#define DEBUG_TYPE "memdep" + STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses"); STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses"); @@ -59,7 +60,7 @@ INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) MemoryDependenceAnalysis::MemoryDependenceAnalysis() -: FunctionPass(ID), PredCache(0) { + : FunctionPass(ID), PredCache() { initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry()); } MemoryDependenceAnalysis::~MemoryDependenceAnalysis() { @@ -87,8 +88,11 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); - TD = getAnalysisIfAvailable<DataLayout>(); - DT = getAnalysisIfAvailable<DominatorTree>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; if (!PredCache) PredCache.reset(new PredIteratorCache()); return false; @@ -256,17 +260,17 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase, int64_t &MemLocOffs, const LoadInst *LI, - const DataLayout *TD) { + const DataLayout *DL) { // If we have no target data, we can't do this. - if (TD == 0) return false; + if (!DL) return false; // If we haven't already computed the base/offset of MemLoc, do so now. - if (MemLocBase == 0) - MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD); + if (!MemLocBase) + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL); unsigned Size = MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, - LI, *TD); + LI, *DL); return Size != 0; } @@ -280,7 +284,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, unsigned MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize, const LoadInst *LI, - const DataLayout &TD) { + const DataLayout &DL) { // We can only extend simple integer loads. if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0; @@ -293,7 +297,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, // Get the base of this load. int64_t LIOffs = 0; const Value *LIBase = - GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL); // If the two pointers are not based on the same pointer, we can't tell that // they are related. @@ -329,7 +333,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, // If this load size is bigger than our known alignment or would not fit // into a native integer register, then we fail. if (NewLoadByteSize > LoadAlign || - !TD.fitsInLegalInteger(NewLoadByteSize*8)) + !DL.fitsInLegalInteger(NewLoadByteSize*8)) return 0; if (LIOffs+NewLoadByteSize > MemLocEnd && @@ -359,13 +363,13 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { - const Value *MemLocBase = 0; + const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; bool isInvariantLoad = false; if (isLoad && QueryInst) { LoadInst *LI = dyn_cast<LoadInst>(QueryInst); - if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) + if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) isInvariantLoad = true; } @@ -422,7 +426,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, - MemLocOffset, LI, TD)) + MemLocOffset, LI, DL)) return MemDepResult::getClobber(Inst); continue; @@ -498,7 +502,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // need to continue scanning until the malloc call. const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { - const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); + const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL); if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); @@ -690,10 +694,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, NonLocalDepEntry(DirtyBB)); - if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) + if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB) --Entry; - NonLocalDepEntry *ExistingResult = 0; + NonLocalDepEntry *ExistingResult = nullptr; if (Entry != Cache.begin()+NumSortedEntries && Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block @@ -771,7 +775,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, "Can't get pointer deps of a non-pointer!"); Result.clear(); - PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD); + PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL); // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying @@ -804,7 +808,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - NonLocalDepEntry *ExistingResult = 0; + NonLocalDepEntry *ExistingResult = nullptr; if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; @@ -957,7 +961,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, if (CacheInfo->TBAATag != Loc.TBAATag) { if (CacheInfo->TBAATag) { CacheInfo->Pair = BBSkipFirstBlockPair(); - CacheInfo->TBAATag = 0; + CacheInfo->TBAATag = nullptr; for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) if (Instruction *Inst = DI->getResult().getInst()) @@ -1113,7 +1117,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SortNonLocalDepInfoCache(*Cache, NumSortedEntries); NumSortedEntries = Cache->size(); } - Cache = 0; + Cache = nullptr; PredList.clear(); for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { @@ -1123,7 +1127,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. PHITransAddr &PredPointer = PredList.back().second; - PredPointer.PHITranslateValue(BB, Pred, 0); + PredPointer.PHITranslateValue(BB, Pred, nullptr); Value *PredPtrVal = PredPointer.getAddr(); @@ -1172,7 +1176,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // predecessor, then we have to assume that the pointer is clobbered in // that predecessor. We can still do PRE of the load, which would insert // a computation of the pointer in this predecessor. - if (PredPtrVal == 0) + if (!PredPtrVal) CanTranslate = false; // FIXME: it is entirely possible that PHI translating will end up with @@ -1221,7 +1225,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // for the given block. It assumes that we haven't modified any of // our datastructures while processing the current block. - if (Cache == 0) { + if (!Cache) { // Refresh the CacheInfo/Cache pointer if it got invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; @@ -1276,7 +1280,7 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { Instruction *Target = PInfo[i].getResult().getInst(); - if (Target == 0) continue; // Ignore non-local dep results. + if (!Target) continue; // Ignore non-local dep results. assert(Target->getParent() == PInfo[i].getBB()); // Eliminating the dirty entry from 'Cache', so update the reverse info. diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index 03415375263a..f645558bd814 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -17,8 +17,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" @@ -34,12 +33,12 @@ namespace { initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } - virtual void print(raw_ostream &O, const Module *M) const; + void print(raw_ostream &O, const Module *M) const override; }; } @@ -57,31 +56,27 @@ bool ModuleDebugInfoPrinter::runOnModule(Module &M) { } void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { - for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(), - E = Finder.compile_unit_end(); I != E; ++I) { + for (DICompileUnit CU : Finder.compile_units()) { O << "Compile Unit: "; - DICompileUnit(*I).print(O); + CU.print(O); O << '\n'; } - for (DebugInfoFinder::iterator I = Finder.subprogram_begin(), - E = Finder.subprogram_end(); I != E; ++I) { + for (DISubprogram S : Finder.subprograms()) { O << "Subprogram: "; - DISubprogram(*I).print(O); + S.print(O); O << '\n'; } - for (DebugInfoFinder::iterator I = Finder.global_variable_begin(), - E = Finder.global_variable_end(); I != E; ++I) { + for (DIGlobalVariable GV : Finder.global_variables()) { O << "GlobalVariable: "; - DIGlobalVariable(*I).print(O); + GV.print(O); O << '\n'; } - for (DebugInfoFinder::iterator I = Finder.type_begin(), - E = Finder.type_end(); I != E; ++I) { + for (DIType T : Finder.types()) { O << "Type: "; - DIType(*I).print(O); + T.print(O); O << '\n'; } } diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 907e9621baed..139fa38b8a94 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Pass.h" using namespace llvm; @@ -30,48 +31,54 @@ namespace { initializeNoAAPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - } + void getAnalysisUsage(AnalysisUsage &AU) const override {} - virtual void initializePass() { + void initializePass() override { // Note: NoAA does not call InitializeAliasAnalysis because it's // special and does not support chaining. - TD = getAnalysisIfAvailable<DataLayout>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; } - virtual AliasResult alias(const Location &LocA, const Location &LocB) { + AliasResult alias(const Location &LocA, const Location &LocB) override { return MayAlias; } - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { return UnknownModRefBehavior; } - virtual ModRefBehavior getModRefBehavior(const Function *F) { + ModRefBehavior getModRefBehavior(const Function *F) override { return UnknownModRefBehavior; } - virtual bool pointsToConstantMemory(const Location &Loc, - bool OrLocal) { + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override { return false; } - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc) { + Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx, + ModRefResult &Mask) override { + Mask = ModRef; + return Location(CS.getArgument(ArgIdx), UnknownSize, + CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)); + } + + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) override { return ModRef; } - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override { return ModRef; } - virtual void deleteValue(Value *V) {} - virtual void copyValue(Value *From, Value *To) {} - virtual void addEscapingUse(Use &U) {} - + void deleteValue(Value *V) override {} + void copyValue(Value *From, Value *To) override {} + void addEscapingUse(Use &U) override {} + /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *ID) { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index e6af0663feaa..bfe86425119b 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -43,7 +43,7 @@ static bool CanPHITrans(Instruction *Inst) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void PHITransAddr::dump() const { - if (Addr == 0) { + if (!Addr) { dbgs() << "PHITransAddr: null\n"; return; } @@ -58,7 +58,7 @@ static bool VerifySubExpr(Value *Expr, SmallVectorImpl<Instruction*> &InstInputs) { // If this is a non-instruction value, there is nothing to do. Instruction *I = dyn_cast<Instruction>(Expr); - if (I == 0) return true; + if (!I) return true; // If it's an instruction, it is either in Tmp or its operands recursively // are. @@ -72,7 +72,7 @@ static bool VerifySubExpr(Value *Expr, // If it isn't in the InstInputs list it is a subexpr incorporated into the // address. Sanity check that it is phi translatable. if (!CanPHITrans(I)) { - errs() << "Non phi translatable instruction found in PHITransAddr:\n"; + errs() << "Instruction in PHITransAddr is not phi-translatable:\n"; errs() << *I << '\n'; llvm_unreachable("Either something is missing from InstInputs or " "CanPHITrans is wrong."); @@ -90,7 +90,7 @@ static bool VerifySubExpr(Value *Expr, /// structure is valid, it returns true. If invalid, it prints errors and /// returns false. bool PHITransAddr::Verify() const { - if (Addr == 0) return true; + if (!Addr) return true; SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); @@ -116,14 +116,14 @@ bool PHITransAddr::IsPotentiallyPHITranslatable() const { // If the input value is not an instruction, or if it is not defined in CurBB, // then we don't need to phi translate it. Instruction *Inst = dyn_cast<Instruction>(Addr); - return Inst == 0 || CanPHITrans(Inst); + return !Inst || CanPHITrans(Inst); } static void RemoveInstInputs(Value *V, SmallVectorImpl<Instruction*> &InstInputs) { Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return; + if (!I) return; // If the instruction is in the InstInputs list, remove it. SmallVectorImpl<Instruction*>::iterator Entry = @@ -147,7 +147,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, const DominatorTree *DT) { // If this is a non-instruction value, it can't require PHI translation. Instruction *Inst = dyn_cast<Instruction>(V); - if (Inst == 0) return V; + if (!Inst) return V; // Determine whether 'Inst' is an input to our PHI translatable expression. bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); @@ -173,7 +173,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // If this is a non-phi value, and it is analyzable, we can incorporate it // into the expression by making all instruction operands be inputs. if (!CanPHITrans(Inst)) - return 0; + return nullptr; // All instruction operands are now inputs (and of course, they may also be // defined in this block, so they may need to be phi translated themselves. @@ -187,9 +187,9 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // operands need to be phi translated, and if so, reconstruct it. if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { - if (!isSafeToSpeculativelyExecute(Cast)) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return nullptr; Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); - if (PHIIn == 0) return 0; + if (!PHIIn) return nullptr; if (PHIIn == Cast->getOperand(0)) return Cast; @@ -202,15 +202,14 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // Otherwise we have to see if a casted version of the incoming pointer // is available. If so, we can use it, otherwise we have to fail. - for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); - UI != E; ++UI) { - if (CastInst *CastI = dyn_cast<CastInst>(*UI)) + for (User *U : PHIIn->users()) { + if (CastInst *CastI = dyn_cast<CastInst>(U)) if (CastI->getOpcode() == Cast->getOpcode() && CastI->getType() == Cast->getType() && (!DT || DT->dominates(CastI->getParent(), PredBB))) return CastI; } - return 0; + return nullptr; } // Handle getelementptr with at least one PHI translatable operand. @@ -219,7 +218,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, bool AnyChanged = false; for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); - if (GEPOp == 0) return 0; + if (!GEPOp) return nullptr; AnyChanged |= GEPOp != GEP->getOperand(i); GEPOps.push_back(GEPOp); @@ -229,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. - if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) { + if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -238,9 +237,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // Scan to see if we have this GEP available. Value *APHIOp = GEPOps[0]; - for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); - UI != E; ++UI) { - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) + for (User *U : APHIOp->users()) { + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) if (GEPI->getType() == GEP->getType() && GEPI->getNumOperands() == GEPOps.size() && GEPI->getParent()->getParent() == CurBB->getParent() && @@ -255,7 +253,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEPI; } } - return 0; + return nullptr; } // Handle add with a constant RHS. @@ -267,7 +265,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); - if (LHS == 0) return 0; + if (!LHS) return nullptr; // If the PHI translated LHS is an add of a constant, fold the immediates. if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) @@ -285,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT)) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); @@ -297,9 +295,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return Inst; // Otherwise, see if we have this add available somewhere. - for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); - UI != E; ++UI) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) + for (User *U : LHS->users()) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) if (BO->getOpcode() == Instruction::Add && BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && BO->getParent()->getParent() == CurBB->getParent() && @@ -307,11 +304,11 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return BO; } - return 0; + return nullptr; } // Otherwise, we failed. - return 0; + return nullptr; } @@ -329,10 +326,10 @@ bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, // Make sure the value is live in the predecessor. if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) if (!DT->dominates(Inst->getParent(), PredBB)) - Addr = 0; + Addr = nullptr; } - return Addr == 0; + return Addr == nullptr; } /// PHITranslateWithInsertion - PHI translate this value into the specified @@ -357,7 +354,7 @@ PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, // If not, destroy any intermediate instructions inserted. while (NewInsts.size() != NISize) NewInsts.pop_back_val()->eraseFromParent(); - return 0; + return nullptr; } @@ -372,7 +369,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, SmallVectorImpl<Instruction*> &NewInsts) { // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. - PHITransAddr Tmp(InVal, TD); + PHITransAddr Tmp(InVal, DL); if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) return Tmp.getAddr(); @@ -382,10 +379,10 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // Handle cast of PHI translatable value. if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { - if (!isSafeToSpeculativelyExecute(Cast)) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return nullptr; Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), CurBB, PredBB, DT, NewInsts); - if (OpVal == 0) return 0; + if (!OpVal) return nullptr; // Otherwise insert a cast at the end of PredBB. CastInst *New = CastInst::Create(Cast->getOpcode(), @@ -403,7 +400,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i), CurBB, PredBB, DT, NewInsts); - if (OpVal == 0) return 0; + if (!OpVal) return nullptr; GEPOps.push_back(OpVal); } @@ -439,5 +436,5 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, } #endif - return 0; + return nullptr; } diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index 96804a01edc6..6d929091e3d2 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -11,18 +11,17 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "postdomtree" - #include "llvm/Analysis/PostDominators.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Assembly/Writer.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GenericDomTreeConstruction.h" using namespace llvm; +#define DEBUG_TYPE "postdomtree" + //===----------------------------------------------------------------------===// // PostDominatorTree Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp index 0a342b2167e4..1b0f359e6366 100644 --- a/lib/Analysis/PtrUseVisitor.cpp +++ b/lib/Analysis/PtrUseVisitor.cpp @@ -16,14 +16,13 @@ using namespace llvm; void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) { - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); - UI != UE; ++UI) { - if (VisitedUses.insert(&UI.getUse())) { + for (Use &U : I.uses()) { + if (VisitedUses.insert(&U)) { UseToVisit NewU = { - UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown), + UseToVisit::UseAndIsOffsetKnownPair(&U, IsOffsetKnown), Offset }; - Worklist.push_back(llvm_move(NewU)); + Worklist.push_back(std::move(NewU)); } } } diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 563568876763..08ebf0d85723 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -9,36 +9,43 @@ // Detects single entry single exit regions in the control flow graph. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "region" #include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionIterator.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include <algorithm> +#include <iterator> #include <set> using namespace llvm; -// Always verify if expensive checking is enabled. -#ifdef XDEBUG -static bool VerifyRegionInfo = true; -#else -static bool VerifyRegionInfo = false; -#endif +#define DEBUG_TYPE "region" -static cl::opt<bool,true> -VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo), - cl::desc("Verify region info (time consuming)")); +namespace llvm { +template class RegionBase<RegionTraits<Function>>; +template class RegionNodeBase<RegionTraits<Function>>; +template class RegionInfoBase<RegionTraits<Function>>; +} STATISTIC(numRegions, "The # of regions"); STATISTIC(numSimpleRegions, "The # of simple regions"); -static cl::opt<enum Region::PrintStyle> printStyle("print-region-style", +// Always verify if expensive checking is enabled. + +static cl::opt<bool,true> +VerifyRegionInfoX( + "verify-region-info", + cl::location(RegionInfoBase<RegionTraits<Function>>::VerifyRegionInfo), + cl::desc("Verify region info (time consuming)")); + + +static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style", + cl::location(RegionInfo::printStyle), cl::Hidden, cl::desc("style of printing regions"), cl::values( @@ -48,809 +55,110 @@ static cl::opt<enum Region::PrintStyle> printStyle("print-region-style", clEnumValN(Region::PrintRN, "rn", "print regions in detail with element_iterator"), clEnumValEnd)); -//===----------------------------------------------------------------------===// -/// Region Implementation -Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo, - DominatorTree *dt, Region *Parent) - : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {} - -Region::~Region() { - // Free the cached nodes. - for (BBNodeMapT::iterator it = BBNodeMap.begin(), - ie = BBNodeMap.end(); it != ie; ++it) - delete it->second; - - // Only clean the cache for this Region. Caches of child Regions will be - // cleaned when the child Regions are deleted. - BBNodeMap.clear(); - - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; -} - -void Region::replaceEntry(BasicBlock *BB) { - entry.setPointer(BB); -} - -void Region::replaceExit(BasicBlock *BB) { - assert(exit && "No exit to replace!"); - exit = BB; -} - -void Region::replaceEntryRecursive(BasicBlock *NewEntry) { - std::vector<Region *> RegionQueue; - BasicBlock *OldEntry = getEntry(); - - RegionQueue.push_back(this); - while (!RegionQueue.empty()) { - Region *R = RegionQueue.back(); - RegionQueue.pop_back(); - - R->replaceEntry(NewEntry); - for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) - if ((*RI)->getEntry() == OldEntry) - RegionQueue.push_back(*RI); - } -} - -void Region::replaceExitRecursive(BasicBlock *NewExit) { - std::vector<Region *> RegionQueue; - BasicBlock *OldExit = getExit(); - - RegionQueue.push_back(this); - while (!RegionQueue.empty()) { - Region *R = RegionQueue.back(); - RegionQueue.pop_back(); - - R->replaceExit(NewExit); - for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) - if ((*RI)->getExit() == OldExit) - RegionQueue.push_back(*RI); - } -} - -bool Region::contains(const BasicBlock *B) const { - BasicBlock *BB = const_cast<BasicBlock*>(B); - if (!DT->getNode(BB)) - return false; - BasicBlock *entry = getEntry(), *exit = getExit(); - - // Toplevel region. - if (!exit) - return true; - - return (DT->dominates(entry, BB) - && !(DT->dominates(exit, BB) && DT->dominates(entry, exit))); -} - -bool Region::contains(const Loop *L) const { - // BBs that are not part of any loop are element of the Loop - // described by the NULL pointer. This loop is not part of any region, - // except if the region describes the whole function. - if (L == 0) - return getExit() == 0; - - if (!contains(L->getHeader())) - return false; - - SmallVector<BasicBlock *, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(), - BE = ExitingBlocks.end(); BI != BE; ++BI) - if (!contains(*BI)) - return false; - - return true; -} - -Loop *Region::outermostLoopInRegion(Loop *L) const { - if (!contains(L)) - return 0; - - while (L && contains(L->getParentLoop())) { - L = L->getParentLoop(); - } - - return L; -} - -Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { - assert(LI && BB && "LI and BB cannot be null!"); - Loop *L = LI->getLoopFor(BB); - return outermostLoopInRegion(L); -} - -BasicBlock *Region::getEnteringBlock() const { - BasicBlock *entry = getEntry(); - BasicBlock *Pred; - BasicBlock *enteringBlock = 0; - - for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; - ++PI) { - Pred = *PI; - if (DT->getNode(Pred) && !contains(Pred)) { - if (enteringBlock) - return 0; - - enteringBlock = Pred; - } - } - - return enteringBlock; -} - -BasicBlock *Region::getExitingBlock() const { - BasicBlock *exit = getExit(); - BasicBlock *Pred; - BasicBlock *exitingBlock = 0; - - if (!exit) - return 0; - - for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; - ++PI) { - Pred = *PI; - if (contains(Pred)) { - if (exitingBlock) - return 0; - - exitingBlock = Pred; - } - } - - return exitingBlock; -} - -bool Region::isSimple() const { - return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock(); -} - -std::string Region::getNameStr() const { - std::string exitName; - std::string entryName; - - if (getEntry()->getName().empty()) { - raw_string_ostream OS(entryName); - - WriteAsOperand(OS, getEntry(), false); - } else - entryName = getEntry()->getName(); - - if (getExit()) { - if (getExit()->getName().empty()) { - raw_string_ostream OS(exitName); - - WriteAsOperand(OS, getExit(), false); - } else - exitName = getExit()->getName(); - } else - exitName = "<Function Return>"; - - return entryName + " => " + exitName; -} - -void Region::verifyBBInRegion(BasicBlock *BB) const { - if (!contains(BB)) - llvm_unreachable("Broken region found!"); - - BasicBlock *entry = getEntry(), *exit = getExit(); - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (!contains(*SI) && exit != *SI) - llvm_unreachable("Broken region found!"); - - if (entry != BB) - for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) - if (!contains(*SI)) - llvm_unreachable("Broken region found!"); -} - -void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const { - BasicBlock *exit = getExit(); - - visited->insert(BB); - - verifyBBInRegion(BB); - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (*SI != exit && visited->find(*SI) == visited->end()) - verifyWalk(*SI, visited); -} - -void Region::verifyRegion() const { - // Only do verification when user wants to, otherwise this expensive - // check will be invoked by PassManager. - if (!VerifyRegionInfo) return; - - std::set<BasicBlock*> visited; - verifyWalk(getEntry(), &visited); -} - -void Region::verifyRegionNest() const { - for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI) - (*RI)->verifyRegionNest(); - - verifyRegion(); -} - -Region::element_iterator Region::element_begin() { - return GraphTraits<Region*>::nodes_begin(this); -} - -Region::element_iterator Region::element_end() { - return GraphTraits<Region*>::nodes_end(this); -} - -Region::const_element_iterator Region::element_begin() const { - return GraphTraits<const Region*>::nodes_begin(this); -} - -Region::const_element_iterator Region::element_end() const { - return GraphTraits<const Region*>::nodes_end(this); -} - -Region* Region::getSubRegionNode(BasicBlock *BB) const { - Region *R = RI->getRegionFor(BB); - - if (!R || R == this) - return 0; - - // If we pass the BB out of this region, that means our code is broken. - assert(contains(R) && "BB not in current region!"); - - while (contains(R->getParent()) && R->getParent() != this) - R = R->getParent(); - - if (R->getEntry() != BB) - return 0; - - return R; -} - -RegionNode* Region::getBBNode(BasicBlock *BB) const { - assert(contains(BB) && "Can get BB node out of this region!"); - - BBNodeMapT::const_iterator at = BBNodeMap.find(BB); - - if (at != BBNodeMap.end()) - return at->second; - - RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB); - BBNodeMap.insert(std::make_pair(BB, NewNode)); - return NewNode; -} - -RegionNode* Region::getNode(BasicBlock *BB) const { - assert(contains(BB) && "Can get BB node out of this region!"); - if (Region* Child = getSubRegionNode(BB)) - return Child->getNode(); - - return getBBNode(BB); -} - -void Region::transferChildrenTo(Region *To) { - for (iterator I = begin(), E = end(); I != E; ++I) { - (*I)->parent = To; - To->children.push_back(*I); - } - children.clear(); -} - -void Region::addSubRegion(Region *SubRegion, bool moveChildren) { - assert(SubRegion->parent == 0 && "SubRegion already has a parent!"); - assert(std::find(begin(), end(), SubRegion) == children.end() - && "Subregion already exists!"); - - SubRegion->parent = this; - children.push_back(SubRegion); - - if (!moveChildren) - return; - - assert(SubRegion->children.size() == 0 - && "SubRegions that contain children are not supported"); - - for (element_iterator I = element_begin(), E = element_end(); I != E; ++I) - if (!(*I)->isSubRegion()) { - BasicBlock *BB = (*I)->getNodeAs<BasicBlock>(); - - if (SubRegion->contains(BB)) - RI->setRegionFor(BB, SubRegion); - } - - std::vector<Region*> Keep; - for (iterator I = begin(), E = end(); I != E; ++I) - if (SubRegion->contains(*I) && *I != SubRegion) { - SubRegion->children.push_back(*I); - (*I)->parent = SubRegion; - } else - Keep.push_back(*I); - - children.clear(); - children.insert(children.begin(), Keep.begin(), Keep.end()); -} - - -Region *Region::removeSubRegion(Region *Child) { - assert(Child->parent == this && "Child is not a child of this region!"); - Child->parent = 0; - RegionSet::iterator I = std::find(children.begin(), children.end(), Child); - assert(I != children.end() && "Region does not exit. Unable to remove."); - children.erase(children.begin()+(I-begin())); - return Child; -} - -unsigned Region::getDepth() const { - unsigned Depth = 0; - - for (Region *R = parent; R != 0; R = R->parent) - ++Depth; - - return Depth; -} - -Region *Region::getExpandedRegion() const { - unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors(); - - if (NumSuccessors == 0) - return NULL; - - for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); - PI != PE; ++PI) - if (!DT->dominates(getEntry(), *PI)) - return NULL; - - Region *R = RI->getRegionFor(exit); - - if (R->getEntry() != exit) { - if (exit->getTerminator()->getNumSuccessors() == 1) - return new Region(getEntry(), *succ_begin(exit), RI, DT); - else - return NULL; - } - - while (R->getParent() && R->getParent()->getEntry() == exit) - R = R->getParent(); - - if (!DT->dominates(getEntry(), R->getExit())) - for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); - PI != PE; ++PI) - if (!DT->dominates(R->getExit(), *PI)) - return NULL; - - return new Region(getEntry(), R->getExit(), RI, DT); -} - -void Region::print(raw_ostream &OS, bool print_tree, unsigned level, - enum PrintStyle Style) const { - if (print_tree) - OS.indent(level*2) << "[" << level << "] " << getNameStr(); - else - OS.indent(level*2) << getNameStr(); - - OS << "\n"; - - - if (Style != PrintNone) { - OS.indent(level*2) << "{\n"; - OS.indent(level*2 + 2); - - if (Style == PrintBB) { - for (const_block_iterator I = block_begin(), E = block_end(); I != E; ++I) - OS << (*I)->getName() << ", "; // TODO: remove the last "," - } else if (Style == PrintRN) { - for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) - OS << **I << ", "; // TODO: remove the last ", - } - - OS << "\n"; - } +//===----------------------------------------------------------------------===// +// Region implementation +// - if (print_tree) - for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI) - (*RI)->print(OS, print_tree, level+1, Style); +Region::Region(BasicBlock *Entry, BasicBlock *Exit, + RegionInfo* RI, + DominatorTree *DT, Region *Parent) : + RegionBase<RegionTraits<Function>>(Entry, Exit, RI, DT, Parent) { - if (Style != PrintNone) - OS.indent(level*2) << "} \n"; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void Region::dump() const { - print(dbgs(), true, getDepth(), printStyle.getValue()); -} -#endif - -void Region::clearNodeCache() { - // Free the cached nodes. - for (BBNodeMapT::iterator I = BBNodeMap.begin(), - IE = BBNodeMap.end(); I != IE; ++I) - delete I->second; - - BBNodeMap.clear(); - for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI) - (*RI)->clearNodeCache(); -} +Region::~Region() { } //===----------------------------------------------------------------------===// // RegionInfo implementation // -bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry, - BasicBlock *exit) const { - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { - BasicBlock *P = *PI; - if (DT->dominates(entry, P) && !DT->dominates(exit, P)) - return false; - } - return true; -} +RegionInfo::RegionInfo() : + RegionInfoBase<RegionTraits<Function>>() { -bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const { - assert(entry && exit && "entry and exit must not be null!"); - typedef DominanceFrontier::DomSetType DST; - - DST *entrySuccs = &DF->find(entry)->second; - - // Exit is the header of a loop that contains the entry. In this case, - // the dominance frontier must only contain the exit. - if (!DT->dominates(entry, exit)) { - for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); - SI != SE; ++SI) - if (*SI != exit && *SI != entry) - return false; - - return true; - } - - DST *exitSuccs = &DF->find(exit)->second; - - // Do not allow edges leaving the region. - for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); - SI != SE; ++SI) { - if (*SI == exit || *SI == entry) - continue; - if (exitSuccs->find(*SI) == exitSuccs->end()) - return false; - if (!isCommonDomFrontier(*SI, entry, exit)) - return false; - } - - // Do not allow edges pointing into the region. - for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end(); - SI != SE; ++SI) - if (DT->properlyDominates(entry, *SI) && *SI != exit) - return false; - - - return true; -} - -void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit, - BBtoBBMap *ShortCut) const { - assert(entry && exit && "entry and exit must not be null!"); - - BBtoBBMap::iterator e = ShortCut->find(exit); - - if (e == ShortCut->end()) - // No further region at exit available. - (*ShortCut)[entry] = exit; - else { - // We found a region e that starts at exit. Therefore (entry, e->second) - // is also a region, that is larger than (entry, exit). Insert the - // larger one. - BasicBlock *BB = e->second; - (*ShortCut)[entry] = BB; - } -} - -DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N, - BBtoBBMap *ShortCut) const { - BBtoBBMap::iterator e = ShortCut->find(N->getBlock()); - - if (e == ShortCut->end()) - return N->getIDom(); - - return PDT->getNode(e->second)->getIDom(); } -bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const { - assert(entry && exit && "entry and exit must not be null!"); - - unsigned num_successors = succ_end(entry) - succ_begin(entry); - - if (num_successors <= 1 && exit == *(succ_begin(entry))) - return true; +RegionInfo::~RegionInfo() { - return false; } void RegionInfo::updateStatistics(Region *R) { ++numRegions; // TODO: Slow. Should only be enabled if -stats is used. - if (R->isSimple()) ++numSimpleRegions; -} - -Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { - assert(entry && exit && "entry and exit must not be null!"); - - if (isTrivialRegion(entry, exit)) - return 0; - - Region *region = new Region(entry, exit, this, DT); - BBtoRegion.insert(std::make_pair(entry, region)); - - #ifdef XDEBUG - region->verifyRegion(); - #else - DEBUG(region->verifyRegion()); - #endif - - updateStatistics(region); - return region; -} - -void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { - assert(entry); - - DomTreeNode *N = PDT->getNode(entry); - - if (!N) - return; - - Region *lastRegion= 0; - BasicBlock *lastExit = entry; - - // As only a BasicBlock that postdominates entry can finish a region, walk the - // post dominance tree upwards. - while ((N = getNextPostDom(N, ShortCut))) { - BasicBlock *exit = N->getBlock(); - - if (!exit) - break; - - if (isRegion(entry, exit)) { - Region *newRegion = createRegion(entry, exit); - - if (lastRegion) - newRegion->addSubRegion(lastRegion); - - lastRegion = newRegion; - lastExit = exit; - } - - // This can never be a region, so stop the search. - if (!DT->dominates(entry, exit)) - break; - } - - // Tried to create regions from entry to lastExit. Next time take a - // shortcut from entry to lastExit. - if (lastExit != entry) - insertShortCut(entry, lastExit, ShortCut); -} - -void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) { - BasicBlock *entry = &(F.getEntryBlock()); - DomTreeNode *N = DT->getNode(entry); - - // Iterate over the dominance tree in post order to start with the small - // regions from the bottom of the dominance tree. If the small regions are - // detected first, detection of bigger regions is faster, as we can jump - // over the small regions. - for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE; - ++FI) { - findRegionsWithEntry(FI->getBlock(), ShortCut); - } + if (R->isSimple()) + ++numSimpleRegions; } -Region *RegionInfo::getTopMostParent(Region *region) { - while (region->parent) - region = region->getParent(); +void RegionInfo::recalculate(Function &F, DominatorTree *DT_, + PostDominatorTree *PDT_, DominanceFrontier *DF_) { + DT = DT_; + PDT = PDT_; + DF = DF_; - return region; + TopLevelRegion = new Region(&F.getEntryBlock(), nullptr, + this, DT, nullptr); + updateStatistics(TopLevelRegion); + calculate(F); } -void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) { - BasicBlock *BB = N->getBlock(); - - // Passed region exit - while (BB == region->getExit()) - region = region->getParent(); - - BBtoRegionMap::iterator it = BBtoRegion.find(BB); - - // This basic block is a start block of a region. It is already in the - // BBtoRegion relation. Only the child basic blocks have to be updated. - if (it != BBtoRegion.end()) { - Region *newRegion = it->second; - region->addSubRegion(getTopMostParent(newRegion)); - region = newRegion; - } else { - BBtoRegion[BB] = region; - } +//===----------------------------------------------------------------------===// +// RegionInfoPass implementation +// - for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI) - buildRegionsTree(*CI, region); +RegionInfoPass::RegionInfoPass() : FunctionPass(ID) { + initializeRegionInfoPassPass(*PassRegistry::getPassRegistry()); } -void RegionInfo::releaseMemory() { - BBtoRegion.clear(); - if (TopLevelRegion) - delete TopLevelRegion; - TopLevelRegion = 0; -} +RegionInfoPass::~RegionInfoPass() { -RegionInfo::RegionInfo() : FunctionPass(ID) { - initializeRegionInfoPass(*PassRegistry::getPassRegistry()); - TopLevelRegion = 0; } -RegionInfo::~RegionInfo() { +bool RegionInfoPass::runOnFunction(Function &F) { releaseMemory(); -} -void RegionInfo::Calculate(Function &F) { - // ShortCut a function where for every BB the exit of the largest region - // starting with BB is stored. These regions can be threated as single BBS. - // This improves performance on linear CFGs. - BBtoBBMap ShortCut; + auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto PDT = &getAnalysis<PostDominatorTree>(); + auto DF = &getAnalysis<DominanceFrontier>(); - scanForRegions(F, &ShortCut); - BasicBlock *BB = &F.getEntryBlock(); - buildRegionsTree(DT->getNode(BB), TopLevelRegion); + RI.recalculate(F, DT, PDT, DF); + return false; } -bool RegionInfo::runOnFunction(Function &F) { - releaseMemory(); - - DT = &getAnalysis<DominatorTree>(); - PDT = &getAnalysis<PostDominatorTree>(); - DF = &getAnalysis<DominanceFrontier>(); - - TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0); - updateStatistics(TopLevelRegion); - - Calculate(F); +void RegionInfoPass::releaseMemory() { + RI.releaseMemory(); +} - return false; +void RegionInfoPass::verifyAnalysis() const { + RI.verifyAnalysis(); } -void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const { +void RegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequiredTransitive<DominatorTree>(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); AU.addRequired<PostDominatorTree>(); AU.addRequired<DominanceFrontier>(); } -void RegionInfo::print(raw_ostream &OS, const Module *) const { - OS << "Region tree:\n"; - TopLevelRegion->print(OS, true, 0, printStyle.getValue()); - OS << "End region tree\n"; +void RegionInfoPass::print(raw_ostream &OS, const Module *) const { + RI.print(OS); } -void RegionInfo::verifyAnalysis() const { - // Only do verification when user wants to, otherwise this expensive check - // will be invoked by PMDataManager::verifyPreservedAnalysis when - // a regionpass (marked PreservedAll) finish. - if (!VerifyRegionInfo) return; - - TopLevelRegion->verifyRegionNest(); -} - -// Region pass manager support. -Region *RegionInfo::getRegionFor(BasicBlock *BB) const { - BBtoRegionMap::const_iterator I= - BBtoRegion.find(BB); - return I != BBtoRegion.end() ? I->second : 0; -} - -void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) { - BBtoRegion[BB] = R; -} - -Region *RegionInfo::operator[](BasicBlock *BB) const { - return getRegionFor(BB); -} - -BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const { - BasicBlock *Exit = NULL; - - while (true) { - // Get largest region that starts at BB. - Region *R = getRegionFor(BB); - while (R && R->getParent() && R->getParent()->getEntry() == BB) - R = R->getParent(); - - // Get the single exit of BB. - if (R && R->getEntry() == BB) - Exit = R->getExit(); - else if (++succ_begin(BB) == succ_end(BB)) - Exit = *succ_begin(BB); - else // No single exit exists. - return Exit; - - // Get largest region that starts at Exit. - Region *ExitR = getRegionFor(Exit); - while (ExitR && ExitR->getParent() - && ExitR->getParent()->getEntry() == Exit) - ExitR = ExitR->getParent(); - - for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE; - ++PI) - if (!R->contains(*PI) && !ExitR->contains(*PI)) - break; - - // This stops infinite cycles. - if (DT->dominates(Exit, BB)) - break; - - BB = Exit; - } - - return Exit; -} - -Region* -RegionInfo::getCommonRegion(Region *A, Region *B) const { - assert (A && B && "One of the Regions is NULL"); - - if (A->contains(B)) return A; - - while (!B->contains(A)) - B = B->getParent(); - - return B; -} - -Region* -RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const { - Region* ret = Regions.back(); - Regions.pop_back(); - - for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(), - E = Regions.end(); I != E; ++I) - ret = getCommonRegion(ret, *I); - - return ret; -} - -Region* -RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const { - Region* ret = getRegionFor(BBs.back()); - BBs.pop_back(); - - for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(), - E = BBs.end(); I != E; ++I) - ret = getCommonRegion(ret, getRegionFor(*I)); - - return ret; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void RegionInfoPass::dump() const { + RI.dump(); } +#endif -void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB) -{ - Region *R = getRegionFor(OldBB); - - setRegionFor(NewBB, R); - - while (R->getEntry() == OldBB && !R->isTopLevelRegion()) { - R->replaceEntry(NewBB); - R = R->getParent(); - } - - setRegionFor(OldBB, R); -} +char RegionInfoPass::ID = 0; -char RegionInfo::ID = 0; -INITIALIZE_PASS_BEGIN(RegionInfo, "regions", +INITIALIZE_PASS_BEGIN(RegionInfoPass, "regions", "Detect single entry single exit regions", true, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) INITIALIZE_PASS_DEPENDENCY(DominanceFrontier) -INITIALIZE_PASS_END(RegionInfo, "regions", +INITIALIZE_PASS_END(RegionInfoPass, "regions", "Detect single entry single exit regions", true, true) // Create methods available outside of this file, to use them @@ -859,7 +167,7 @@ INITIALIZE_PASS_END(RegionInfo, "regions", namespace llvm { FunctionPass *createRegionInfoPass() { - return new RegionInfo(); + return new RegionInfoPass(); } } diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 9208fa21d7ec..de34b727a5a0 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -17,10 +17,11 @@ #include "llvm/Analysis/RegionIterator.h" #include "llvm/Support/Timer.h" -#define DEBUG_TYPE "regionpassmgr" #include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "regionpassmgr" + //===----------------------------------------------------------------------===// // RGPassManager // @@ -31,33 +32,33 @@ RGPassManager::RGPassManager() : FunctionPass(ID), PMDataManager() { skipThisRegion = false; redoThisRegion = false; - RI = NULL; - CurrentRegion = NULL; + RI = nullptr; + CurrentRegion = nullptr; } // Recurse through all subregions and all regions into RQ. -static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) { - RQ.push_back(R); - for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I) - addRegionIntoQueue(*I, RQ); +static void addRegionIntoQueue(Region &R, std::deque<Region *> &RQ) { + RQ.push_back(&R); + for (const auto &E : R) + addRegionIntoQueue(*E, RQ); } /// Pass Manager itself does not invalidate any analysis info. void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const { - Info.addRequired<RegionInfo>(); + Info.addRequired<RegionInfoPass>(); Info.setPreservesAll(); } /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the function, and if so, return true. bool RGPassManager::runOnFunction(Function &F) { - RI = &getAnalysis<RegionInfo>(); + RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); bool Changed = false; // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); - addRegionIntoQueue(RI->getTopLevelRegion(), RQ); + addRegionIntoQueue(*RI->getTopLevelRegion(), RQ); if (RQ.empty()) // No regions, skip calling finalizers return false; @@ -185,19 +186,21 @@ private: public: static char ID; - PrintRegionPass() : RegionPass(ID), Out(dbgs()) {} PrintRegionPass(const std::string &B, raw_ostream &o) : RegionPass(ID), Banner(B), Out(o) {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } - virtual bool runOnRegion(Region *R, RGPassManager &RGM) { + bool runOnRegion(Region *R, RGPassManager &RGM) override { Out << Banner; - for (Region::block_iterator I = R->block_begin(), E = R->block_end(); - I != E; ++I) - (*I)->print(Out); + for (const auto &BB : R->blocks()) { + if (BB) + BB->print(Out); + else + Out << "Printing <null> Block"; + } return false; } diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index c5f1b925921b..ad83113ec930 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -56,23 +56,24 @@ struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits { }; template<> -struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { +struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { - DOTGraphTraits (bool isSimple=false) + DOTGraphTraits (bool isSimple = false) : DOTGraphTraits<RegionNode*>(isSimple) {} - static std::string getGraphName(RegionInfo *DT) { + static std::string getGraphName(RegionInfoPass *DT) { return "Region Graph"; } - std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) { + RegionInfo &RI = G->getRegionInfo(); return DOTGraphTraits<RegionNode*>::getNodeLabel(Node, - G->getTopLevelRegion()); + reinterpret_cast<RegionNode*>(RI.getTopLevelRegion())); } std::string getEdgeAttributes(RegionNode *srcNode, - GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) { - + GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfoPass *G) { + RegionInfo &RI = G->getRegionInfo(); RegionNode *destNode = *CI; if (srcNode->isSubRegion() || destNode->isSubRegion()) @@ -82,7 +83,7 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>(); BasicBlock *destBB = destNode->getNodeAs<BasicBlock>(); - Region *R = RI->getRegionFor(destBB); + Region *R = RI.getRegionFor(destBB); while (R && R->getParent()) if (R->getParent()->getEntry() == destBB) @@ -98,44 +99,45 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { // Print the cluster of the subregions. This groups the single basic blocks // and adds a different background color for each group. - static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW, + static void printRegionCluster(const Region &R, + GraphWriter<RegionInfoPass*> &GW, unsigned depth = 0) { raw_ostream &O = GW.getOStream(); - O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R) + O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R) << " {\n"; O.indent(2 * (depth + 1)) << "label = \"\";\n"; - if (!onlySimpleRegions || R->isSimple()) { + if (!onlySimpleRegions || R.isSimple()) { O.indent(2 * (depth + 1)) << "style = filled;\n"; O.indent(2 * (depth + 1)) << "color = " - << ((R->getDepth() * 2 % 12) + 1) << "\n"; + << ((R.getDepth() * 2 % 12) + 1) << "\n"; } else { O.indent(2 * (depth + 1)) << "style = solid;\n"; O.indent(2 * (depth + 1)) << "color = " - << ((R->getDepth() * 2 % 12) + 2) << "\n"; + << ((R.getDepth() * 2 % 12) + 2) << "\n"; } - for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) - printRegionCluster(*RI, GW, depth + 1); + for (Region::const_iterator RI = R.begin(), RE = R.end(); RI != RE; ++RI) + printRegionCluster(**RI, GW, depth + 1); - RegionInfo *RI = R->getRegionInfo(); + const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo()); - for (Region::const_block_iterator BI = R->block_begin(), - BE = R->block_end(); BI != BE; ++BI) - if (RI->getRegionFor(*BI) == R) + for (const auto &BB : R.blocks()) + if (RI.getRegionFor(BB) == &R) O.indent(2 * (depth + 1)) << "Node" - << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(*BI)) + << static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB)) << ";\n"; O.indent(2 * depth) << "}\n"; } - static void addCustomGraphFeatures(const RegionInfo* RI, - GraphWriter<RegionInfo*> &GW) { + static void addCustomGraphFeatures(const RegionInfoPass* RIP, + GraphWriter<RegionInfoPass*> &GW) { + const RegionInfo &RI = RIP->getRegionInfo(); raw_ostream &O = GW.getOStream(); O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(RI->getTopLevelRegion(), GW, 4); + printRegionCluster(*RI.getTopLevelRegion(), GW, 4); } }; } //end namespace llvm @@ -143,28 +145,28 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { namespace { struct RegionViewer - : public DOTGraphTraitsViewer<RegionInfo, false> { + : public DOTGraphTraitsViewer<RegionInfoPass, false> { static char ID; - RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){ + RegionViewer() : DOTGraphTraitsViewer<RegionInfoPass, false>("reg", ID){ initializeRegionViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionViewer::ID = 0; struct RegionOnlyViewer - : public DOTGraphTraitsViewer<RegionInfo, true> { + : public DOTGraphTraitsViewer<RegionInfoPass, true> { static char ID; - RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) { + RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfoPass, true>("regonly", ID) { initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionOnlyViewer::ID = 0; struct RegionPrinter - : public DOTGraphTraitsPrinter<RegionInfo, false> { + : public DOTGraphTraitsPrinter<RegionInfoPass, false> { static char ID; RegionPrinter() : - DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) { + DOTGraphTraitsPrinter<RegionInfoPass, false>("reg", ID) { initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); } }; @@ -176,7 +178,7 @@ INITIALIZE_PASS(RegionPrinter, "dot-regions", INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", true, true) - + INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", "View regions of function (with no function bodies)", true, true) @@ -184,10 +186,10 @@ INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", namespace { struct RegionOnlyPrinter - : public DOTGraphTraitsPrinter<RegionInfo, true> { + : public DOTGraphTraitsPrinter<RegionInfoPass, true> { static char ID; RegionOnlyPrinter() : - DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) { + DOTGraphTraitsPrinter<RegionInfoPass, true>("reg", ID) { initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); } }; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index d9b696e0798f..06dbde58c108 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -58,38 +58,38 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalar-evolution" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "scalar-evolution" + STATISTIC(NumArrayLenItCounts, "Number of trip counts computed with array length"); STATISTIC(NumTripCountsComputed, @@ -114,7 +114,7 @@ VerifySCEV("verify-scev", INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) @@ -136,9 +136,9 @@ void SCEV::dump() const { #endif void SCEV::print(raw_ostream &OS) const { - switch (getSCEVType()) { + switch (static_cast<SCEVTypes>(getSCEVType())) { case scConstant: - WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false); + cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false); return; case scTruncate: { const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); @@ -174,7 +174,7 @@ void SCEV::print(raw_ostream &OS) const { if (AR->getNoWrapFlags(FlagNW) && !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) OS << "nw><"; - WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false); + AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ">"; return; } @@ -183,7 +183,7 @@ void SCEV::print(raw_ostream &OS) const { case scUMaxExpr: case scSMaxExpr: { const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); - const char *OpStr = 0; + const char *OpStr = nullptr; switch (NAry->getSCEVType()) { case scAddExpr: OpStr = " + "; break; case scMulExpr: OpStr = " * "; break; @@ -194,7 +194,7 @@ void SCEV::print(raw_ostream &OS) const { for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { OS << **I; - if (llvm::next(I) != E) + if (std::next(I) != E) OS << OpStr; } OS << ")"; @@ -229,25 +229,24 @@ void SCEV::print(raw_ostream &OS) const { Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo)) { OS << "offsetof(" << *CTy << ", "; - WriteAsOperand(OS, FieldNo, false); + FieldNo->printAsOperand(OS, false); OS << ")"; return; } // Otherwise just print it normally. - WriteAsOperand(OS, U->getValue(), false); + U->getValue()->printAsOperand(OS, false); return; } case scCouldNotCompute: OS << "***COULDNOTCOMPUTE***"; return; - default: break; } llvm_unreachable("Unknown SCEV kind!"); } Type *SCEV::getType() const { - switch (getSCEVType()) { + switch (static_cast<SCEVTypes>(getSCEVType())) { case scConstant: return cast<SCEVConstant>(this)->getType(); case scTruncate: @@ -267,9 +266,8 @@ Type *SCEV::getType() const { return cast<SCEVUnknown>(this)->getType(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - default: - llvm_unreachable("Unknown SCEV kind!"); } + llvm_unreachable("Unknown SCEV kind!"); } bool SCEV::isZero() const { @@ -315,14 +313,14 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } -const SCEV *ScalarEvolution::getConstant(const APInt& Val) { +const SCEV *ScalarEvolution::getConstant(const APInt &Val) { return getConstant(ConstantInt::get(getContext(), Val)); } @@ -368,7 +366,7 @@ void SCEVUnknown::deleted() { SE->UniqueSCEVs.RemoveNode(this); // Release the value. - setValPtr(0); + setValPtr(nullptr); } void SCEVUnknown::allUsesReplacedWith(Value *New) { @@ -482,7 +480,7 @@ namespace { // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, // so that (a + b) and (b + a) don't end up as different expressions. - switch (LType) { + switch (static_cast<SCEVTypes>(LType)) { case scUnknown: { const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); @@ -619,9 +617,10 @@ namespace { return compare(LC->getOperand(), RC->getOperand()); } - default: - llvm_unreachable("Unknown SCEV kind!"); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } + llvm_unreachable("Unknown SCEV kind!"); } }; } @@ -831,7 +830,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, ID.AddInteger(scTruncate); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Fold if the operand is constant. @@ -921,7 +920,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, ID.AddInteger(scZeroExtend); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // zext(trunc(x)) --> zext(x) or x or trunc(x) @@ -1074,7 +1073,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step, return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - SE->getSignedRange(Step).getSignedMin()); } - return 0; + return nullptr; } // The recurrence AR has been shown to have no signed wrap. Typically, if we can @@ -1093,19 +1092,18 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // Check for a simple looking step prior to loop entry. const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); if (!SA) - return 0; + return nullptr; // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV // subtraction is expensive. For this purpose, perform a quick and dirty // difference, by checking for Step in the operand list. SmallVector<const SCEV *, 4> DiffOps; - for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end(); - I != E; ++I) { - if (*I != Step) - DiffOps.push_back(*I); - } + for (const SCEV *Op : SA->operands()) + if (Op != Step) + DiffOps.push_back(Op); + if (DiffOps.size() == SA->getNumOperands()) - return 0; + return nullptr; // This is a postinc AR. Check for overflow on the preinc recurrence using the // same three conditions that getSignExtendedExpr checks. @@ -1141,7 +1139,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { return PreStart; } - return 0; + return nullptr; } // Get the normalized sign-extended expression for this AddRec's Start. @@ -1183,7 +1181,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, ID.AddInteger(scSignExtend); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // If the input value is provably positive, build a zext instead. @@ -1203,6 +1201,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, return getTruncateOrSignExtend(X, Ty); } + // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 + if (auto SA = dyn_cast<SCEVAddExpr>(Op)) { + if (SA->getNumOperands() == 2) { + auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0)); + auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1)); + if (SMul && SC1) { + if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) { + const APInt &C1 = SC1->getValue()->getValue(); + const APInt &C2 = SC2->getValue()->getValue(); + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && + C2.ugt(C1) && C2.isPowerOf2()) + return getAddExpr(getSignExtendExpr(SC1, Ty), + getSignExtendExpr(SMul, Ty)); + } + } + } + } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). This allows analysis of something like @@ -1294,6 +1309,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, L, AR->getNoWrapFlags()); } } + // If Start and Step are constants, check if we can apply this + // transformation: + // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 + auto SC1 = dyn_cast<SCEVConstant>(Start); + auto SC2 = dyn_cast<SCEVConstant>(Step); + if (SC1 && SC2) { + const APInt &C1 = SC1->getValue()->getValue(); + const APInt &C2 = SC2->getValue()->getValue(); + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && + C2.isPowerOf2()) { + Start = getSignExtendExpr(Start, Ty); + const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, + L, AR->getNoWrapFlags()); + return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); + } + } } // The cast wasn't folded; create an explicit cast node. @@ -1342,9 +1373,8 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, // Force the cast to be folded into the operands of an addrec. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) { SmallVector<const SCEV *, 4> Ops; - for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); - I != E; ++I) - Ops.push_back(getAnyExtendExpr(*I, Ty)); + for (const SCEV *Op : AR->operands()) + Ops.push_back(getAnyExtendExpr(Op, Ty)); return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } @@ -1362,7 +1392,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// what it does, given a sequence of operands that would form an add /// expression like this: /// -/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r) +/// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) /// /// where A and B are constants, update the map with these values: /// @@ -1813,7 +1843,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, ID.AddInteger(scAddExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; SCEVAddExpr *S = static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2107,7 +2137,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, ID.AddInteger(scMulExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; SCEVMulExpr *S = static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2232,7 +2262,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, ID.AddInteger(scUDivExpr); ID.AddPointer(LHS); ID.AddPointer(RHS); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); @@ -2240,6 +2270,77 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, return S; } +static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue().abs(); + APInt B = C2->getValue()->getValue().abs(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.zext(ABW); + else if (ABW < BBW) + A = A.zext(BBW); + + return APIntOps::GreatestCommonDivisor(A, B); +} + +/// getUDivExactExpr - Get a canonical unsigned division expression, or +/// something simpler if possible. There is no representation for an exact udiv +/// in SCEV IR, but we can attempt to remove factors from the LHS and RHS. +/// We can't do this when it's not exact because the udiv may be clearing bits. +const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, + const SCEV *RHS) { + // TODO: we could try to find factors in all sorts of things, but for now we + // just deal with u/exact (multiply, constant). See SCEVDivision towards the + // end of this file for inspiration. + + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS); + if (!Mul) + return getUDivExpr(LHS, RHS); + + if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) { + // If the mulexpr multiplies by a constant, then that constant must be the + // first element of the mulexpr. + if (const SCEVConstant *LHSCst = + dyn_cast<SCEVConstant>(Mul->getOperand(0))) { + if (LHSCst == RHSCst) { + SmallVector<const SCEV *, 2> Operands; + Operands.append(Mul->op_begin() + 1, Mul->op_end()); + return getMulExpr(Operands); + } + + // We can't just assume that LHSCst divides RHSCst cleanly, it could be + // that there's a factor provided by one of the other terms. We need to + // check. + APInt Factor = gcd(LHSCst, RHSCst); + if (!Factor.isIntN(1)) { + LHSCst = cast<SCEVConstant>( + getConstant(LHSCst->getValue()->getValue().udiv(Factor))); + RHSCst = cast<SCEVConstant>( + getConstant(RHSCst->getValue()->getValue().udiv(Factor))); + SmallVector<const SCEV *, 2> Operands; + Operands.push_back(LHSCst); + Operands.append(Mul->op_begin() + 1, Mul->op_end()); + LHS = getMulExpr(Operands); + RHS = RHSCst; + Mul = dyn_cast<SCEVMulExpr>(LHS); + if (!Mul) + return getUDivExactExpr(LHS, RHS); + } + } + } + + for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) { + if (Mul->getOperand(i) == RHS) { + SmallVector<const SCEV *, 2> Operands; + Operands.append(Mul->op_begin(), Mul->op_begin() + i); + Operands.append(Mul->op_begin() + i + 1, Mul->op_end()); + return getMulExpr(Operands); + } + } + + return getUDivExpr(LHS, RHS); +} /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. @@ -2356,7 +2457,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, for (unsigned i = 0, e = Operands.size(); i != e; ++i) ID.AddPointer(Operands[i]); ID.AddPointer(L); - void *IP = 0; + void *IP = nullptr; SCEVAddRecExpr *S = static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2464,7 +2565,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { ID.AddInteger(scSMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); @@ -2568,7 +2669,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { ID.AddInteger(scUMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); @@ -2594,12 +2695,12 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - if (TD) - return getConstant(IntTy, TD->getTypeAllocSize(AllocTy)); + if (DL) + return getConstant(IntTy, DL->getTypeAllocSize(AllocTy)); Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); assert(Ty == IntTy && "Effective SCEV type doesn't match"); @@ -2612,14 +2713,14 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - if (TD) { + if (DL) { return getConstant(IntTy, - TD->getStructLayout(STy)->getElementOffset(FieldNo)); + DL->getStructLayout(STy)->getElementOffset(FieldNo)); } Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); @@ -2635,7 +2736,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { FoldingSetNodeID ID; ID.AddInteger(scUnknown); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { assert(cast<SCEVUnknown>(S)->getValue() == V && "Stale SCEVUnknown in uniquing map!"); @@ -2667,8 +2768,8 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); // If we have a DataLayout, use it! - if (TD) - return TD->getTypeSizeInBits(Ty); + if (DL) + return DL->getTypeSizeInBits(Ty); // Integer types have fixed sizes. if (Ty->isIntegerTy()) @@ -2694,8 +2795,8 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - if (TD) - return TD->getIntPtrType(Ty); + if (DL) + return DL->getIntPtrType(Ty); // Without DataLayout, conservatively assume pointers are 64-bit. return Type::getInt64Ty(getContext()); @@ -2714,7 +2815,7 @@ namespace { bool FindOne; FindInvalidSCEVUnknown() { FindOne = false; } bool follow(const SCEV *S) { - switch (S->getSCEVType()) { + switch (static_cast<SCEVTypes>(S->getSCEVType())) { case scConstant: return false; case scUnknown: @@ -2941,7 +3042,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { return getPointerBase(Cast->getOperand()); } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { - const SCEV *PtrOp = 0; + const SCEV *PtrOp = nullptr; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { if ((*I)->getType()->isPointerTy()) { @@ -2964,9 +3065,8 @@ static void PushDefUseChildren(Instruction *I, SmallVectorImpl<Instruction *> &Worklist) { // Push the def-use children onto the Worklist stack. - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) - Worklist.push_back(cast<Instruction>(*UI)); + for (User *U : I->users()) + Worklist.push_back(cast<Instruction>(U)); } /// ForgetSymbolicValue - This looks up computed SCEV values for all @@ -3022,20 +3122,20 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // The loop may have multiple entrances or multiple exits; we can analyze // this phi as an addrec if it has a unique entry value and a unique // backedge value. - Value *BEValueV = 0, *StartValueV = 0; + Value *BEValueV = nullptr, *StartValueV = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (L->contains(PN->getIncomingBlock(i))) { if (!BEValueV) { BEValueV = V; } else if (BEValueV != V) { - BEValueV = 0; + BEValueV = nullptr; break; } } else if (!StartValueV) { StartValueV = V; } else if (StartValueV != V) { - StartValueV = 0; + StartValueV = nullptr; break; } } @@ -3163,7 +3263,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, TD, TLI, DT)) + if (Value *V = SimplifyInstruction(PN, DL, TLI, DT)) if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -3189,7 +3289,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { const SCEV *TotalOffset = getConstant(IntPtrTy, 0); gep_type_iterator GTI = gep_type_begin(GEP); - for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()), + for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()), E = GEP->op_end(); I != E; ++I) { Value *Index = *I; @@ -3295,7 +3395,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Zeros, Ones); + computeKnownBits(U->getValue(), Zeros, Ones); return Zeros.countTrailingOnes(); } @@ -3434,7 +3534,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Zeros, Ones, TD); + computeKnownBits(U->getValue(), Zeros, Ones, DL); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3584,9 +3684,9 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - if (!U->getValue()->getType()->isIntegerTy() && !TD) + if (!U->getValue()->getType()->isIntegerTy() && !DL) return setSignedRange(U, ConservativeResult); - unsigned NS = ComputeNumSignBits(U->getValue(), TD); + unsigned NS = ComputeNumSignBits(U->getValue(), DL); if (NS <= 1) return setSignedRange(U, ConservativeResult); return setSignedRange(U, ConservativeResult.intersectWith( @@ -3687,20 +3787,27 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Instcombine's ShrinkDemandedConstant may strip bits out of // constants, obscuring what would otherwise be a low-bits mask. - // Use ComputeMaskedBits to compute what ShrinkDemandedConstant + // Use computeKnownBits to compute what ShrinkDemandedConstant // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countLeadingZeros(); + unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD); - - APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); - - if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) - return - getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), - IntegerType::get(getContext(), BitWidth - LZ)), - U->getType()); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL); + + APInt EffectiveMask = + APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); + if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) { + const SCEV *MulCount = getConstant( + ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ))); + return getMulExpr( + getZeroExtendExpr( + getTruncateExpr( + getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount), + IntegerType::get(getContext(), BitWidth - LZ - TZ)), + U->getType()), + MulCount); + } } break; @@ -4241,9 +4348,9 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); - const SCEV *BECount = 0; + const SCEV *BECount = nullptr; for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); @@ -4261,7 +4368,7 @@ const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { if (ENT->ExitingBlock == ExitingBlock) return ENT->ExactNotTaken; @@ -4284,7 +4391,7 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, return false; for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { if (ENT->ExactNotTaken != SE->getCouldNotCompute() && SE->hasOperand(ENT->ExactNotTaken, S)) { @@ -4323,8 +4430,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( /// clear - Invalidate this result and free the ExitNotTakenInfo array. void ScalarEvolution::BackedgeTakenInfo::clear() { - ExitNotTaken.ExitingBlock = 0; - ExitNotTaken.ExactNotTaken = 0; + ExitNotTaken.ExitingBlock = nullptr; + ExitNotTaken.ExactNotTaken = nullptr; delete[] ExitNotTaken.getNextExit(); } @@ -4335,32 +4442,63 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - // Examine all exits and pick the most conservative values. - const SCEV *MaxBECount = getCouldNotCompute(); - bool CouldComputeBECount = true; SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts; + bool CouldComputeBECount = true; + BasicBlock *Latch = L->getLoopLatch(); // may be NULL. + const SCEV *MustExitMaxBECount = nullptr; + const SCEV *MayExitMaxBECount = nullptr; + + // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts + // and compute maxBECount. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); + BasicBlock *ExitBB = ExitingBlocks[i]; + ExitLimit EL = ComputeExitLimit(L, ExitBB); + + // 1. For each exit that can be computed, add an entry to ExitCounts. + // CouldComputeBECount is true only if all exits can be computed. if (EL.Exact == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else - ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact)); + ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact)); - if (MaxBECount == getCouldNotCompute()) - MaxBECount = EL.Max; - else if (EL.Max != getCouldNotCompute()) { - // We cannot take the "min" MaxBECount, because non-unit stride loops may - // skip some loop tests. Taking the max over the exits is sufficiently - // conservative. TODO: We could do better taking into consideration - // that (1) the loop has unit stride (2) the last loop test is - // less-than/greater-than (3) any loop test is less-than/greater-than AND - // falls-through some constant times less then the other tests. - MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + // 2. Derive the loop's MaxBECount from each exit's max number of + // non-exiting iterations. Partition the loop exits into two kinds: + // LoopMustExits and LoopMayExits. + // + // A LoopMustExit meets two requirements: + // + // (a) Its ExitLimit.MustExit flag must be set which indicates that the exit + // test condition cannot be skipped (the tested variable has unit stride or + // the test is less-than or greater-than, rather than a strict inequality). + // + // (b) It must dominate the loop latch, hence must be tested on every loop + // iteration. + // + // If any computable LoopMustExit is found, then MaxBECount is the minimum + // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is + // conservatively the maximum EL.Max, where CouldNotCompute is considered + // greater than any computable EL.Max. + if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch && + DT->dominates(ExitBB, Latch)) { + if (!MustExitMaxBECount) + MustExitMaxBECount = EL.Max; + else { + MustExitMaxBECount = + getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max); + } + } else if (MayExitMaxBECount != getCouldNotCompute()) { + if (!MayExitMaxBECount || EL.Max == getCouldNotCompute()) + MayExitMaxBECount = EL.Max; + else { + MayExitMaxBECount = + getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max); + } } } - + const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : + (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } @@ -4370,12 +4508,19 @@ ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // Okay, we've chosen an exiting block. See what condition causes us to - // exit at this block. - // - // FIXME: we should be able to handle switch instructions (with a single exit) - BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (ExitBr == 0) return getCouldNotCompute(); - assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!"); + // exit at this block and remember the exit block and whether all other targets + // lead to the loop header. + bool MustExecuteLoopHeader = true; + BasicBlock *Exit = nullptr; + for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock); + SI != SE; ++SI) + if (!L->contains(*SI)) { + if (Exit) // Multiple exit successors. + return getCouldNotCompute(); + Exit = *SI; + } else if (*SI != L->getHeader()) { + MustExecuteLoopHeader = false; + } // At this point, we know we have a conditional branch that determines whether // the loop is exited. However, we don't know if the branch is executed each @@ -4394,13 +4539,11 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // // More extensive analysis could be done to handle more cases here. // - if (ExitBr->getSuccessor(0) != L->getHeader() && - ExitBr->getSuccessor(1) != L->getHeader() && - ExitBr->getParent() != L->getHeader()) { + if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) { // The simple checks failed, try climbing the unique predecessor chain // up to the header. bool Ok = false; - for (BasicBlock *BB = ExitBr->getParent(); BB; ) { + for (BasicBlock *BB = ExitingBlock; BB; ) { BasicBlock *Pred = BB->getUniquePredecessor(); if (!Pred) return getCouldNotCompute(); @@ -4424,11 +4567,20 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { return getCouldNotCompute(); } - // Proceed to the next level to examine the exit condition expression. - return ComputeExitLimitFromCond(L, ExitBr->getCondition(), - ExitBr->getSuccessor(0), - ExitBr->getSuccessor(1), - /*IsSubExpr=*/false); + TerminatorInst *Term = ExitingBlock->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(Term)) { + assert(BI->isConditional() && "If unconditional, it can't be in loop!"); + // Proceed to the next level to examine the exit condition expression. + return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), + BI->getSuccessor(1), + /*IsSubExpr=*/false); + } + + if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) + return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit, + /*IsSubExpr=*/false); + + return getCouldNotCompute(); } /// ComputeExitLimitFromCond - Compute the number of times the @@ -4456,6 +4608,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, IsSubExpr || EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); + bool MustExit = false; if (EitherMayExit) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. @@ -4470,6 +4623,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; else MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); + MustExit = EL0.MustExit || EL1.MustExit; } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. @@ -4478,9 +4632,10 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; if (EL0.Exact == EL1.Exact) BECount = EL0.Exact; + MustExit = EL0.MustExit && EL1.MustExit; } - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, MustExit); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. @@ -4491,6 +4646,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, IsSubExpr || EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); + bool MustExit = false; if (EitherMayExit) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. @@ -4505,6 +4661,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; else MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); + MustExit = EL0.MustExit || EL1.MustExit; } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. @@ -4513,9 +4670,10 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; if (EL0.Exact == EL1.Exact) BECount = EL0.Exact; + MustExit = EL0.MustExit && EL1.MustExit; } - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, MustExit); } } @@ -4639,6 +4797,30 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L, + SwitchInst *Switch, + BasicBlock *ExitingBlock, + bool IsSubExpr) { + assert(!L->contains(ExitingBlock) && "Not an exiting block!"); + + // Give up if the exit is the default dest of a switch. + if (Switch->getDefaultDest() == ExitingBlock) + return getCouldNotCompute(); + + assert(L->contains(Switch->getDefaultDest()) && + "Default case must not exit the loop!"); + const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L); + const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock)); + + // while (X != Y) --> while (X-Y != 0) + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr); + if (EL.hasAnyInfo()) + return EL; + + return getCouldNotCompute(); +} + static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE) { @@ -4675,7 +4857,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( return getCouldNotCompute(); // Okay, we allow one non-constant index into the GEP instruction. - Value *VarIdx = 0; + Value *VarIdx = nullptr; std::vector<Constant*> Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) @@ -4685,7 +4867,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. VarIdx = GEP->getOperand(i); VarIdxNum = i-2; - Indexes.push_back(0); + Indexes.push_back(nullptr); } // Loop-invariant loads may be a byproduct of loop optimization. Skip them. @@ -4716,7 +4898,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), Indexes); - if (Result == 0) break; // Cannot compute! + if (!Result) break; // Cannot compute! // Evaluate the condition for this iteration. Result = ConstantExpr::getICmp(predicate, Result, RHS); @@ -4777,14 +4959,14 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (Instruction::op_iterator OpI = UseInst->op_begin(), OpE = UseInst->op_end(); OpI != OpE; ++OpI) { if (isa<Constant>(*OpI)) continue; Instruction *OpInst = dyn_cast<Instruction>(*OpI); - if (!OpInst || !canConstantEvolve(OpInst, L)) return 0; + if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; PHINode *P = dyn_cast<PHINode>(OpInst); if (!P) @@ -4798,8 +4980,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap); PHIMap[OpInst] = P; } - if (P == 0) return 0; // Not evolving from PHI - if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs. + if (!P) + return nullptr; // Not evolving from PHI + if (PHI && PHI != P) + return nullptr; // Evolving from multiple different PHIs. PHI = P; } // This is a expression evolving from a constant PHI! @@ -4813,7 +4997,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, /// constraints, return null. static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { Instruction *I = dyn_cast<Instruction>(V); - if (I == 0 || !canConstantEvolve(I, L)) return 0; + if (!I || !canConstantEvolve(I, L)) return nullptr; if (PHINode *PN = dyn_cast<PHINode>(I)) { return PN; @@ -4830,23 +5014,23 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { /// reason, return null. static Constant *EvaluateExpression(Value *V, const Loop *L, DenseMap<Instruction *, Constant *> &Vals, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI) { // Convenient constant check, but redundant for recursive calls. if (Constant *C = dyn_cast<Constant>(V)) return C; Instruction *I = dyn_cast<Instruction>(V); - if (!I) return 0; + if (!I) return nullptr; if (Constant *C = Vals.lookup(I)) return C; // An instruction inside the loop depends on a value outside the loop that we // weren't given a mapping for, or a value such as a call inside the loop. - if (!canConstantEvolve(I, L)) return 0; + if (!canConstantEvolve(I, L)) return nullptr; // An unmapped PHI can be due to a branch or another loop inside this loop, // or due to this not being the initial iteration through a loop where we // couldn't compute the evolution of this particular PHI last time. - if (isa<PHINode>(I)) return 0; + if (isa<PHINode>(I)) return nullptr; std::vector<Constant*> Operands(I->getNumOperands()); @@ -4854,23 +5038,23 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i)); if (!Operand) { Operands[i] = dyn_cast<Constant>(I->getOperand(i)); - if (!Operands[i]) return 0; + if (!Operands[i]) return nullptr; continue; } - Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI); + Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI); Vals[Operand] = C; - if (!C) return 0; + if (!C) return nullptr; Operands[i] = C; } if (CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], TD, TLI); + Operands[1], DL, TLI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { if (!LI->isVolatile()) - return ConstantFoldLoadFromConstPtr(Operands[0], TD); + return ConstantFoldLoadFromConstPtr(Operands[0], DL); } - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD, + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL, TLI); } @@ -4888,7 +5072,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, return I->second; if (BEs.ugt(MaxBruteForceIterations)) - return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it. + return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it. Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; @@ -4900,22 +5084,22 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (BasicBlock::iterator I = Header->begin(); (PHI = dyn_cast<PHINode>(I)); ++I) { Constant *StartCST = dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; + if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) - return RetVal = 0; + return RetVal = nullptr; Value *BEValue = PN->getIncomingValue(SecondIsBackedge); // Execute the loop symbolically to determine the exit value. if (BEs.getActiveBits() >= 32) - return RetVal = 0; // More than 2^32-1 iterations?? Not doing it! + return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it! unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; @@ -4926,10 +5110,10 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // Compute the value of the PHIs for the next iteration. // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap<Instruction *, Constant *> NextIterVals; - Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, + Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); - if (NextPHI == 0) - return 0; // Couldn't evaluate! + if (!NextPHI) + return nullptr; // Couldn't evaluate! NextIterVals[PN] = NextPHI; bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; @@ -4952,7 +5136,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); } if (NextPHI != I->second) StoppedEvolving = false; @@ -4976,7 +5160,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); - if (PN == 0) return getCouldNotCompute(); + if (!PN) return getCouldNotCompute(); // If the loop is canonicalized, the PHI will have exactly two entries. // That's the only form we support here. @@ -4989,12 +5173,12 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (BasicBlock::iterator I = Header->begin(); (PHI = dyn_cast<PHINode>(I)); ++I) { Constant *StartCST = dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; + if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) @@ -5008,7 +5192,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals, - TD, TLI)); + DL, TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -5038,7 +5222,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, if (NextPHI) continue; // Already computed! Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); } CurrentIterVals.swap(NextIterVals); } @@ -5064,7 +5248,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (Values[u].first == L) return Values[u].second ? Values[u].second : V; } - Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0))); + Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr))); // Otherwise compute it. const SCEV *C = computeSCEVAtScope(V, L); SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V]; @@ -5082,8 +5266,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { /// SCEVConstant, because SCEVConstant is restricted to ConstantInt. /// Returns NULL if the SCEV isn't representable as a Constant. static Constant *BuildConstantFromSCEV(const SCEV *V) { - switch (V->getSCEVType()) { - default: // TODO: smax, umax. + switch (static_cast<SCEVTypes>(V->getSCEVType())) { case scCouldNotCompute: case scAddRecExpr: break; @@ -5119,7 +5302,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { } for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); - if (!C2) return 0; + if (!C2) return nullptr; // First pointer! if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { @@ -5134,7 +5317,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { // Don't bother trying to sum two pointers. We probably can't // statically compute a load that results from it anyway. if (C2->getType()->isPointerTy()) - return 0; + return nullptr; if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { if (PTy->getElementType()->isStructTy()) @@ -5152,10 +5335,10 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { const SCEVMulExpr *SM = cast<SCEVMulExpr>(V); if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { // Don't bother with pointers at all. - if (C->getType()->isPointerTy()) return 0; + if (C->getType()->isPointerTy()) return nullptr; for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); - if (!C2 || C2->getType()->isPointerTy()) return 0; + if (!C2 || C2->getType()->isPointerTy()) return nullptr; C = ConstantExpr::getMul(C, C2); } return C; @@ -5170,8 +5353,11 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { return ConstantExpr::getUDiv(LHS, RHS); break; } + case scSMaxExpr: + case scUMaxExpr: + break; // TODO: smax, umax. } - return 0; + return nullptr; } const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { @@ -5238,17 +5424,17 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if getSCEVAtScope actually made an improvement. if (MadeImprovement) { - Constant *C = 0; + Constant *C = nullptr; if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], TD, + Operands[0], Operands[1], DL, TLI); else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { if (!LI->isVolatile()) - C = ConstantFoldLoadFromConstPtr(Operands[0], TD); + C = ConstantFoldLoadFromConstPtr(Operands[0], DL); } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Operands, TD, TLI); + Operands, DL, TLI); if (!C) return V; return getSCEV(C); } @@ -5570,7 +5756,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); - if (StepC == 0 || StepC->getValue()->equalsInt(0)) + if (!StepC || StepC->getValue()->equalsInt(0)) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -5595,7 +5781,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { else MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount); + return ExitLimit(Distance, MaxBECount, /*MustExit=*/true); } // If the recurrence is known not to wraparound, unsigned divide computes the @@ -5603,15 +5789,29 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { // that the value will either become zero (and thus the loop terminates), that // the loop will terminate through some other exit condition first, or that // the loop has undefined behavior. This means we can't "miss" the exit - // value, even with nonunit stride. + // value, even with nonunit stride, and exit later via the same branch. Note + // that we can skip this exit if loop later exits via a different + // branch. Hence MustExit=false. // // This is only valid for expressions that directly compute the loop exit. It // is invalid for subexpressions in which the loop may exit through this // branch even if this subexpression is false. In that case, the trip count // computed by this udiv could be smaller than the number of well-defined // iterations. - if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) - return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) { + const SCEV *Exact = + getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + return ExitLimit(Exact, Exact, /*MustExit=*/false); + } + + // If Step is a power of two that evenly divides Start we know that the loop + // will always terminate. Start may not be a constant so we just have the + // number of trailing zeros available. This is safe even in presence of + // overflow as the recurrence will overflow to exactly 0. + const APInt &StepV = StepC->getValue()->getValue(); + if (StepV.isPowerOf2() && + GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros()) + return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) @@ -5995,18 +6195,30 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, // If LHS or RHS is an addrec, check to see if the condition is true in // every iteration of the loop. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) - if (isLoopEntryGuardedByCond( - AR->getLoop(), Pred, AR->getStart(), RHS) && - isLoopBackedgeGuardedByCond( - AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) - return true; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) - if (isLoopEntryGuardedByCond( - AR->getLoop(), Pred, LHS, AR->getStart()) && - isLoopBackedgeGuardedByCond( - AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) - return true; + // If LHS and RHS are both addrec, both conditions must be true in + // every iteration of the loop. + const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); + const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); + bool LeftGuarded = false; + bool RightGuarded = false; + if (LAR) { + const Loop *L = LAR->getLoop(); + if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) && + isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) { + if (!RAR) return true; + LeftGuarded = true; + } + } + if (RAR) { + const Loop *L = RAR->getLoop(); + if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) && + isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) { + if (!LAR) return true; + RightGuarded = true; + } + } + if (LeftGuarded && RightGuarded) + return true; // Otherwise see what can be done with known constant ranges. return isKnownPredicateWithRanges(Pred, LHS, RHS); @@ -6024,7 +6236,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_SGT: - Pred = ICmpInst::ICMP_SLT; std::swap(LHS, RHS); case ICmpInst::ICMP_SLT: { ConstantRange LHSRange = getSignedRange(LHS); @@ -6036,7 +6247,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, break; } case ICmpInst::ICMP_SGE: - Pred = ICmpInst::ICMP_SLE; std::swap(LHS, RHS); case ICmpInst::ICMP_SLE: { ConstantRange LHSRange = getSignedRange(LHS); @@ -6048,7 +6258,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, break; } case ICmpInst::ICMP_UGT: - Pred = ICmpInst::ICMP_ULT; std::swap(LHS, RHS); case ICmpInst::ICMP_ULT: { ConstantRange LHSRange = getUnsignedRange(LHS); @@ -6060,7 +6269,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, break; } case ICmpInst::ICMP_UGE: - Pred = ICmpInst::ICMP_ULE; std::swap(LHS, RHS); case ICmpInst::ICMP_ULE: { ConstantRange LHSRange = getUnsignedRange(LHS); @@ -6466,7 +6674,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); - const SCEV *MaxBECount = getCouldNotCompute(); + const SCEV *MaxBECount; if (isa<SCEVConstant>(BECount)) MaxBECount = BECount; else @@ -6476,7 +6684,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (isa<SCEVCouldNotCompute>(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, /*MustExit=*/true); } ScalarEvolution::ExitLimit @@ -6548,7 +6756,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, if (isa<SCEVCouldNotCompute>(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, /*MustExit=*/true); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -6677,18 +6885,103 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, return SE.getCouldNotCompute(); } -static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue().abs(); - APInt B = C2->getValue()->getValue().abs(); - uint32_t ABW = A.getBitWidth(); - uint32_t BBW = B.getBitWidth(); +namespace { +struct FindUndefs { + bool Found; + FindUndefs() : Found(false) {} - if (ABW > BBW) - B = B.zext(ABW); - else if (ABW < BBW) - A = A.zext(BBW); + bool follow(const SCEV *S) { + if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) { + if (isa<UndefValue>(C->getValue())) + Found = true; + } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (isa<UndefValue>(C->getValue())) + Found = true; + } - return APIntOps::GreatestCommonDivisor(A, B); + // Keep looking if we haven't found it yet. + return !Found; + } + bool isDone() const { + // Stop recursion if we have found an undef. + return Found; + } +}; +} + +// Return true when S contains at least an undef value. +static inline bool +containsUndefs(const SCEV *S) { + FindUndefs F; + SCEVTraversal<FindUndefs> ST(F); + ST.visitAll(S); + + return F.Found; +} + +namespace { +// Collect all steps of SCEV expressions. +struct SCEVCollectStrides { + ScalarEvolution &SE; + SmallVectorImpl<const SCEV *> &Strides; + + SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S) + : SE(SE), Strides(S) {} + + bool follow(const SCEV *S) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + Strides.push_back(AR->getStepRecurrence(SE)); + return true; + } + bool isDone() const { return false; } +}; + +// Collect all SCEVUnknown and SCEVMulExpr expressions. +struct SCEVCollectTerms { + SmallVectorImpl<const SCEV *> &Terms; + + SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) + : Terms(T) {} + + bool follow(const SCEV *S) { + if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) { + if (!containsUndefs(S)) + Terms.push_back(S); + + // Stop recursion: once we collected a term, do not walk its operands. + return false; + } + + // Keep looking. + return true; + } + bool isDone() const { return false; } +}; +} + +/// Find parametric terms in this SCEVAddRecExpr. +void SCEVAddRecExpr::collectParametricTerms( + ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) const { + SmallVector<const SCEV *, 4> Strides; + SCEVCollectStrides StrideCollector(SE, Strides); + visitAll(this, StrideCollector); + + DEBUG({ + dbgs() << "Strides:\n"; + for (const SCEV *S : Strides) + dbgs() << *S << "\n"; + }); + + for (const SCEV *S : Strides) { + SCEVCollectTerms TermCollector(Terms); + visitAll(S, TermCollector); + } + + DEBUG({ + dbgs() << "Terms:\n"; + for (const SCEV *T : Terms) + dbgs() << *T << "\n"; + }); } static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { @@ -6720,352 +7013,488 @@ static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { } namespace { -struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> { -public: - // Pattern match Step into Start. When Step is a multiply expression, find - // the largest subexpression of Step that appears in Start. When Start is an - // add expression, try to match Step in the subexpressions of Start, non - // matching subexpressions are returned under Remainder. - static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start, - const SCEV *Step, const SCEV **Remainder) { - assert(Remainder && "Remainder should not be NULL"); - SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0)); - const SCEV *Res = R.visit(Start); - *Remainder = R.Remainder; - return Res; - } +struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} - SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R) - : SE(S), GCD(G), Remainder(R) { - Zero = SE.getConstant(GCD->getType(), 0); - One = SE.getConstant(GCD->getType(), 1); + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; + } + bool isDone() const { + return false; } +}; +} - const SCEV *visitConstant(const SCEVConstant *Constant) { - if (GCD == Constant || Constant == Zero) - return GCD; +// Returns the size of the SCEV S. +static inline int sizeOfSCEV(const SCEV *S) { + FindSCEVSize F; + SCEVTraversal<FindSCEVSize> ST(F); + ST.visitAll(S); + return F.Size; +} - if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) { - const SCEV *Res = SE.getConstant(gcd(Constant, CGCD)); - if (Res != One) - return Res; +namespace { - Remainder = SE.getConstant(srem(Constant, CGCD)); - Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder)); - Res = SE.getConstant(gcd(Constant, CGCD)); - return Res; +struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> { +public: + // Computes the Quotient and Remainder of the division of Numerator by + // Denominator. + static void divide(ScalarEvolution &SE, const SCEV *Numerator, + const SCEV *Denominator, const SCEV **Quotient, + const SCEV **Remainder) { + assert(Numerator && Denominator && "Uninitialized SCEV"); + + SCEVDivision D(SE, Numerator, Denominator); + + // Check for the trivial case here to avoid having to check for it in the + // rest of the code. + if (Numerator == Denominator) { + *Quotient = D.One; + *Remainder = D.Zero; + return; } - // When GCD is not a constant, it could be that the GCD is an Add, Mul, - // AddRec, etc., in which case we want to find out how many times the - // Constant divides the GCD: we then return that as the new GCD. - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, GCD, Constant, &Rem); + if (Numerator->isZero()) { + *Quotient = D.Zero; + *Remainder = D.Zero; + return; + } - if (Res == One || Rem != Zero) { - Remainder = Constant; - return One; + // Split the Denominator when it is a product. + if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) { + const SCEV *Q, *R; + *Quotient = Numerator; + for (const SCEV *Op : T->operands()) { + divide(SE, *Quotient, Op, &Q, &R); + *Quotient = Q; + + // Bail out when the Numerator is not divisible by one of the terms of + // the Denominator. + if (!R->isZero()) { + *Quotient = D.Zero; + *Remainder = Numerator; + return; + } + } + *Remainder = D.Zero; + return; } - assert(isa<SCEVConstant>(Res) && "Res should be a constant"); - Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res))); - return Res; + D.visit(Numerator); + *Quotient = D.Quotient; + *Remainder = D.Remainder; + } + + SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) + : SE(S), Denominator(Denominator) { + Zero = SE.getConstant(Denominator->getType(), 0); + One = SE.getConstant(Denominator->getType(), 1); + + // By default, we don't know how to divide Expr by Denominator. + // Providing the default here simplifies the rest of the code. + Quotient = Zero; + Remainder = Numerator; + } + + // Except in the trivial case described above, we do not know how to divide + // Expr by Denominator for the following functions with empty implementation. + void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} + void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} + void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} + void visitUDivExpr(const SCEVUDivExpr *Numerator) {} + void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} + void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} + void visitUnknown(const SCEVUnknown *Numerator) {} + void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} + + void visitConstant(const SCEVConstant *Numerator) { + if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { + Quotient = SE.getConstant(sdiv(Numerator, D)); + Remainder = SE.getConstant(srem(Numerator, D)); + return; + } } - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; + void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { + const SCEV *StartQ, *StartR, *StepQ, *StepR; + assert(Numerator->isAffine() && "Numerator should be affine"); + divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); + divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); + Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), + Numerator->getNoWrapFlags()); + Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), + Numerator->getNoWrapFlags()); } - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + void visitAddExpr(const SCEVAddExpr *Numerator) { + SmallVector<const SCEV *, 2> Qs, Rs; + Type *Ty = Denominator->getType(); - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + for (const SCEV *Op : Numerator->operands()) { + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - if (GCD == Expr) - return GCD; + // Bail out if types do not match. + if (Ty != Q->getType() || Ty != R->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem); + Qs.push_back(Q); + Rs.push_back(R); + } - // FIXME: There may be ambiguous situations: for instance, - // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m). - // The order in which the AddExpr is traversed computes a different GCD - // and Remainder. - if (Res != One) - GCD = Res; - if (Rem != Zero) - Remainder = SE.getAddExpr(Remainder, Rem); + if (Qs.size() == 1) { + Quotient = Qs[0]; + Remainder = Rs[0]; + return; } - return GCD; + Quotient = SE.getAddExpr(Qs); + Remainder = SE.getAddExpr(Rs); } - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - if (GCD == Expr) - return GCD; + void visitMulExpr(const SCEVMulExpr *Numerator) { + SmallVector<const SCEV *, 2> Qs; + Type *Ty = Denominator->getType(); - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - if (Expr->getOperand(i) == GCD) - return GCD; - } + bool FoundDenominatorTerm = false; + for (const SCEV *Op : Numerator->operands()) { + // Bail out if types do not match. + if (Ty != Op->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + if (FoundDenominatorTerm) { + Qs.push_back(Op); + continue; + } - // If we have not returned yet, it means that GCD is not part of Expr. - const SCEV *PartialGCD = One; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem); - if (Rem != Zero) - // GCD does not divide Expr->getOperand(i). + // Check whether Denominator divides one of the product operands. + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); + if (!R->isZero()) { + Qs.push_back(Op); continue; + } - if (Res == GCD) - return GCD; - PartialGCD = SE.getMulExpr(PartialGCD, Res); - if (PartialGCD == GCD) - return GCD; - } - - if (PartialGCD != One) - return PartialGCD; - - Remainder = Expr; - const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD); - if (!Mul) - return PartialGCD; - - // When the GCD is a multiply expression, try to decompose it: - // this occurs when Step does not divide the Start expression - // as in: {(-4 + (3 * %m)),+,(2 * %m)} - for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem); - if (Rem == Zero) { - Remainder = Rem; - return Res; + // Bail out if types do not match. + if (Ty != Q->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; } + + FoundDenominatorTerm = true; + Qs.push_back(Q); } - return PartialGCD; - } + if (FoundDenominatorTerm) { + Remainder = Zero; + if (Qs.size() == 1) + Quotient = Qs[0]; + else + Quotient = SE.getMulExpr(Qs); + return; + } - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + if (!isa<SCEVUnknown>(Denominator)) { + Quotient = Zero; + Remainder = Numerator; + return; + } - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - if (GCD == Expr) - return GCD; + // The Remainder is obtained by replacing Denominator by 0 in Numerator. + ValueToValueMap RewriteMap; + RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = + cast<SCEVConstant>(Zero)->getValue(); + Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + + if (Remainder->isZero()) { + // The Quotient is obtained by replacing Denominator by 1 in Numerator. + RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = + cast<SCEVConstant>(One)->getValue(); + Quotient = + SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + return; + } - if (!Expr->isAffine()) { - Remainder = Expr; - return GCD; + // Quotient is (Numerator - Remainder) divided by Denominator. + const SCEV *Q, *R; + const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); + if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { + // This SCEV does not seem to simplify: fail the division here. + Quotient = Zero; + Remainder = Numerator; + return; } + divide(SE, Diff, Denominator, &Q, &R); + assert(R == Zero && + "(Numerator - Remainder) should evenly divide Denominator"); + Quotient = Q; + } + +private: + ScalarEvolution &SE; + const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; +}; +} - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem); - if (Rem != Zero) - Remainder = SE.getAddExpr(Remainder, Rem); +static bool findArrayDimensionsRec(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Terms, + SmallVectorImpl<const SCEV *> &Sizes) { + int Last = Terms.size() - 1; + const SCEV *Step = Terms[Last]; - Rem = Zero; - Res = findGCD(SE, Expr->getOperand(1), Res, &Rem); - if (Rem != Zero) { - Remainder = Expr; - return GCD; + // End of recursion. + if (Last == 0) { + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) { + SmallVector<const SCEV *, 2> Qs; + for (const SCEV *Op : M->operands()) + if (!isa<SCEVConstant>(Op)) + Qs.push_back(Op); + + Step = SE.getMulExpr(Qs); } - return Res; + Sizes.push_back(Step); + return true; } - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + for (const SCEV *&Term : Terms) { + // Normalize the terms before the next call to findArrayDimensionsRec. + const SCEV *Q, *R; + SCEVDivision::divide(SE, Term, Step, &Q, &R); - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + // Bail out when GCD does not evenly divide one of the terms. + if (!R->isZero()) + return false; - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; + Term = Q; } - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return One; - } + // Remove all SCEVConstants. + Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) { + return isa<SCEVConstant>(E); + }), + Terms.end()); -private: - ScalarEvolution &SE; - const SCEV *GCD, *Remainder, *Zero, *One; -}; + if (Terms.size() > 0) + if (!findArrayDimensionsRec(SE, Terms, Sizes)) + return false; -struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> { -public: - // Remove from Start all multiples of Step. - static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start, - const SCEV *Step) { - SCEVDivision D(SE, Step); - const SCEV *Rem = D.Zero; - (void)Rem; - // The division is guaranteed to succeed: Step should divide Start with no - // remainder. - assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero && - "Step should divide Start with no remainder."); - return D.visit(Start); - } + Sizes.push_back(Step); + return true; +} + +namespace { +struct FindParameter { + bool FoundParameter; + FindParameter() : FoundParameter(false) {} - SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) { - Zero = SE.getConstant(GCD->getType(), 0); - One = SE.getConstant(GCD->getType(), 1); + bool follow(const SCEV *S) { + if (isa<SCEVUnknown>(S)) { + FoundParameter = true; + // Stop recursion: we found a parameter. + return false; + } + // Keep looking. + return true; + } + bool isDone() const { + // Stop recursion if we have found a parameter. + return FoundParameter; } +}; +} - const SCEV *visitConstant(const SCEVConstant *Constant) { - if (GCD == Constant) - return One; +// Returns true when S contains at least a SCEVUnknown parameter. +static inline bool +containsParameters(const SCEV *S) { + FindParameter F; + SCEVTraversal<FindParameter> ST(F); + ST.visitAll(S); - if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) - return SE.getConstant(sdiv(Constant, CGCD)); - return Constant; - } + return F.FoundParameter; +} - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } +// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. +static inline bool +containsParameters(SmallVectorImpl<const SCEV *> &Terms) { + for (const SCEV *T : Terms) + if (containsParameters(T)) + return true; + return false; +} - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } +// Return the number of product terms in S. +static inline int numberOfTerms(const SCEV *S) { + if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S)) + return Expr->getNumOperands(); + return 1; +} - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } +static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { + if (isa<SCEVConstant>(T)) + return nullptr; - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - if (GCD == Expr) - return One; + if (isa<SCEVUnknown>(T)) + return T; - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) { + SmallVector<const SCEV *, 2> Factors; + for (const SCEV *Op : M->operands()) + if (!isa<SCEVConstant>(Op)) + Factors.push_back(Op); - if (Operands.size() == 1) - return Operands[0]; - return SE.getAddExpr(Operands); + return SE.getMulExpr(Factors); } - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - if (GCD == Expr) - return One; + return T; +} - bool FoundGCDTerm = false; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - if (Expr->getOperand(i) == GCD) - FoundGCDTerm = true; +/// Return the size of an element read or written by Inst. +const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { + Type *Ty; + if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) + Ty = Store->getValueOperand()->getType(); + else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) + Ty = Load->getType(); + else + return nullptr; - SmallVector<const SCEV *, 2> Operands; - if (FoundGCDTerm) { - FoundGCDTerm = false; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - if (FoundGCDTerm) - Operands.push_back(Expr->getOperand(i)); - else if (Expr->getOperand(i) == GCD) - FoundGCDTerm = true; - else - Operands.push_back(Expr->getOperand(i)); - } - } else { - FoundGCDTerm = false; - const SCEV *PartialGCD = One; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - if (PartialGCD == GCD) { - Operands.push_back(Expr->getOperand(i)); - continue; - } + Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); + return getSizeOfExpr(ETy, Ty); +} - const SCEV *Rem = Zero; - const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem); - if (Rem == Zero) { - PartialGCD = SE.getMulExpr(PartialGCD, Res); - Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); - } else { - Operands.push_back(Expr->getOperand(i)); - } - } - } +/// Second step of delinearization: compute the array dimensions Sizes from the +/// set of Terms extracted from the memory access function of this SCEVAddRec. +void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize) const { - if (Operands.size() == 1) - return Operands[0]; - return SE.getMulExpr(Operands); - } + if (Terms.size() < 1 || !ElementSize) + return; - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } + // Early return when Terms do not contain parameters: we do not delinearize + // non parametric SCEVs. + if (!containsParameters(Terms)) + return; - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - if (GCD == Expr) - return One; + DEBUG({ + dbgs() << "Terms:\n"; + for (const SCEV *T : Terms) + dbgs() << *T << "\n"; + }); - assert(Expr->isAffine() && "Expr should be affine"); + // Remove duplicates. + std::sort(Terms.begin(), Terms.end()); + Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); - const SCEV *Start = divide(SE, Expr->getStart(), GCD); - const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD); + // Put larger terms first. + std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) { + return numberOfTerms(LHS) > numberOfTerms(RHS); + }); - return SE.getAddRecExpr(Start, Step, Expr->getLoop(), - Expr->getNoWrapFlags()); - } + ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; + // Divide all terms by the element size. + for (const SCEV *&Term : Terms) { + const SCEV *Q, *R; + SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); + Term = Q; } - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } + SmallVector<const SCEV *, 4> NewTerms; + + // Remove constant factors. + for (const SCEV *T : Terms) + if (const SCEV *NewT = removeConstantFactors(SE, T)) + NewTerms.push_back(NewT); - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (GCD == Expr) - return One; - return Expr; + DEBUG({ + dbgs() << "Terms after sorting:\n"; + for (const SCEV *T : NewTerms) + dbgs() << *T << "\n"; + }); + + if (NewTerms.empty() || + !findArrayDimensionsRec(SE, NewTerms, Sizes)) { + Sizes.clear(); + return; } - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return Expr; + // The last element to be pushed into Sizes is the size of an element. + Sizes.push_back(ElementSize); + + DEBUG({ + dbgs() << "Sizes:\n"; + for (const SCEV *S : Sizes) + dbgs() << *S << "\n"; + }); +} + +/// Third step of delinearization: compute the access functions for the +/// Subscripts based on the dimensions in Sizes. +void SCEVAddRecExpr::computeAccessFunctions( + ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) const { + + // Early exit in case this SCEV is not an affine multivariate function. + if (Sizes.empty() || !this->isAffine()) + return; + + const SCEV *Res = this; + int Last = Sizes.size() - 1; + for (int i = Last; i >= 0; i--) { + const SCEV *Q, *R; + SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R); + + DEBUG({ + dbgs() << "Res: " << *Res << "\n"; + dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; + dbgs() << "Res divided by Sizes[i]:\n"; + dbgs() << "Quotient: " << *Q << "\n"; + dbgs() << "Remainder: " << *R << "\n"; + }); + + Res = Q; + + // Do not record the last subscript corresponding to the size of elements in + // the array. + if (i == Last) { + + // Bail out if the remainder is too complex. + if (isa<SCEVAddRecExpr>(R)) { + Subscripts.clear(); + Sizes.clear(); + return; + } + + continue; + } + + // Record the access function for the current subscript. + Subscripts.push_back(R); } -private: - ScalarEvolution &SE; - const SCEV *GCD, *Zero, *One; -}; + // Also push in last position the remainder of the last division: it will be + // the access function of the innermost dimension. + Subscripts.push_back(Res); + + std::reverse(Subscripts.begin(), Subscripts.end()); + + DEBUG({ + dbgs() << "Subscripts:\n"; + for (const SCEV *S : Subscripts) + dbgs() << *S << "\n"; + }); } /// Splits the SCEV into two vectors of SCEVs representing the subscripts and @@ -7117,93 +7546,40 @@ private: /// asking for the SCEV of the memory access with respect to all enclosing /// loops, calling SCEV->delinearize on that and printing the results. -const SCEV * -SCEVAddRecExpr::delinearize(ScalarEvolution &SE, - SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes) const { - // Early exit in case this SCEV is not an affine multivariate function. - if (!this->isAffine()) - return this; - - const SCEV *Start = this->getStart(); - const SCEV *Step = this->getStepRecurrence(SE); - - // Build the SCEV representation of the cannonical induction variable in the - // loop of this SCEV. - const SCEV *Zero = SE.getConstant(this->getType(), 0); - const SCEV *One = SE.getConstant(this->getType(), 1); - const SCEV *IV = - SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags()); - - DEBUG(dbgs() << "(delinearize: " << *this << "\n"); - - // Currently we fail to delinearize when the stride of this SCEV is 1. We - // could decide to not fail in this case: we could just return 1 for the size - // of the subscript, and this same SCEV for the access function. - if (Step == One) { - DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n"); - return this; - } - - // Find the GCD and Remainder of the Start and Step coefficients of this SCEV. - const SCEV *Remainder = NULL; - const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder); - - DEBUG(dbgs() << "GCD: " << *GCD << "\n"); - DEBUG(dbgs() << "Remainder: " << *Remainder << "\n"); - - // Same remark as above: we currently fail the delinearization, although we - // can very well handle this special case. - if (GCD == One) { - DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n"); - return this; - } - - // As findGCD computed Remainder, GCD divides "Start - Remainder." The - // Quotient is then this SCEV without Remainder, scaled down by the GCD. The - // Quotient is what will be used in the next subscript delinearization. - const SCEV *Quotient = - SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD); - DEBUG(dbgs() << "Quotient: " << *Quotient << "\n"); - - const SCEV *Rem; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient)) - // Recursively call delinearize on the Quotient until there are no more - // multiples that can be recognized. - Rem = AR->delinearize(SE, Subscripts, Sizes); - else - Rem = Quotient; +void SCEVAddRecExpr::delinearize(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize) const { + // First step: collect parametric terms. + SmallVector<const SCEV *, 4> Terms; + collectParametricTerms(SE, Terms); - // Scale up the cannonical induction variable IV by whatever remains from the - // Step after division by the GCD: the GCD is the size of all the sub-array. - if (Step != GCD) { - Step = SCEVDivision::divide(SE, Step, GCD); - IV = SE.getMulExpr(IV, Step); - } - // The access function in the current subscript is computed as the cannonical - // induction variable IV (potentially scaled up by the step) and offset by - // Rem, the offset of delinearization in the sub-array. - const SCEV *Index = SE.getAddExpr(IV, Rem); + if (Terms.empty()) + return; - // Record the access function and the size of the current subscript. - Subscripts.push_back(Index); - Sizes.push_back(GCD); + // Second step: find subscript sizes. + SE.findArrayDimensions(Terms, Sizes, ElementSize); -#ifndef NDEBUG - int Size = Sizes.size(); - DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n"); - DEBUG(dbgs() << "ArrayDecl[UnknownSize]"); - for (int i = 0; i < Size - 1; i++) - DEBUG(dbgs() << "[" << *Sizes[i] << "]"); - DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n"); - - DEBUG(dbgs() << "ArrayRef"); - for (int i = 0; i < Size; i++) - DEBUG(dbgs() << "[" << *Subscripts[i] << "]"); - DEBUG(dbgs() << "\n)\n"); -#endif + if (Sizes.empty()) + return; + + // Third step: compute the access functions for each subscript. + computeAccessFunctions(SE, Subscripts, Sizes); + + if (Subscripts.empty()) + return; - return Remainder; + DEBUG({ + dbgs() << "succeeded to delinearize " << *this << "\n"; + dbgs() << "ArrayDecl[UnknownSize]"; + for (const SCEV *S : Sizes) + dbgs() << "[" << *S << "]"; + + dbgs() << "\nArrayRef"; + for (const SCEV *S : Subscripts) + dbgs() << "[" << *S << "]"; + dbgs() << "\n"; + }); } //===----------------------------------------------------------------------===// @@ -7225,11 +7601,8 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { // so that future queries will recompute the expressions using the new // value. Value *Old = getValPtr(); - SmallVector<User *, 16> Worklist; + SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end()); SmallPtrSet<User *, 8> Visited; - for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); - UI != UE; ++UI) - Worklist.push_back(*UI); while (!Worklist.empty()) { User *U = Worklist.pop_back_val(); // Deleting the Old value will cause this to dangle. Postpone @@ -7241,9 +7614,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { if (PHINode *PN = dyn_cast<PHINode>(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->ValueExprMap.erase(U); - for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); - UI != UE; ++UI) - Worklist.push_back(*UI); + Worklist.insert(Worklist.end(), U->user_begin(), U->user_end()); } // Delete the Old value. if (PHINode *PN = dyn_cast<PHINode>(Old)) @@ -7260,16 +7631,18 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) { + : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), + BlockDispositions(64), FirstUnknown(nullptr) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis<LoopInfo>(); - TD = getAnalysisIfAvailable<DataLayout>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); - DT = &getAnalysis<DominatorTree>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); return false; } @@ -7278,7 +7651,7 @@ void ScalarEvolution::releaseMemory() { // destructors, so that they release their references to their values. for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) U->~SCEVUnknown(); - FirstUnknown = 0; + FirstUnknown = nullptr; ValueExprMap.clear(); @@ -7306,7 +7679,7 @@ void ScalarEvolution::releaseMemory() { void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive<LoopInfo>(); - AU.addRequiredTransitive<DominatorTree>(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); AU.addRequired<TargetLibraryInfo>(); } @@ -7321,7 +7694,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, PrintLoopInfo(OS, SE, *I); OS << "Loop "; - WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; SmallVector<BasicBlock *, 8> ExitBlocks; @@ -7337,7 +7710,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, OS << "\n" "Loop "; - WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { @@ -7359,7 +7732,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); OS << "Classifying expressions for: "; - WriteAsOperand(OS, F, /*PrintType=*/false); + F->printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) { @@ -7390,7 +7763,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { } OS << "Determining loop execution counts for: "; - WriteAsOperand(OS, F, /*PrintType=*/false); + F->printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) PrintLoopInfo(OS, &SE, *I); @@ -7417,7 +7790,7 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { ScalarEvolution::LoopDisposition ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { - switch (S->getSCEVType()) { + switch (static_cast<SCEVTypes>(S->getSCEVType())) { case scConstant: return LoopInvariant; case scTruncate: @@ -7490,8 +7863,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { return LoopInvariant; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - default: llvm_unreachable("Unknown SCEV kind!"); } + llvm_unreachable("Unknown SCEV kind!"); } bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { @@ -7523,7 +7896,7 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { ScalarEvolution::BlockDisposition ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { - switch (S->getSCEVType()) { + switch (static_cast<SCEVTypes>(S->getSCEVType())) { case scConstant: return ProperlyDominatesBlock; case scTruncate: @@ -7580,9 +7953,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { return ProperlyDominatesBlock; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - default: - llvm_unreachable("Unknown SCEV kind!"); } + llvm_unreachable("Unknown SCEV kind!"); } bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { @@ -7637,7 +8009,7 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { typedef DenseMap<const Loop *, std::string> VerifyMap; -/// replaceSubString - Replaces all occurences of From in Str with To. +/// replaceSubString - Replaces all occurrences of From in Str with To. static void replaceSubString(std::string &Str, StringRef From, StringRef To) { size_t Pos = 0; while ((Pos = Str.find(From, Pos)) != std::string::npos) { diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 79c5f0deb03b..6933f74150fd 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -34,7 +34,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) { + ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) { initializeScalarEvolutionAliasAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -43,16 +43,16 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + void *getAdjustedAnalysisPointer(AnalysisID PI) override { if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; } private: - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnFunction(Function &F); - virtual AliasResult alias(const Location &LocA, const Location &LocB); + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + AliasResult alias(const Location &LocA, const Location &LocB) override; Value *GetBaseValue(const SCEV *S); }; @@ -102,7 +102,7 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { return U->getValue(); } // No Identified object found. - return 0; + return nullptr; } AliasAnalysis::AliasResult @@ -162,10 +162,10 @@ ScalarEvolutionAliasAnalysis::alias(const Location &LocA, if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr)) if (alias(Location(AO ? AO : LocA.Ptr, AO ? +UnknownSize : LocA.Size, - AO ? 0 : LocA.TBAATag), + AO ? nullptr : LocA.TBAATag), Location(BO ? BO : LocB.Ptr, BO ? +UnknownSize : LocB.Size, - BO ? 0 : LocB.TBAATag)) == NoAlias) + BO ? nullptr : LocB.TBAATag)) == NoAlias) return NoAlias; // Forward the query to the next analysis. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 86a557b55f7e..968c619a48dd 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,11 +14,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" @@ -43,12 +45,10 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // not allowed to move it. BasicBlock::iterator BIP = Builder.GetInsertPoint(); - Instruction *Ret = NULL; + Instruction *Ret = nullptr; // Check to see if there is already a cast! - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - User *U = *UI; + for (User *U : V->users()) if (U->getType() == Ty) if (CastInst *CI = dyn_cast<CastInst>(U)) if (CI->getOpcode() == Op) { @@ -68,7 +68,6 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Ret = CI; break; } - } // Create a new cast. if (!Ret) @@ -209,7 +208,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, const SCEV *Factor, ScalarEvolution &SE, - const DataLayout *TD) { + const DataLayout *DL) { // Everything is divisible by one. if (Factor->isOne()) return true; @@ -249,7 +248,7 @@ static bool FactorOutConstant(const SCEV *&S, // In a Mul, check if there is a constant operand which is a multiple // of the given factor. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { - if (TD) { + if (DL) { // With DataLayout, the size is known. Check if there is a constant // operand which is a multiple of the given factor. If so, we can // factor it. @@ -269,7 +268,7 @@ static bool FactorOutConstant(const SCEV *&S, for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *SOp = M->getOperand(i); const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); - if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && + if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) && Remainder->isZero()) { SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); NewMulOps[i] = SOp; @@ -284,12 +283,12 @@ static bool FactorOutConstant(const SCEV *&S, if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { const SCEV *Step = A->getStepRecurrence(SE); const SCEV *StepRem = SE.getConstant(Step->getType(), 0); - if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) + if (!FactorOutConstant(Step, StepRem, Factor, SE, DL)) return false; if (!StepRem->isZero()) return false; const SCEV *Start = A->getStart(); - if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) + if (!FactorOutConstant(Start, Remainder, Factor, SE, DL)) return false; S = SE.getAddRecExpr(Start, Step, A->getLoop(), A->getNoWrapFlags(SCEV::FlagNW)); @@ -403,8 +402,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); - Type *IntPtrTy = SE.TD - ? SE.TD->getIntPtrType(PTy) + Type *IntPtrTy = SE.DL + ? SE.DL->getIntPtrType(PTy) : Type::getInt64Ty(PTy->getContext()); // Descend down the pointer's type and attempt to convert the other @@ -423,7 +422,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, for (unsigned i = 0, e = Ops.size(); i != e; ++i) { const SCEV *Op = Ops[i]; const SCEV *Remainder = SE.getConstant(Ty, 0); - if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) { // Op now has ElSize factored out. ScaledOps.push_back(Op); if (!Remainder->isZero()) @@ -457,13 +456,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, bool FoundFieldNo = false; // An empty struct has no fields. if (STy->getNumElements() == 0) break; - if (SE.TD) { + if (SE.DL) { // With DataLayout, field offsets are known. See if a constant offset // falls within any of the struct fields. if (Ops.empty()) break; if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) if (SE.getTypeSizeInBits(C->getType()) <= 64) { - const StructLayout &SL = *SE.TD->getStructLayout(STy); + const StructLayout &SL = *SE.DL->getStructLayout(STy); uint64_t FullOffset = C->getValue()->getZExtValue(); if (FullOffset < SL.getSizeInBytes()) { unsigned ElIdx = SL.getElementContainingOffset(FullOffset); @@ -629,21 +628,21 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { // Test whether we've already computed the most relevant loop for this SCEV. std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair = - RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0))); + RelevantLoops.insert(std::make_pair(S, nullptr)); if (!Pair.second) return Pair.first->second; if (isa<SCEVConstant>(S)) // A constant has no relevant loops. - return 0; + return nullptr; if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) return Pair.first->second = SE.LI->getLoopFor(I->getParent()); // A non-instruction has no relevant loops. - return 0; + return nullptr; } if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { - const Loop *L = 0; + const Loop *L = nullptr; if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) L = AR->getLoop(); for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); @@ -718,7 +717,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Emit instructions to add all the operands. Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. - Value *Sum = 0; + Value *Sum = nullptr; for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { const Loop *CurLoop = I->first; @@ -786,7 +785,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. - Value *Prod = 0; + Value *Prod = nullptr; for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { const SCEV *Op = I->second; @@ -894,18 +893,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale) { if (IncV == InsertPos) - return NULL; + return nullptr; switch (IncV->getOpcode()) { default: - return NULL; + return nullptr; // Check for a simple Add/Sub or GEP of a loop invariant step. case Instruction::Add: case Instruction::Sub: { Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); if (!OInst || SE.DT->dominates(OInst, InsertPos)) return dyn_cast<Instruction>(IncV->getOperand(0)); - return NULL; + return nullptr; } case Instruction::BitCast: return dyn_cast<Instruction>(IncV->getOperand(0)); @@ -916,7 +915,7 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, continue; if (Instruction *OInst = dyn_cast<Instruction>(*I)) { if (!SE.DT->dominates(OInst, InsertPos)) - return NULL; + return nullptr; } if (allowScale) { // allow any kind of GEP as long as it can be hoisted. @@ -927,11 +926,11 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, // have 2 operands. i1* is used by the expander to represent an // address-size element. if (IncV->getNumOperands() != 2) - return NULL; + return nullptr; unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) - return NULL; + return nullptr; break; } return dyn_cast<Instruction>(IncV->getOperand(0)); @@ -1016,6 +1015,54 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, return IncV; } +/// \brief Hoist the addrec instruction chain rooted in the loop phi above the +/// position. This routine assumes that this is possible (has been checked). +static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, + Instruction *Pos, PHINode *LoopPhi) { + do { + if (DT->dominates(InstToHoist, Pos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + InstToHoist->moveBefore(Pos); + Pos = InstToHoist; + InstToHoist = cast<Instruction>(InstToHoist->getOperand(0)); + } while (InstToHoist != LoopPhi); +} + +/// \brief Check whether we can cheaply express the requested SCEV in terms of +/// the available PHI SCEV by truncation and/or invertion of the step. +static bool canBeCheaplyTransformed(ScalarEvolution &SE, + const SCEVAddRecExpr *Phi, + const SCEVAddRecExpr *Requested, + bool &InvertStep) { + Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType()); + Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType()); + + if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth()) + return false; + + // Try truncate it if necessary. + Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy)); + if (!Phi) + return false; + + // Check whether truncation will help. + if (Phi == Requested) { + InvertStep = false; + return true; + } + + // Check whether inverting will help: {R,+,-1} == R - {0,+,1}. + if (SE.getAddExpr(Requested->getStart(), + SE.getNegativeSCEV(Requested)) == Phi) { + InvertStep = true; + return true; + } + + return false; +} + /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand /// the base addrec, which is the addrec without any non-loop-dominating /// values, and return the PHI. @@ -1023,49 +1070,87 @@ PHINode * SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, const Loop *L, Type *ExpandTy, - Type *IntTy) { + Type *IntTy, + Type *&TruncTy, + bool &InvertStep) { assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); // Reuse a previously-inserted PHI, if present. BasicBlock *LatchBlock = L->getLoopLatch(); if (LatchBlock) { + PHINode *AddRecPhiMatch = nullptr; + Instruction *IncV = nullptr; + TruncTy = nullptr; + InvertStep = false; + + // Only try partially matching scevs that need truncation and/or + // step-inversion if we know this loop is outside the current loop. + bool TryNonMatchingSCEV = IVIncInsertLoop && + SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); + for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) { - if (!SE.isSCEVable(PN->getType()) || - (SE.getEffectiveSCEVType(PN->getType()) != - SE.getEffectiveSCEVType(Normalized->getType())) || - SE.getSCEV(PN) != Normalized) + if (!SE.isSCEVable(PN->getType())) continue; - Instruction *IncV = - cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN)); + if (!PhiSCEV) + continue; + bool IsMatchingSCEV = PhiSCEV == Normalized; + // We only handle truncation and inversion of phi recurrences for the + // expanded expression if the expanded expression's loop dominates the + // loop we insert to. Check now, so we can bail out early. + if (!IsMatchingSCEV && !TryNonMatchingSCEV) + continue; + + Instruction *TempIncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + // Check whether we can reuse this PHI node. if (LSRMode) { - if (!isExpandedAddRecExprPHI(PN, IncV, L)) + if (!isExpandedAddRecExprPHI(PN, TempIncV, L)) continue; - if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos)) + if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos)) continue; - } - else { - if (!isNormalAddRecExprPHI(PN, IncV, L)) + } else { + if (!isNormalAddRecExprPHI(PN, TempIncV, L)) continue; - if (L == IVIncInsertLoop) - do { - if (SE.DT->dominates(IncV, IVIncInsertPos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - IncV->moveBefore(IVIncInsertPos); - IVIncInsertPos = IncV; - IncV = cast<Instruction>(IncV->getOperand(0)); - } while (IncV != PN); } + + // Stop if we have found an exact match SCEV. + if (IsMatchingSCEV) { + IncV = TempIncV; + TruncTy = nullptr; + InvertStep = false; + AddRecPhiMatch = PN; + break; + } + + // Try whether the phi can be translated into the requested form + // (truncated and/or offset by a constant). + if ((!TruncTy || InvertStep) && + canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) { + // Record the phi node. But don't stop we might find an exact match + // later. + AddRecPhiMatch = PN; + IncV = TempIncV; + TruncTy = SE.getEffectiveSCEVType(Normalized->getType()); + } + } + + if (AddRecPhiMatch) { + // Potentially, move the increment. We have made sure in + // isExpandedAddRecExprPHI or hoistIVInc that this is possible. + if (L == IVIncInsertLoop) + hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); + // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. - InsertedValues.insert(PN); + InsertedValues.insert(AddRecPhiMatch); // Remember the increment. rememberInstruction(IncV); - return PN; + return AddRecPhiMatch; } } @@ -1159,13 +1244,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { PostIncLoopSet Loops; Loops.insert(L); Normalized = - cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, - Loops, SE, *SE.DT)); + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr, + nullptr, Loops, SE, *SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. const SCEV *Start = Normalized->getStart(); - const SCEV *PostLoopOffset = 0; + const SCEV *PostLoopOffset = nullptr; if (!SE.properlyDominates(Start, L->getHeader())) { PostLoopOffset = Start; Start = SE.getConstant(Normalized->getType(), 0); @@ -1177,7 +1262,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Strip off any non-loop-dominating component from the addrec step. const SCEV *Step = Normalized->getStepRecurrence(SE); - const SCEV *PostLoopScale = 0; + const SCEV *PostLoopScale = nullptr; if (!SE.dominates(Step, L->getHeader())) { PostLoopScale = Step; Step = SE.getConstant(Normalized->getType(), 1); @@ -1190,7 +1275,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Expand the core addrec. If we need post-loop scaling, force it to // expand to an integer type to avoid the need for additional casting. Type *ExpandTy = PostLoopScale ? IntTy : STy; - PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); + // In some cases, we decide to reuse an existing phi node but need to truncate + // it and/or invert the step. + Type *TruncTy = nullptr; + bool InvertStep = false; + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy, + TruncTy, InvertStep); // Accommodate post-inc mode, if necessary. Value *Result; @@ -1231,6 +1321,26 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { } } + // We have decided to reuse an induction variable of a dominating loop. Apply + // truncation and/or invertion of the step. + if (TruncTy) { + Type *ResTy = Result->getType(); + // Normalize the result type. + if (ResTy != SE.getEffectiveSCEVType(ResTy)) + Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy)); + // Truncate the result. + if (TruncTy != Result->getType()) { + Result = Builder.CreateTrunc(Result, TruncTy); + rememberInstruction(Result); + } + // Invert the result. + if (InvertStep) { + Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy), + Result); + rememberInstruction(Result); + } + } + // Re-apply any non-loop-dominating scale. if (PostLoopScale) { assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); @@ -1263,7 +1373,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { const Loop *L = S->getLoop(); // First check for an existing canonical IV in a suitable type. - PHINode *CanonicalIV = 0; + PHINode *CanonicalIV = nullptr; if (PHINode *PN = L->getCanonicalInductionVariable()) if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) CanonicalIV = PN; @@ -1279,12 +1389,12 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = - llvm::next(BasicBlock::iterator(cast<Instruction>(V))); + std::next(BasicBlock::iterator(cast<Instruction>(V))); BuilderType::InsertPointGuard Guard(Builder); while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) || isa<LandingPadInst>(NewInsertPt)) ++NewInsertPt; - V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, NewInsertPt); return V; } @@ -1333,8 +1443,12 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Constant *One = ConstantInt::get(Ty, 1); for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *HP = *HPI; - if (!PredSeen.insert(HP)) + if (!PredSeen.insert(HP)) { + // There must be an incoming value for each predecessor, even the + // duplicates! + CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP); continue; + } if (L->contains(HP)) { // Insert a unit add instruction right before the terminator @@ -1507,7 +1621,7 @@ Value *SCEVExpander::expand(const SCEV *S) { while (InsertPt != Builder.GetInsertPoint() && (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))) { - InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + InsertPt = std::next(BasicBlock::iterator(InsertPt)); } break; } @@ -1528,7 +1642,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // // This is independent of PostIncLoops. The mapped value simply materializes // the expression at this insertion point. If the mapped value happened to be - // a postinc expansion, it could be reused by a non postinc user, but only if + // a postinc expansion, it could be reused by a non-postinc user, but only if // its insertion point was already at the head of the loop. InsertedExpressions[std::make_pair(S, InsertPt)] = V; return V; @@ -1557,20 +1671,12 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, // Emit code for it. BuilderType::InsertPointGuard Guard(Builder); - PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin())); + PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr, + L->getHeader()->begin())); return V; } -/// Sort values by integer width for replaceCongruentIVs. -static bool width_descending(Value *lhs, Value *rhs) { - // Put pointers at the back and make sure pointer < pointer = false. - if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy()) - return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy(); - return rhs->getType()->getPrimitiveSizeInBits() - < lhs->getType()->getPrimitiveSizeInBits(); -} - /// replaceCongruentIVs - Check for congruent phis in this loop header and /// replace them with their most canonical representative. Return the number of /// phis eliminated. @@ -1587,7 +1693,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, Phis.push_back(Phi); } if (TTI) - std::sort(Phis.begin(), Phis.end(), width_descending); + std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) { + // Put pointers at the back and make sure pointer < pointer = false. + if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) + return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); + return RHS->getType()->getPrimitiveSizeInBits() < + LHS->getType()->getPrimitiveSizeInBits(); + }); unsigned NumElim = 0; DenseMap<const SCEV *, PHINode *> ExprToIVMap; @@ -1599,7 +1711,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. - if (Value *V = Phi->hasConstantValue()) { + if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT)) { Phi->replaceAllUsesWith(V); DeadInsts.push_back(Phi); ++NumElim; diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index f1106168440d..3ccefb01101d 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" @@ -113,7 +113,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { // Transform each operand. for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) { - Operands.push_back(TransformSubExpr(*I, LUser, 0)); + Operands.push_back(TransformSubExpr(*I, LUser, nullptr)); } // Conservatively use AnyWrap until/unless we need FlagNW. const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); @@ -131,7 +131,10 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} if (AR->isAffine() && IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { - Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + const SCEV *TransformedStep = + TransformSubExpr(AR->getStepRecurrence(SE), + User, OperandValToReplace); + Result = SE.getMinusSCEV(Result, TransformedStep); Loops.insert(L); } #if 0 @@ -144,6 +147,20 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { #endif break; case Normalize: + // We want to normalize step expression, because otherwise we might not be + // able to denormalize to the original expression. + // + // Here is an example what will happen if we don't normalize step: + // ORIGINAL ISE: + // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25> + // NORMALIZED ISE: + // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+, + // (100 /u {0,+,1}<%bb16>)}<%bb25> + // DENORMALIZED BACK ISE: + // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+, + // (100 /u {1,+,1}<%bb16>)}<%bb25> + // Note that the initial value changes after normalization + + // denormalization, which isn't correct. if (Loops.count(L)) { const SCEV *TransformedStep = TransformSubExpr(AR->getStepRecurrence(SE), @@ -157,8 +174,14 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { #endif break; case Denormalize: - if (Loops.count(L)) - Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE); + // Here we want to normalize step expressions for the same reasons, as + // stated above. + if (Loops.count(L)) { + const SCEV *TransformedStep = + TransformSubExpr(AR->getStepRecurrence(SE), + User, OperandValToReplace); + Result = SE.getAddExpr(Result, TransformedStep); + } break; } return Result; @@ -218,7 +241,7 @@ TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) { } /// Top level driver for transforming an expression DAG into its requested -/// post-inc form (either "Normalized" or "Denormalized". +/// post-inc form (either "Normalized" or "Denormalized"). const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, const SCEV *S, Instruction *User, diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index 15b78728a73c..edd82f5fe296 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sparseprop" #include "llvm/Analysis/SparsePropagation.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -21,6 +20,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "sparseprop" + //===----------------------------------------------------------------------===// // AbstractLatticeFunction Implementation //===----------------------------------------------------------------------===// @@ -147,7 +148,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, return; Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); - if (C == 0 || !isa<ConstantInt>(C)) { + if (!C || !isa<ConstantInt>(C)) { // Non-constant values can go either way. Succs[0] = Succs[1] = true; return; @@ -189,7 +190,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, return; Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); - if (C == 0 || !isa<ConstantInt>(C)) { + if (!C || !isa<ConstantInt>(C)) { // All destinations are executable! Succs.assign(TI.getNumSuccessors(), true); return; @@ -303,11 +304,10 @@ void SparseSolver::Solve(Function &F) { // "I" got into the work list because it made a transition. See if any // users are both live and in need of updating. - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - Instruction *U = cast<Instruction>(*UI); - if (BBExecutable.count(U->getParent())) // Inst is executable? - visitInst(*U); + for (User *U : I->users()) { + Instruction *UI = cast<Instruction>(U); + if (BBExecutable.count(UI->getParent())) // Inst is executable? + visitInst(*UI); } } diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 0353295345ce..cdb0b79fd7ee 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -7,18 +7,19 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tti" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/CallSite.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; +#define DEBUG_TYPE "tti" + // Setup the analysis group to manage the TargetTransformInfo passes. INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI) char TargetTransformInfo::ID = 0; @@ -35,16 +36,6 @@ void TargetTransformInfo::pushTTIStack(Pass *P) { PTTI->TopTTI = this; } -void TargetTransformInfo::popTTIStack() { - TopTTI = 0; - - // Walk up the chain and update the top TTI pointer. - for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) - PTTI->TopTTI = PrevTTI; - - PrevTTI = 0; -} - void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfo>(); } @@ -158,6 +149,16 @@ unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { return PrevTTI->getIntImmCost(Imm, Ty); } +unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty); +} + +unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty); +} + unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { return PrevTTI->getNumberOfRegisters(Vector); } @@ -231,22 +232,23 @@ unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, namespace { -struct NoTTI : ImmutablePass, TargetTransformInfo { +struct NoTTI final : ImmutablePass, TargetTransformInfo { const DataLayout *DL; - NoTTI() : ImmutablePass(ID), DL(0) { + NoTTI() : ImmutablePass(ID), DL(nullptr) { initializeNoTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { + virtual void initializePass() override { // Note that this subclass is special, and must *not* call initializeTTI as // it does not chain. TopTTI = this; - PrevTTI = 0; - DL = getAnalysisIfAvailable<DataLayout>(); + PrevTTI = nullptr; + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { // Note that this subclass is special, and must *not* call // TTI::getAnalysisUsage as it breaks the recursion. } @@ -255,13 +257,14 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { static char ID; /// Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) { + virtual void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo*)this; return this; } - unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const { + unsigned getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy) const override { switch (Opcode) { default: // By default, just classify everything as 'basic'. @@ -318,7 +321,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { } unsigned getGEPCost(const Value *Ptr, - ArrayRef<const Value *> Operands) const { + ArrayRef<const Value *> Operands) const override { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -328,7 +331,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return TCC_Free; } - unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const { + unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const override + { assert(FTy && "FunctionType must be provided to this routine."); // The target-independent implementation just measures the size of the @@ -343,7 +347,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return TCC_Basic * (NumArgs + 1); } - unsigned getCallCost(const Function *F, int NumArgs = -1) const { + unsigned getCallCost(const Function *F, int NumArgs = -1) const override + { assert(F && "A concrete function must be provided to this routine."); if (NumArgs < 0) @@ -364,7 +369,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { } unsigned getCallCost(const Function *F, - ArrayRef<const Value *> Arguments) const { + ArrayRef<const Value *> Arguments) const override { // Simply delegate to generic handling of the call. // FIXME: We should use instsimplify or something else to catch calls which // will constant fold with these arguments. @@ -372,7 +377,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { } unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> ParamTys) const { + ArrayRef<Type *> ParamTys) const override { switch (IID) { default: // Intrinsics rarely (if ever) have normal argument setup constraints. @@ -394,8 +399,9 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { } } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<const Value *> Arguments) const { + unsigned + getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) const override { // Delegate to the generic intrinsic handling code. This mostly provides an // opportunity for targets to (for example) special case the cost of // certain intrinsics based on constants used as arguments. @@ -406,14 +412,14 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys); } - unsigned getUserCost(const User *U) const { + unsigned getUserCost(const User *U) const override { if (isa<PHINode>(U)) return TCC_Free; // Model all PHI nodes as free. - if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) - // In the basic model we just assume that all-constant GEPs will be - // folded into their uses via addressing modes. - return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic; + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { + SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end()); + return TopTTI->getGEPCost(GEP->getPointerOperand(), Indices); + } if (ImmutableCallSite CS = U) { const Function *F = CS.getCalledFunction(); @@ -423,12 +429,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size()); } - SmallVector<const Value *, 8> Arguments; - for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), - AE = CS.arg_end(); - AI != AE; ++AI) - Arguments.push_back(*AI); - + SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); return TopTTI->getCallCost(F, Arguments); } @@ -443,12 +444,12 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { // Otherwise delegate to the fully generic implementations. return getOperationCost(Operator::getOpcode(U), U->getType(), U->getNumOperands() == 1 ? - U->getOperand(0)->getType() : 0); + U->getOperand(0)->getType() : nullptr); } - bool hasBranchDivergence() const { return false; } + bool hasBranchDivergence() const override { return false; } - bool isLoweredToCall(const Function *F) const { + bool isLoweredToCall(const Function *F) const override { // FIXME: These should almost certainly not be handled here, and instead // handled with the help of TLI or the target itself. This was largely // ported from existing analysis heuristics here so that such refactorings @@ -479,126 +480,136 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return true; } - void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } + void getUnrollingPreferences(Loop *, UnrollingPreferences &) const override { + } - bool isLegalAddImmediate(int64_t Imm) const { + bool isLegalAddImmediate(int64_t Imm) const override { return false; } - bool isLegalICmpImmediate(int64_t Imm) const { + bool isLegalICmpImmediate(int64_t Imm) const override { return false; } bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) const { + bool HasBaseReg, int64_t Scale) const override + { // Guess that reg+reg addressing is allowed. This heuristic is taken from // the implementation of LSR. return !BaseGV && BaseOffset == 0 && Scale <= 1; } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) const { + bool HasBaseReg, int64_t Scale) const override { // Guess that all legal addressing mode are free. if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale)) return 0; return -1; } - - bool isTruncateFree(Type *Ty1, Type *Ty2) const { + bool isTruncateFree(Type *Ty1, Type *Ty2) const override { return false; } - bool isTypeLegal(Type *Ty) const { + bool isTypeLegal(Type *Ty) const override { return false; } - unsigned getJumpBufAlignment() const { + unsigned getJumpBufAlignment() const override { return 0; } - unsigned getJumpBufSize() const { + unsigned getJumpBufSize() const override { return 0; } - bool shouldBuildLookupTables() const { + bool shouldBuildLookupTables() const override { return true; } - PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const { + PopcntSupportKind + getPopcntSupport(unsigned IntTyWidthInBit) const override { return PSK_Software; } - bool haveFastSqrt(Type *Ty) const { + bool haveFastSqrt(Type *Ty) const override { return false; } - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const { - return 1; + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override { + return TCC_Basic; + } + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) const override { + return TCC_Free; + } + + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) const override { + return TCC_Free; } - unsigned getNumberOfRegisters(bool Vector) const { + unsigned getNumberOfRegisters(bool Vector) const override { return 8; } - unsigned getRegisterBitWidth(bool Vector) const { + unsigned getRegisterBitWidth(bool Vector) const override { return 32; } - unsigned getMaximumUnrollFactor() const { + unsigned getMaximumUnrollFactor() const override { return 1; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind) const { + OperandValueKind) const override { return 1; } - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index = 0, Type *SubTp = 0) const { + unsigned getShuffleCost(ShuffleKind Kind, Type *Ty, + int Index = 0, Type *SubTp = nullptr) const override { return 1; } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { + Type *Src) const override { return 1; } - unsigned getCFInstrCost(unsigned Opcode) const { + unsigned getCFInstrCost(unsigned Opcode) const override { return 1; } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = 0) const { + Type *CondTy = nullptr) const override { return 1; } unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index = -1) const { + unsigned Index = -1) const override { return 1; } - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const override { return 1; } - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, - Type *RetTy, - ArrayRef<Type*> Tys) const { + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Type*> Tys) const override { return 1; } - unsigned getNumberOfParts(Type *Tp) const { + unsigned getNumberOfParts(Type *Tp) const override { return 0; } - unsigned getAddressComputationCost(Type *Tp, bool) const { + unsigned getAddressComputationCost(Type *Tp, bool) const override { return 0; } - unsigned getReductionCost(unsigned, Type *, bool) const { + unsigned getReductionCost(unsigned, Type *, bool) const override { return 1; } }; diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index 4c68322b8282..5a1acc00fb94 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -16,7 +16,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Trace.h" -#include "llvm/Assembly/Writer.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -37,7 +36,7 @@ void Trace::print(raw_ostream &O) const { O << "; Trace from function " << F->getName() << ", blocks:\n"; for (const_iterator i = begin(), e = end(); i != e; ++i) { O << "; "; - WriteAsOperand(O, *i, true, getModule()); + (*i)->printAsOperand(O, true, getModule()); O << "\n"; } O << "; Trace parent function: \n" << *F; diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 6791d4b9102b..f36f6f8a8768 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -144,7 +144,7 @@ namespace { const MDNode *Node; public: - TBAANode() : Node(0) {} + TBAANode() : Node(nullptr) {} explicit TBAANode(const MDNode *N) : Node(N) {} /// getNode - Get the MDNode for this TBAANode. @@ -182,7 +182,6 @@ namespace { const MDNode *Node; public: - TBAAStructTagNode() : Node(0) {} explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} /// Get the MDNode for this TBAAStructTagNode. @@ -218,7 +217,7 @@ namespace { const MDNode *Node; public: - TBAAStructTypeNode() : Node(0) {} + TBAAStructTypeNode() : Node(nullptr) {} explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} /// Get the MDNode for this TBAAStructTypeNode. @@ -281,7 +280,7 @@ namespace { initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { + void initializePass() override { InitializeAliasAnalysis(this); } @@ -289,7 +288,7 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *PI) { + void *getAdjustedAnalysisPointer(const void *PI) override { if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; @@ -299,15 +298,15 @@ namespace { bool PathAliases(const MDNode *A, const MDNode *B) const; private: - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual AliasResult alias(const Location &LocA, const Location &LocB); - virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); - virtual ModRefBehavior getModRefBehavior(const Function *F); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2); + void getAnalysisUsage(AnalysisUsage &AU) const override; + AliasResult alias(const Location &LocA, const Location &LocB) override; + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override; + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; + ModRefBehavior getModRefBehavior(const Function *F) override; + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) override; + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override; }; } // End of anonymous namespace @@ -340,7 +339,8 @@ static bool isStructPathTBAA(const MDNode *MD) { bool TypeBasedAliasAnalysis::Aliases(const MDNode *A, const MDNode *B) const { - if (isStructPathTBAA(A)) + // Make sure that both MDNodes are struct-path aware. + if (isStructPathTBAA(A) && isStructPathTBAA(B)) return PathAliases(A, B); // Keep track of the root node for A and B. @@ -386,6 +386,10 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A, bool TypeBasedAliasAnalysis::PathAliases(const MDNode *A, const MDNode *B) const { + // Verify that both input nodes are struct-path aware. + assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); + assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); + // Keep track of the root node for A and B. TBAAStructTypeNode RootA, RootB; TBAAStructTagNode TagA(A), TagB(B); @@ -555,38 +559,40 @@ bool MDNode::isTBAAVtableAccess() const { MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { if (!A || !B) - return NULL; + return nullptr; if (A == B) return A; // For struct-path aware TBAA, we use the access type of the tag. - bool StructPath = isStructPathTBAA(A); + bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); if (StructPath) { A = cast_or_null<MDNode>(A->getOperand(1)); - if (!A) return 0; + if (!A) return nullptr; B = cast_or_null<MDNode>(B->getOperand(1)); - if (!B) return 0; + if (!B) return nullptr; } SmallVector<MDNode *, 4> PathA; MDNode *T = A; while (T) { PathA.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) + : nullptr; } SmallVector<MDNode *, 4> PathB; T = B; while (T) { PathB.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) + : nullptr; } int IA = PathA.size() - 1; int IB = PathB.size() - 1; - MDNode *Ret = 0; + MDNode *Ret = nullptr; while (IA >= 0 && IB >=0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; @@ -599,7 +605,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { return Ret; if (!Ret) - return 0; + return nullptr; // We need to convert from a type node to a tag node. Type *Int64 = IntegerType::get(A->getContext(), 64); Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index e39ee628ff0c..e6d09f4e31f6 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -16,8 +16,11 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" @@ -25,10 +28,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/ConstantRange.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/PatternMatch.h" #include <cstring> using namespace llvm; using namespace llvm::PatternMatch; @@ -44,10 +46,10 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { return TD ? TD->getPointerTypeSizeInBits(Ty) : 0; } -static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth) { +static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout *TD, unsigned Depth) { if (!Add) { if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { // We know that the top bits of C-X are clear if X contains less bits @@ -58,7 +60,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -79,13 +81,10 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // result. For an add, this works with either operand. For a subtract, // this only works if the known zeros are in the right operand. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); - assert((LHSKnownZero & LHSKnownOne) == 0 && - "Bits known to be one AND zero?"); + llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); - llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); // Determine which operand has more trailing zeros, and use that @@ -130,15 +129,13 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, } } -static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth) { +static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout *TD, unsigned Depth) { unsigned BitWidth = KnownZero.getBitWidth(); - ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -192,7 +189,8 @@ static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, KnownOne.setBit(BitWidth - 1); } -void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { +void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, + APInt &KnownZero) { unsigned BitWidth = KnownZero.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); @@ -211,8 +209,9 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); } -/// ComputeMaskedBits - Determine which of the bits are known to be either zero -/// or one and return them in the KnownZero/KnownOne bit sets. + +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. /// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that /// we cannot optimize based on the assumption that it is zero without changing @@ -226,8 +225,8 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth) { +void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout *TD, unsigned Depth) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = KnownZero.getBitWidth(); @@ -241,7 +240,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && - "V, Mask, KnownOne and KnownZero should have same BitWidth"); + "V, KnownOne and KnownZero should have same BitWidth"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { // We know all of the bits for a constant! @@ -303,7 +302,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (GA->mayBeOverridden()) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); } else { - ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); } return; } @@ -311,8 +310,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Argument *A = dyn_cast<Argument>(V)) { unsigned Align = 0; - if (A->hasByValAttr()) { - // Get alignment information off byval arguments if specified in the IR. + if (A->hasByValOrInAllocaAttr()) { + // Get alignment information off byval/inalloca arguments if specified in + // the IR. Align = A->getParamAlignment(); } else if (TD && A->hasStructRetAttr()) { // An sret parameter has at least the ABI alignment of the return type. @@ -340,49 +340,43 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Instruction::Load: if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) - computeMaskedBitsLoad(*MD, KnownZero); - return; + computeKnownBitsFromRangeMetadata(*MD, KnownZero); + break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; - return; + break; } case Instruction::Or: { - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; // Output known-1 are known to be set if set in either the LHS | RHS. KnownOne |= KnownOne2; - return; + break; } case Instruction::Xor: { - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); KnownZero = KnownZeroOut; - return; + break; } case Instruction::Mul: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; } @@ -390,42 +384,41 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); - return; + break; } case Instruction::Select: - ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; - return; + break; case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::SIToFP: case Instruction::UIToFP: - return; // Can't work with floating point. + break; // Can't work with floating point. case Instruction::PtrToInt: case Instruction::IntToPtr: + case Instruction::AddrSpaceCast: // Pointers could be different sizes. // We can't handle these if we don't know the pointer size. - if (!TD) return; + if (!TD) break; // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { @@ -438,19 +431,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); } else { SrcBitWidth = SrcTy->getScalarSizeInBits(); - if (!SrcBitWidth) return; + if (!SrcBitWidth) break; } assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. if (BitWidth > SrcBitWidth) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - return; + break; } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); @@ -458,8 +451,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - return; + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + break; } break; } @@ -469,8 +462,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -480,18 +472,17 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - return; + break; } case Instruction::Shl: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - return; + break; } break; case Instruction::LShr: @@ -501,13 +492,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. - ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - return; + break; } break; case Instruction::AShr: @@ -517,8 +507,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -527,19 +516,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero |= HighBits; else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. KnownOne |= HighBits; - return; + break; } break; case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; } case Instruction::Add: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; @@ -549,7 +538,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -573,8 +562,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // remainder is zero. if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, - Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, + Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero.setBit(BitWidth - 1); @@ -586,9 +575,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, - Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, + Depth+1); KnownZero |= ~LowBits; KnownOne &= LowBits; break; @@ -597,8 +585,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -621,8 +609,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, - Depth+1); + computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, + Depth+1); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -630,8 +618,10 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, Value *Index = I->getOperand(i); if (StructType *STy = dyn_cast<StructType>(*GTI)) { // Handle struct member offset arithmetic. - if (!TD) - return; + if (!TD) { + TrailZ = 0; + break; + } // Handle case when index is vector zeroinitializer Constant *CIndex = cast<Constant>(Index); @@ -649,11 +639,14 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } else { // Handle array index arithmetic. Type *IndexedTy = GTI.getIndexedType(); - if (!IndexedTy->isSized()) return; + if (!IndexedTy->isSized()) { + TrailZ = 0; + break; + } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); + computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); @@ -695,11 +688,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1); + computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1); KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), @@ -711,7 +704,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Unreachable blocks may have zero-operand PHI nodes. if (P->getNumIncomingValues() == 0) - return; + break; // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. @@ -730,8 +723,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, - MaxDepth-1); + computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, + MaxDepth-1); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -743,6 +736,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Call: + case Instruction::Invoke: + if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range)) + computeKnownBitsFromRangeMetadata(*MD, KnownZero); + // If a range metadata is attached to this IntrinsicInst, intersect the + // explicit range specified by the metadata and the implicit range of + // the intrinsic. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; @@ -752,16 +751,16 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::ctpop: { unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero = APInt::getHighBitsSet(64, 32); + KnownZero |= APInt::getHighBitsSet(64, 32); break; } } @@ -775,30 +774,32 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - ComputeMaskedBitsAddSub(true, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsAddSub(true, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: - ComputeMaskedBitsAddSub(false, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsAddSub(false, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1), - false, KnownZero, KnownOne, - KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), + false, KnownZero, KnownOne, + KnownZero2, KnownOne2, TD, Depth); break; } } } } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } /// ComputeSignBit - Determine whether the sign bit is known to be zero or -/// one. Convenience wrapper around ComputeMaskedBits. +/// one. Convenience wrapper around computeKnownBits. void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout *TD, unsigned Depth) { unsigned BitWidth = getBitWidth(V->getType(), TD); @@ -809,7 +810,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth); + computeKnownBits(V, ZeroBits, OneBits, TD, Depth); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -841,7 +842,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { if (Depth++ == MaxDepth) return false; - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; // A shift of a power of two is a power of two or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) @@ -881,10 +882,10 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { unsigned BitWidth = V->getType()->getScalarSizeInBits(); APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth); + computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth); APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth); + computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 @@ -1004,7 +1005,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD); // X | Y != 0 if X != 0 or Y != 0. - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth); @@ -1022,7 +1023,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); + computeKnownBits(X, KnownZero, KnownOne, TD, Depth); if (KnownOne[0]) return true; } @@ -1064,12 +1065,12 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); + computeKnownBits(X, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth); + computeKnownBits(Y, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; } @@ -1099,7 +1100,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); return KnownOne != 0; } @@ -1115,8 +1116,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); return (KnownZero & Mask) == Mask; } @@ -1141,7 +1141,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; - // Note that ConstantInt is handled by the general ComputeMaskedBits case + // Note that ConstantInt is handled by the general computeKnownBits case // below. if (Depth == 6) @@ -1186,7 +1186,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses - // ComputeMaskedBits, and pick whichever answer is better. + // computeKnownBits, and pick whichever answer is better. } break; @@ -1206,7 +1206,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -1231,7 +1231,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) @@ -1277,7 +1277,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); APInt Mask; - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -1363,7 +1363,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, Op1 = ConstantInt::get(V->getContext(), API); } - Value *Mul0 = NULL; + Value *Mul0 = nullptr; if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { if (Constant *Op1C = dyn_cast<Constant>(Op1)) if (Constant *MulC = dyn_cast<Constant>(Mul0)) { @@ -1387,7 +1387,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, } } - Value *Mul1 = NULL; + Value *Mul1 = nullptr; if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { if (Constant *Op0C = dyn_cast<Constant>(Op0)) if (Constant *MulC = dyn_cast<Constant>(Mul1)) { @@ -1431,7 +1431,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return 1; // Limit search depth. const Operator *I = dyn_cast<Operator>(V); - if (I == 0) return false; + if (!I) return false; // Check if the nsz fast-math flag is set if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I)) @@ -1512,7 +1512,7 @@ Value *llvm::isBytewiseValue(Value *V) { // If the top/bottom halves aren't the same, reject it. if (Val != Val2) - return 0; + return nullptr; } return ConstantInt::get(V->getContext(), Val); } @@ -1524,11 +1524,11 @@ Value *llvm::isBytewiseValue(Value *V) { Value *Elt = CA->getElementAsConstant(0); Value *Val = isBytewiseValue(Elt); if (!Val) - return 0; + return nullptr; for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) if (CA->getElementAsConstant(I) != Elt) - return 0; + return nullptr; return Val; } @@ -1539,7 +1539,7 @@ Value *llvm::isBytewiseValue(Value *V) { // %c = or i16 %a, %b // but until there is an example that actually needs this, it doesn't seem // worth worrying about. - return 0; + return nullptr; } @@ -1589,7 +1589,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, Value *V = FindInsertedValue(From, Idxs); if (!V) - return NULL; + return nullptr; // Insert the value in the new (sub) aggregrate return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), @@ -1640,7 +1640,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, if (Constant *C = dyn_cast<Constant>(V)) { C = C->getAggregateElement(idx_range[0]); - if (C == 0) return 0; + if (!C) return nullptr; return FindInsertedValue(C, idx_range.slice(1), InsertBefore); } @@ -1653,7 +1653,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, if (req_idx == idx_range.end()) { // We can't handle this without inserting insertvalues if (!InsertBefore) - return 0; + return nullptr; // The requested index identifies a part of a nested aggregate. Handle // this specially. For example, @@ -1707,7 +1707,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, } // Otherwise, we don't know (such as, extracting from a function return value // or load instruction) - return 0; + return nullptr; } /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if @@ -1733,7 +1733,8 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, } Ptr = GEP->getPointerOperand(); - } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) { + } else if (Operator::getOpcode(Ptr) == Instruction::BitCast || + Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) { Ptr = cast<Operator>(Ptr)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { if (GA->mayBeOverridden()) @@ -1768,13 +1769,13 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Make sure the index-ee is a pointer to array of i8. PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); - if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) + if (!AT || !AT->getElementType()->isIntegerTy(8)) return false; // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); - if (FirstIdx == 0 || !FirstIdx->isZero()) + if (!FirstIdx || !FirstIdx->isZero()) return false; // If the second index isn't a ConstantInt, then this is a variable index @@ -1806,7 +1807,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Must be a Constant Array const ConstantDataArray *Array = dyn_cast<ConstantDataArray>(GV->getInitializer()); - if (Array == 0 || !Array->isString()) + if (!Array || !Array->isString()) return false; // Get the number of elements in the array @@ -1902,7 +1903,8 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { + } else if (Operator::getOpcode(V) == Instruction::BitCast || + Operator::getOpcode(V) == Instruction::AddrSpaceCast) { V = cast<Operator>(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (GA->mayBeOverridden()) @@ -1912,7 +1914,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) // TODO: Acquire a DominatorTree and use it. - if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { + if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) { V = Simplified; continue; } @@ -1959,9 +1961,8 @@ llvm::GetUnderlyingObjects(Value *V, /// are lifetime markers. /// bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI); + for (const User *U : V->users()) { + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); if (!II) return false; if (II->getIntrinsicID() != Intrinsic::lifetime_start && @@ -1987,7 +1988,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return true; case Instruction::UDiv: case Instruction::URem: - // x / y is undefined if y == 0, but calcuations like x / 3 are safe. + // x / y is undefined if y == 0, but calculations like x / 3 are safe. return isKnownNonZero(Inst->getOperand(1), TD); case Instruction::SDiv: case Instruction::SRem: { @@ -2001,19 +2002,21 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(Op, KnownZero, KnownOne, TD); + computeKnownBits(Op, KnownZero, KnownOne, TD); return !!KnownZero; } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(Inst); - if (!LI->isUnordered()) + if (!LI->isUnordered() || + // Speculative load may create a race that did not exist in the source. + LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; - return LI->getPointerOperand()->isDereferenceablePointer(); + return LI->getPointerOperand()->isDereferenceablePointer(TD); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { switch (II->getIntrinsicID()) { - // These synthetic intrinsics have no side-effects, and just mark + // These synthetic intrinsics have no side-effects and just mark // information about their operands. // FIXME: There are other no-op synthetic instructions that potentially // should be considered at least *safe* to speculate... @@ -2033,6 +2036,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Intrinsic::umul_with_overflow: case Intrinsic::usub_with_overflow: return true; + // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set + // errno like libm sqrt would. + case Intrinsic::sqrt: + case Intrinsic::fma: + case Intrinsic::fmuladd: + return true; // TODO: some fp intrinsics are marked as having the same error handling // as libm. They're safe to speculate when they won't error. // TODO: are convert_{from,to}_fp16 safe? @@ -2068,14 +2077,18 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { // Alloca never returns null, malloc might. if (isa<AllocaInst>(V)) return true; - // A byval argument is never null. + // A byval, inalloca, or nonnull argument is never null. if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); + return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); // Global values are not null unless extern weak. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) return !GV->hasExternalWeakLinkage(); + if (ImmutableCallSite CS = V) + if (CS.isReturnNonNull()) + return true; + // operator new never returns null. if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true)) return true; |