diff options
Diffstat (limited to 'contrib/llvm/lib/Analysis')
58 files changed, 39766 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp new file mode 100644 index 000000000000..b8b6d37a792f --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -0,0 +1,556 @@ +//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the generic AliasAnalysis interface which is used as the +// common interface used by all clients and implementations of alias analysis. +// +// This file also implements the default version of the AliasAnalysis interface +// that is to be used when no other implementation is specified. This does some +// simple tests that detect obvious cases: two different global pointers cannot +// alias, a global cannot alias a malloc, two different mallocs cannot alias, +// etc. +// +// This alias analysis implementation really isn't very good for anything, but +// it is very fast, and makes a nice clean default implementation. Because it +// handles lots of little corner cases, other, more complex, alias analysis +// implementations may choose to rely on this pass to resolve these simple and +// easy cases. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetLibraryInfo.h" +using namespace llvm; + +// Register the AliasAnalysis interface, providing a nice name to refer to. +INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA) +char AliasAnalysis::ID = 0; + +//===----------------------------------------------------------------------===// +// Default chaining methods +//===----------------------------------------------------------------------===// + +AliasAnalysis::AliasResult +AliasAnalysis::alias(const Location &LocA, const Location &LocB) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->alias(LocA, LocB); +} + +bool AliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->pointsToConstantMemory(Loc, OrLocal); +} + +void AliasAnalysis::deleteValue(Value *V) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + AA->deleteValue(V); +} + +void AliasAnalysis::copyValue(Value *From, Value *To) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + AA->copyValue(From, To); +} + +void AliasAnalysis::addEscapingUse(Use &U) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + AA->addEscapingUse(U); +} + + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + assert(AA && "AA 
didn't call InitializeAliasAnalysis in its run method!"); + + ModRefBehavior MRB = getModRefBehavior(CS); + if (MRB == DoesNotAccessMemory) + return NoModRef; + + ModRefResult Mask = ModRef; + if (onlyReadsMemory(MRB)) + Mask = Ref; + + if (onlyAccessesArgPointees(MRB)) { + bool doesAlias = false; + if (doesAccessArgPointees(MRB)) { + MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa); + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) { + const Value *Arg = *AI; + if (!Arg->getType()->isPointerTy()) + continue; + Location CSLoc(Arg, UnknownSize, CSTag); + if (!isNoAlias(CSLoc, Loc)) { + doesAlias = true; + break; + } + } + } + if (!doesAlias) + return NoModRef; + } + + // If Loc is a constant memory location, the call definitely could not + // modify the memory location. + if ((Mask & Mod) && pointsToConstantMemory(Loc)) + Mask = ModRefResult(Mask & ~Mod); + + // If this is the end of the chain, don't forward. + if (!AA) return Mask; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any mask we've managed to compute. + return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask); +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + + // If CS1 or CS2 are readnone, they don't interact. + ModRefBehavior CS1B = getModRefBehavior(CS1); + if (CS1B == DoesNotAccessMemory) return NoModRef; + + ModRefBehavior CS2B = getModRefBehavior(CS2); + if (CS2B == DoesNotAccessMemory) return NoModRef; + + // If they both only read from memory, there is no dependence. + if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B)) + return NoModRef; + + AliasAnalysis::ModRefResult Mask = ModRef; + + // If CS1 only reads memory, the only dependence on CS2 can be + // from CS1 reading memory written by CS2. 
+ if (onlyReadsMemory(CS1B)) + Mask = ModRefResult(Mask & Ref); + + // If CS2 only access memory through arguments, accumulate the mod/ref + // information from CS1's references to the memory referenced by + // CS2's arguments. + if (onlyAccessesArgPointees(CS2B)) { + AliasAnalysis::ModRefResult R = NoModRef; + if (doesAccessArgPointees(CS2B)) { + MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa); + for (ImmutableCallSite::arg_iterator + I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS2Loc(Arg, UnknownSize, CS2Tag); + R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask); + if (R == Mask) + break; + } + } + return R; + } + + // If CS1 only accesses memory through arguments, check if CS2 references + // any of the memory referenced by CS1's arguments. If not, return NoModRef. + if (onlyAccessesArgPointees(CS1B)) { + AliasAnalysis::ModRefResult R = NoModRef; + if (doesAccessArgPointees(CS1B)) { + MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa); + for (ImmutableCallSite::arg_iterator + I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS1Loc(Arg, UnknownSize, CS1Tag); + if (getModRefInfo(CS2, CS1Loc) != NoModRef) { + R = Mask; + break; + } + } + } + if (R == NoModRef) + return R; + } + + // If this is the end of the chain, don't forward. + if (!AA) return Mask; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any mask we've managed to compute. 
+ return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask); +} + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + + ModRefBehavior Min = UnknownModRefBehavior; + + // Call back into the alias analysis with the other form of getModRefBehavior + // to see if it can give a better response. + if (const Function *F = CS.getCalledFunction()) + Min = getModRefBehavior(F); + + // If this is the end of the chain, don't forward. + if (!AA) return Min; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any result we've managed to compute. + return ModRefBehavior(AA->getModRefBehavior(CS) & Min); +} + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getModRefBehavior(const Function *F) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->getModRefBehavior(F); +} + +//===----------------------------------------------------------------------===// +// AliasAnalysis non-virtual helper method implementation +//===----------------------------------------------------------------------===// + +AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) { + return Location(LI->getPointerOperand(), + getTypeStoreSize(LI->getType()), + LI->getMetadata(LLVMContext::MD_tbaa)); +} + +AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) { + return Location(SI->getPointerOperand(), + getTypeStoreSize(SI->getValueOperand()->getType()), + SI->getMetadata(LLVMContext::MD_tbaa)); +} + +AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) { + return Location(VI->getPointerOperand(), + UnknownSize, + VI->getMetadata(LLVMContext::MD_tbaa)); +} + +AliasAnalysis::Location +AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) { + return Location(CXI->getPointerOperand(), + getTypeStoreSize(CXI->getCompareOperand()->getType()), + 
CXI->getMetadata(LLVMContext::MD_tbaa)); +} + +AliasAnalysis::Location +AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) { + return Location(RMWI->getPointerOperand(), + getTypeStoreSize(RMWI->getValOperand()->getType()), + RMWI->getMetadata(LLVMContext::MD_tbaa)); +} + +AliasAnalysis::Location +AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have TBAA tags. For memcpy, they apply + // to both the source and the destination. + MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa); + + return Location(MTI->getRawSource(), Size, TBAATag); +} + +AliasAnalysis::Location +AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have TBAA tags. For memcpy, they apply + // to both the source and the destination. + MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa); + + return Location(MTI->getRawDest(), Size, TBAATag); +} + + + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { + // Be conservative in the face of volatile/atomic. + if (!L->isUnordered()) + return ModRef; + + // If the load address doesn't alias the given address, it doesn't read + // or write the specified memory. + if (!alias(getLocation(L), Loc)) + return NoModRef; + + // Otherwise, a load just reads. + return Ref; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { + // Be conservative in the face of volatile/atomic. + if (!S->isUnordered()) + return ModRef; + + // If the store address cannot alias the pointer in question, then the + // specified memory cannot be modified by the store. 
+ if (!alias(getLocation(S), Loc)) + return NoModRef; + + // If the pointer is a pointer to constant memory, then it could not have been + // modified by this store. + if (pointsToConstantMemory(Loc)) + return NoModRef; + + // Otherwise, a store just writes. + return Mod; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) { + // If the va_arg address cannot alias the pointer in question, then the + // specified memory cannot be accessed by the va_arg. + if (!alias(getLocation(V), Loc)) + return NoModRef; + + // If the pointer is a pointer to constant memory, then it could not have been + // modified by this va_arg. + if (pointsToConstantMemory(Loc)) + return NoModRef; + + // Otherwise, a va_arg reads and writes. + return ModRef; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) { + // Acquire/Release cmpxchg has properties that matter for arbitrary addresses. + if (CX->getOrdering() > Monotonic) + return ModRef; + + // If the cmpxchg address does not alias the location, it does not access it. + if (!alias(getLocation(CX), Loc)) + return NoModRef; + + return ModRef; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { + // Acquire/Release atomicrmw has properties that matter for arbitrary addresses. + if (RMW->getOrdering() > Monotonic) + return ModRef; + + // If the atomicrmw address does not alias the location, it does not access it. + if (!alias(getLocation(RMW), Loc)) + return NoModRef; + + return ModRef; +} + +namespace { + /// Only find pointer captures which happen before the given instruction. Uses + /// the dominator tree to determine whether one instruction is before another. + /// Only support the case where the Value is defined in the same basic block + /// as the given instruction and the use. 
+ struct CapturesBefore : public CaptureTracker { + CapturesBefore(const Instruction *I, DominatorTree *DT) + : BeforeHere(I), DT(DT), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { + Instruction *I = cast<Instruction>(U->getUser()); + BasicBlock *BB = I->getParent(); + // We explore this usage only if the usage can reach "BeforeHere". + // If use is not reachable from entry, there is no need to explore. + if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + // If the value is defined in the same basic block as use and BeforeHere, + // there is no need to explore the use if BeforeHere dominates use. + // Check whether there is a path from I to BeforeHere. + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !isPotentiallyReachable(I, BeforeHere, DT)) + return false; + return true; + } + + bool captured(Use *U) { + Instruction *I = cast<Instruction>(U->getUser()); + BasicBlock *BB = I->getParent(); + // Same logic as in shouldExplore. + if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !isPotentiallyReachable(I, BeforeHere, DT)) + return false; + Captured = true; + return true; + } + + const Instruction *BeforeHere; + DominatorTree *DT; + + bool Captured; + }; +} + +// FIXME: this is really just shoring-up a deficiency in alias analysis. +// BasicAA isn't willing to spend linear time determining whether an alloca +// was captured before or after this particular call, while we are. However, +// with a smarter AA in place, this test is just wasting compile time. 
+AliasAnalysis::ModRefResult +AliasAnalysis::callCapturesBefore(const Instruction *I, + const AliasAnalysis::Location &MemLoc, + DominatorTree *DT) { + if (!DT || !TD) return AliasAnalysis::ModRef; + + const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); + if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) || + isa<Constant>(Object)) + return AliasAnalysis::ModRef; + + ImmutableCallSite CS(I); + if (!CS.getInstruction() || CS.getInstruction() == Object) + return AliasAnalysis::ModRef; + + CapturesBefore CB(I, DT); + llvm::PointerMayBeCaptured(Object, &CB); + if (CB.Captured) + return AliasAnalysis::ModRef; + + unsigned ArgNo = 0; + AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef; + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. + if (!(*CI)->getType()->isPointerTy() || + (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) + continue; + + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (isNoAlias(AliasAnalysis::Location(*CI), + AliasAnalysis::Location(Object))) + continue; + if (CS.doesNotAccessMemory(ArgNo)) + continue; + if (CS.onlyReadsMemory(ArgNo)) { + R = AliasAnalysis::Ref; + continue; + } + return AliasAnalysis::ModRef; + } + return R; +} + +// AliasAnalysis destructor: DO NOT move this to the header file for +// AliasAnalysis or else clients of the AliasAnalysis class may not depend on +// the AliasAnalysis.o file in the current .a file, causing alias analysis +// support to not be included in the tool correctly! 
+// +AliasAnalysis::~AliasAnalysis() {} + +/// InitializeAliasAnalysis - Subclasses must call this method to initialize the +/// AliasAnalysis interface before any other methods are called. +/// +void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { + TD = P->getAnalysisIfAvailable<DataLayout>(); + TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>(); + AA = &P->getAnalysis<AliasAnalysis>(); +} + +// getAnalysisUsage - All alias analysis implementations should invoke this +// directly (using AliasAnalysis::getAnalysisUsage(AU)). +void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); // All AA's chain +} + +/// getTypeStoreSize - Return the DataLayout store size for the given type, +/// if known, or a conservative value otherwise. +/// +uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) { + return TD ? TD->getTypeStoreSize(Ty) : UnknownSize; +} + +/// canBasicBlockModify - Return true if it is possible for execution of the +/// specified basic block to modify the value pointed to by Ptr. +/// +bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB, + const Location &Loc) { + return canInstructionRangeModify(BB.front(), BB.back(), Loc); +} + +/// canInstructionRangeModify - Return true if it is possible for the execution +/// of the specified instructions to modify the value pointed to by Ptr. The +/// instructions to consider are all of the instructions in the range of [I1,I2] +/// INCLUSIVE. I1 and I2 must be in the same basic block. +/// +bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1, + const Instruction &I2, + const Location &Loc) { + assert(I1.getParent() == I2.getParent() && + "Instructions not in same basic block!"); + BasicBlock::const_iterator I = &I1; + BasicBlock::const_iterator E = &I2; + ++E; // Convert from inclusive to exclusive range. 
+ + for (; I != E; ++I) // Check every instruction in range + if (getModRefInfo(I, Loc) & Mod) + return true; + return false; +} + +/// isNoAliasCall - Return true if this pointer is returned by a noalias +/// function. +bool llvm::isNoAliasCall(const Value *V) { + if (isa<CallInst>(V) || isa<InvokeInst>(V)) + return ImmutableCallSite(cast<Instruction>(V)) + .paramHasAttr(0, Attribute::NoAlias); + return false; +} + +/// isNoAliasArgument - Return true if this is an argument with the noalias +/// attribute. +bool llvm::isNoAliasArgument(const Value *V) +{ + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasNoAliasAttr(); + return false; +} + +/// isIdentifiedObject - Return true if this pointer refers to a distinct and +/// identifiable object. This returns true for: +/// Global Variables and Functions (but not Global Aliases) +/// Allocas and Mallocs +/// ByVal and NoAlias Arguments +/// NoAlias returns +/// +bool llvm::isIdentifiedObject(const Value *V) { + if (isa<AllocaInst>(V)) + return true; + if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V)) + return true; + if (isNoAliasCall(V)) + return true; + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasNoAliasAttr() || A->hasByValAttr(); + return false; +} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp new file mode 100644 index 000000000000..9f4a47c77e03 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp @@ -0,0 +1,171 @@ +//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements a pass which can be used to count how many alias queries +// are being made and how the alias analysis implementation being used responds. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> +PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); +static cl::opt<bool> +PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); + +namespace { + class AliasAnalysisCounter : public ModulePass, public AliasAnalysis { + unsigned No, May, Partial, Must; + unsigned NoMR, JustRef, JustMod, MR; + Module *M; + public: + static char ID; // Class identification, replacement for typeinfo + AliasAnalysisCounter() : ModulePass(ID) { + initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry()); + No = May = Partial = Must = 0; + NoMR = JustRef = JustMod = MR = 0; + } + + void printLine(const char *Desc, unsigned Val, unsigned Sum) { + errs() << " " << Val << " " << Desc << " responses (" + << Val*100/Sum << "%)\n"; + } + ~AliasAnalysisCounter() { + unsigned AASum = No+May+Partial+Must; + unsigned MRSum = NoMR+JustRef+JustMod+MR; + if (AASum + MRSum) { // Print a report if any counted queries occurred... 
+ errs() << "\n===== Alias Analysis Counter Report =====\n" + << " Analysis counted:\n" + << " " << AASum << " Total Alias Queries Performed\n"; + if (AASum) { + printLine("no alias", No, AASum); + printLine("may alias", May, AASum); + printLine("partial alias", Partial, AASum); + printLine("must alias", Must, AASum); + errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" + << May*100/AASum << "%/" + << Partial*100/AASum << "%/" + << Must*100/AASum<<"%\n\n"; + } + + errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n"; + if (MRSum) { + printLine("no mod/ref", NoMR, MRSum); + printLine("ref", JustRef, MRSum); + printLine("mod", JustMod, MRSum); + printLine("mod/ref", MR, MRSum); + errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum + << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum + << "%/" << MR*100/MRSum <<"%\n\n"; + } + } + } + + bool runOnModule(Module &M) { + this->M = &M; + InitializeAliasAnalysis(this); + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.setPreservesAll(); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + // FIXME: We could count these too... + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) { + return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal); + } + + // Forwarding functions: just delegate to a real AA implementation, counting + // the number of responses... 
+ AliasResult alias(const Location &LocA, const Location &LocB); + + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1,CS2); + } + }; +} + +char AliasAnalysisCounter::ID = 0; +INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa", + "Count Alias Analysis Query Responses", false, true, false) + +ModulePass *llvm::createAliasAnalysisCounterPass() { + return new AliasAnalysisCounter(); +} + +AliasAnalysis::AliasResult +AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) { + AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB); + + const char *AliasString = 0; + switch (R) { + case NoAlias: No++; AliasString = "No alias"; break; + case MayAlias: May++; AliasString = "May alias"; break; + case PartialAlias: Partial++; AliasString = "Partial alias"; break; + case MustAlias: Must++; AliasString = "Must alias"; break; + } + + if (PrintAll || (PrintAllFailures && R == MayAlias)) { + errs() << AliasString << ":\t"; + errs() << "[" << LocA.Size << "B] "; + WriteAsOperand(errs(), LocA.Ptr, true, M); + errs() << ", "; + errs() << "[" << LocB.Size << "B] "; + WriteAsOperand(errs(), LocB.Ptr, true, M); + errs() << "\n"; + } + + return R; +} + +AliasAnalysis::ModRefResult +AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc); + + const char *MRString = 0; + switch (R) { + case NoModRef: NoMR++; MRString = "NoModRef"; break; + case Ref: JustRef++; MRString = "JustRef"; break; + case Mod: JustMod++; MRString = "JustMod"; break; + case ModRef: MR++; MRString = "ModRef"; break; + } + + if (PrintAll || (PrintAllFailures && R == ModRef)) { + errs() << MRString << ": Ptr: "; + errs() << "[" << Loc.Size << "B] "; + WriteAsOperand(errs(), Loc.Ptr, true, M); + errs() << "\t<->" << *CS.getInstruction() << 
'\n'; + } + return R; +} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp new file mode 100644 index 000000000000..a571463dfe12 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -0,0 +1,372 @@ +//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple N^2 alias analysis accuracy evaluator. +// Basically, for each function in the program, it simply queries to see how the +// alias analysis implementation answers alias queries between each pair of +// pointers in the function. +// +// This is inspired and adapted from code by: Naveen Neelakantam, Francesco +// Spadini, and Wojciech Stryjewski. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden); + +static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden); +static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden); +static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden); +static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden); + +static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden); +static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden); +static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden); +static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden); + +static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden); + +namespace { + class AAEval : public FunctionPass { + unsigned NoAlias, MayAlias, PartialAlias, MustAlias; + unsigned NoModRef, Mod, Ref, ModRef; + + public: + static char ID; // Pass identification, replacement for typeid + AAEval() : FunctionPass(ID) { + initializeAAEvalPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.setPreservesAll(); + } + + bool doInitialization(Module &M) { + NoAlias = MayAlias = PartialAlias = MustAlias = 0; + NoModRef = Mod = Ref = ModRef = 0; + + if (PrintAll) { + PrintNoAlias = PrintMayAlias = true; + PrintPartialAlias = PrintMustAlias = true; + PrintNoModRef = 
PrintMod = PrintRef = PrintModRef = true; + } + return false; + } + + bool runOnFunction(Function &F); + bool doFinalization(Module &M); + }; +} + +char AAEval::ID = 0; +INITIALIZE_PASS_BEGIN(AAEval, "aa-eval", + "Exhaustive Alias Analysis Precision Evaluator", false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(AAEval, "aa-eval", + "Exhaustive Alias Analysis Precision Evaluator", false, true) + +FunctionPass *llvm::createAAEvalPass() { return new AAEval(); } + +static void PrintResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { + if (P) { + std::string o1, o2; + { + raw_string_ostream os1(o1), os2(o2); + WriteAsOperand(os1, V1, true, M); + WriteAsOperand(os2, V2, true, M); + } + + if (o2 < o1) + std::swap(o1, o2); + errs() << " " << Msg << ":\t" + << o1 << ", " + << o2 << "\n"; + } +} + +static inline void +PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, + Module *M) { + if (P) { + errs() << " " << Msg << ": Ptr: "; + WriteAsOperand(errs(), Ptr, true, M); + errs() << "\t<->" << *I << '\n'; + } +} + +static inline void +PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, + Module *M) { + if (P) { + errs() << " " << Msg << ": " << *CSA.getInstruction() + << " <-> " << *CSB.getInstruction() << '\n'; + } +} + +static inline void +PrintLoadStoreResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { + if (P) { + errs() << " " << Msg << ": " << *V1 + << " <-> " << *V2 << '\n'; + } +} + +static inline bool isInterestingPointer(Value *V) { + return V->getType()->isPointerTy() + && !isa<ConstantPointerNull>(V); +} + +bool AAEval::runOnFunction(Function &F) { + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + + SetVector<Value *> Pointers; + SetVector<CallSite> CallSites; + SetVector<Value *> Loads; + SetVector<Value *> Stores; + + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) + if 
(I->getType()->isPointerTy()) // Add all pointer arguments. + Pointers.insert(I); + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (I->getType()->isPointerTy()) // Add all pointer instructions. + Pointers.insert(&*I); + if (EvalTBAA && isa<LoadInst>(&*I)) + Loads.insert(&*I); + if (EvalTBAA && isa<StoreInst>(&*I)) + Stores.insert(&*I); + Instruction &Inst = *I; + if (CallSite CS = cast<Value>(&Inst)) { + Value *Callee = CS.getCalledValue(); + // Skip actual functions for direct function calls. + if (!isa<Function>(Callee) && isInterestingPointer(Callee)) + Pointers.insert(Callee); + // Consider formals. + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) + if (isInterestingPointer(*AI)) + Pointers.insert(*AI); + CallSites.insert(CS); + } else { + // Consider all operands. + for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end(); + OI != OE; ++OI) + if (isInterestingPointer(*OI)) + Pointers.insert(*OI); + } + } + + if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias || + PrintNoModRef || PrintMod || PrintRef || PrintModRef) + errs() << "Function: " << F.getName() << ": " << Pointers.size() + << " pointers, " << CallSites.size() << " call sites\n"; + + // iterate over the worklist, and run the full (n^2)/2 disambiguations + for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); + I1 != E; ++I1) { + uint64_t I1Size = AliasAnalysis::UnknownSize; + Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); + if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); + + for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { + uint64_t I2Size = AliasAnalysis::UnknownSize; + Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); + if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); + + switch (AA.alias(*I1, I1Size, *I2, I2Size)) { + case AliasAnalysis::NoAlias: + PrintResults("NoAlias", 
PrintNoAlias, *I1, *I2, F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); + ++MustAlias; break; + } + } + } + + if (EvalTBAA) { + // iterate over all pairs of load, store + for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end(); + I1 != E; ++I1) { + for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end(); + I2 != E2; ++I2) { + switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)), + AA.getLocation(cast<StoreInst>(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + + // iterate over all pairs of store, store + for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); + I1 != E; ++I1) { + for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { + switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)), + AA.getLocation(cast<StoreInst>(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + 
PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + } + + // Mod/ref alias analysis: compare all pairs of calls and values + for (SetVector<CallSite>::iterator C = CallSites.begin(), + Ce = CallSites.end(); C != Ce; ++C) { + Instruction *I = C->getInstruction(); + + for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end(); + V != Ve; ++V) { + uint64_t Size = AliasAnalysis::UnknownSize; + Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); + if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); + + switch (AA.getModRefInfo(*C, *V, Size)) { + case AliasAnalysis::NoModRef: + PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent()); + ++NoModRef; break; + case AliasAnalysis::Mod: + PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent()); + ++Mod; break; + case AliasAnalysis::Ref: + PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent()); + ++Ref; break; + case AliasAnalysis::ModRef: + PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent()); + ++ModRef; break; + } + } + } + + // Mod/ref alias analysis: compare all pairs of calls + for (SetVector<CallSite>::iterator C = CallSites.begin(), + Ce = CallSites.end(); C != Ce; ++C) { + for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) { + if (D == C) + continue; + switch (AA.getModRefInfo(*C, *D)) { + case AliasAnalysis::NoModRef: + PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent()); + ++NoModRef; break; + case AliasAnalysis::Mod: + PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent()); + ++Mod; break; + case AliasAnalysis::Ref: + PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent()); + ++Ref; break; + case AliasAnalysis::ModRef: + PrintModRefResults("Both ModRef", PrintModRef, *C, *D, 
F.getParent()); + ++ModRef; break; + } + } + } + + return false; +} + +static void PrintPercent(unsigned Num, unsigned Sum) { + errs() << "(" << Num*100ULL/Sum << "." + << ((Num*1000ULL/Sum) % 10) << "%)\n"; +} + +bool AAEval::doFinalization(Module &M) { + unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias; + errs() << "===== Alias Analysis Evaluator Report =====\n"; + if (AliasSum == 0) { + errs() << " Alias Analysis Evaluator Summary: No pointers!\n"; + } else { + errs() << " " << AliasSum << " Total Alias Queries Performed\n"; + errs() << " " << NoAlias << " no alias responses "; + PrintPercent(NoAlias, AliasSum); + errs() << " " << MayAlias << " may alias responses "; + PrintPercent(MayAlias, AliasSum); + errs() << " " << PartialAlias << " partial alias responses "; + PrintPercent(PartialAlias, AliasSum); + errs() << " " << MustAlias << " must alias responses "; + PrintPercent(MustAlias, AliasSum); + errs() << " Alias Analysis Evaluator Pointer Alias Summary: " + << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/" + << PartialAlias*100/AliasSum << "%/" + << MustAlias*100/AliasSum << "%\n"; + } + + // Display the summary for mod/ref analysis + unsigned ModRefSum = NoModRef + Mod + Ref + ModRef; + if (ModRefSum == 0) { + errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n"; + } else { + errs() << " " << ModRefSum << " Total ModRef Queries Performed\n"; + errs() << " " << NoModRef << " no mod/ref responses "; + PrintPercent(NoModRef, ModRefSum); + errs() << " " << Mod << " mod responses "; + PrintPercent(Mod, ModRefSum); + errs() << " " << Ref << " ref responses "; + PrintPercent(Ref, ModRefSum); + errs() << " " << ModRef << " mod & ref responses "; + PrintPercent(ModRef, ModRefSum); + errs() << " Alias Analysis Evaluator Mod/Ref Summary: " + << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/" + << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n"; + } + + return false; +} diff --git 
a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp new file mode 100644 index 000000000000..f6178e36f0a9 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp @@ -0,0 +1,138 @@ +//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass checks alias analysis users to ensure that if they +// create a new value, they do not query AA without informing it of the value. +// It acts as a shim over any other AA pass you want. +// +// Yes keeping track of every value in the program is expensive, but this is +// a debugging pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include <set> +using namespace llvm; + +namespace { + + class AliasDebugger : public ModulePass, public AliasAnalysis { + + //What we do is simple. Keep track of every value the AA could + //know about, and verify that queries are one of those. 
+ //A query to a value that didn't exist when the AA was created + //means someone forgot to update the AA when creating new values + + std::set<const Value*> Vals; + + public: + static char ID; // Class identification, replacement for typeinfo + AliasDebugger() : ModulePass(ID) { + initializeAliasDebuggerPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) { + InitializeAliasAnalysis(this); // set up super class + + for(Module::global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + Vals.insert(&*I); + for (User::const_op_iterator OI = I->op_begin(), + OE = I->op_end(); OI != OE; ++OI) + Vals.insert(*OI); + } + + for(Module::iterator I = M.begin(), + E = M.end(); I != E; ++I){ + Vals.insert(&*I); + if(!I->isDeclaration()) { + for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); + AI != AE; ++AI) + Vals.insert(&*AI); + for (Function::const_iterator FI = I->begin(), FE = I->end(); + FI != FE; ++FI) + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + Vals.insert(&*BI); + for (User::const_op_iterator OI = BI->op_begin(), + OE = BI->op_end(); OI != OE; ++OI) + Vals.insert(*OI); + } + } + + } + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.setPreservesAll(); // Does not transform code + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. 
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const Location &LocA, const Location &LocB) { + assert(Vals.find(LocA.Ptr) != Vals.end() && + "Never seen value in AA before"); + assert(Vals.find(LocB.Ptr) != Vals.end() && + "Never seen value in AA before"); + return AliasAnalysis::alias(LocA, LocB); + } + + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::getModRefInfo(CS, Loc); + } + + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1,CS2); + } + + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) { + assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + + virtual void deleteValue(Value *V) { + assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); + AliasAnalysis::deleteValue(V); + } + virtual void copyValue(Value *From, Value *To) { + Vals.insert(To); + AliasAnalysis::copyValue(From, To); + } + + }; +} + +char AliasDebugger::ID = 0; +INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa", + "AA use debugger", false, true, false) + +Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } + diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp new file mode 100644 index 000000000000..2289c1223e97 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -0,0 +1,653 @@ +//===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
/// mergeSetIn - Merge the specified alias set into this alias set.
///
/// After the call, AS holds no unknown instructions and no pointers of its
/// own; it forwards to this set instead.  Neither set may already be a
/// forwarding set.
///
/// \param AS   the set whose contents are absorbed.
/// \param AST  the owning tracker, used here only to reach the alias
///             analysis.
///
void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
  assert(!AS.Forward && "Alias set is already forwarding!");
  assert(!Forward && "This set is a forwarding set!!");

  // Update the alias and access types of this set...
  AccessTy |= AS.AccessTy;
  AliasTy |= AS.AliasTy;
  Volatile |= AS.Volatile;

  if (AliasTy == MustAlias) {
    // Check that these two merged sets really are must aliases. Since both
    // used to be must-alias sets, we can just check any pointer from each set
    // for aliasing.
    AliasAnalysis &AA = AST.getAliasAnalysis();
    PointerRec *L = getSomePointer();
    PointerRec *R = AS.getSomePointer();

    // If the pointers are not a must-alias pair, this set becomes a may alias.
    if (AA.alias(AliasAnalysis::Location(L->getValue(),
                                         L->getSize(),
                                         L->getTBAAInfo()),
                 AliasAnalysis::Location(R->getValue(),
                                         R->getSize(),
                                         R->getTBAAInfo()))
        != AliasAnalysis::MustAlias)
      AliasTy = MayAlias;
  }

  // Move AS's unknown instructions over: take the whole container when this
  // set has none, otherwise append them.  Either way AS ends up with none.
  if (UnknownInsts.empty()) { // Merge call sites...
    if (!AS.UnknownInsts.empty())
      std::swap(UnknownInsts, AS.UnknownInsts);
  } else if (!AS.UnknownInsts.empty()) {
    UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
    AS.UnknownInsts.clear();
  }

  AS.Forward = this; // Forward across AS now...
  addRef(); // AS is now pointing to us...

  // Merge the list of constituent pointers...
  if (AS.PtrList) {
    // Splice AS's list onto the tail of ours: our tail slot now holds AS's
    // first entry, that entry's back-link points at our old tail slot, and
    // our tail becomes AS's tail.
    *PtrListEnd = AS.PtrList;
    AS.PtrList->setPrevInList(PtrListEnd);
    PtrListEnd = AS.PtrListEnd;

    // Reset AS to the empty-list state.
    AS.PtrList = 0;
    AS.PtrListEnd = &AS.PtrList;
    assert(*AS.PtrListEnd == 0 && "End of list is not null?");
  }
}
+ assert(*PtrListEnd == 0 && "End of list is not null?"); + *PtrListEnd = &Entry; + PtrListEnd = Entry.setPrevInList(PtrListEnd); + assert(*PtrListEnd == 0 && "End of list is not null?"); + addRef(); // Entry points to alias set. +} + +void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { + UnknownInsts.push_back(I); + + if (!I->mayWriteToMemory()) { + AliasTy = MayAlias; + AccessTy |= Refs; + return; + } + + // FIXME: This should use mod/ref information to make this not suck so bad + AliasTy = MayAlias; + AccessTy = ModRef; +} + +/// aliasesPointer - Return true if the specified pointer "may" (or must) +/// alias one of the members in the set. +/// +bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, + const MDNode *TBAAInfo, + AliasAnalysis &AA) const { + if (AliasTy == MustAlias) { + assert(UnknownInsts.empty() && "Illegal must alias set!"); + + // If this is a set of MustAliases, only check to see if the pointer aliases + // SOME value in the set. + PointerRec *SomePtr = getSomePointer(); + assert(SomePtr && "Empty must-alias set??"); + return AA.alias(AliasAnalysis::Location(SomePtr->getValue(), + SomePtr->getSize(), + SomePtr->getTBAAInfo()), + AliasAnalysis::Location(Ptr, Size, TBAAInfo)); + } + + // If this is a may-alias set, we have to check all of the pointers in the set + // to be sure it doesn't alias the set... + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo), + AliasAnalysis::Location(I.getPointer(), I.getSize(), + I.getTBAAInfo()))) + return true; + + // Check the unknown instructions... 
+ if (!UnknownInsts.empty()) { + for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) + if (AA.getModRefInfo(UnknownInsts[i], + AliasAnalysis::Location(Ptr, Size, TBAAInfo)) != + AliasAnalysis::NoModRef) + return true; + } + + return false; +} + +bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const { + if (!Inst->mayReadOrWriteMemory()) + return false; + + for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { + CallSite C1 = getUnknownInst(i), C2 = Inst; + if (!C1 || !C2 || + AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || + AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) + return true; + } + + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(), + I.getSize(), + I.getTBAAInfo())) != + AliasAnalysis::NoModRef) + return true; + + return false; +} + +void AliasSetTracker::clear() { + // Delete all the PointerRec entries. + for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); + I != E; ++I) + I->second->eraseFromList(); + + PointerMap.clear(); + + // The alias sets should all be clear now. + AliasSets.clear(); +} + + +/// findAliasSetForPointer - Given a pointer, find the one alias set to put the +/// instruction referring to the pointer into. If there are multiple alias sets +/// that may alias the pointer, merge them together and return the unified set. +/// +AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, + uint64_t Size, + const MDNode *TBAAInfo) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue; + + if (FoundSet == 0) { // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + } else { // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. 
+ } + } + + return FoundSet; +} + +/// containsPointer - Return true if the specified location is represented by +/// this alias set, false otherwise. This does not modify the AST object or +/// alias sets. +bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, + const MDNode *TBAAInfo) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) + return true; + return false; +} + + + +AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesUnknownInst(Inst, AA)) + continue; + + if (FoundSet == 0) // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + else if (!I->Forward) // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + return FoundSet; +} + + + + +/// getAliasSetForPointer - Return the alias set that the specified pointer +/// lives in. +AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size, + const MDNode *TBAAInfo, + bool *New) { + AliasSet::PointerRec &Entry = getEntryFor(Pointer); + + // Check to see if the pointer is already known. + if (Entry.hasAliasSet()) { + Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); + // Return the set! + return *Entry.getAliasSet(*this)->getForwardedTarget(*this); + } + + if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) { + // Add it to the alias set it aliases. + AS->addPointer(*this, Entry, Size, TBAAInfo); + return *AS; + } + + if (New) *New = true; + // Otherwise create a new alias set to hold the loaded pointer. 
+ AliasSets.push_back(new AliasSet()); + AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo); + return AliasSets.back(); +} + +bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { + bool NewPtr; + addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(LoadInst *LI) { + if (LI->getOrdering() > Monotonic) return addUnknown(LI); + AliasSet::AccessType ATy = AliasSet::Refs; + bool NewPtr; + AliasSet &AS = addPointer(LI->getOperand(0), + AA.getTypeStoreSize(LI->getType()), + LI->getMetadata(LLVMContext::MD_tbaa), + ATy, NewPtr); + if (LI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(StoreInst *SI) { + if (SI->getOrdering() > Monotonic) return addUnknown(SI); + AliasSet::AccessType ATy = AliasSet::Mods; + bool NewPtr; + Value *Val = SI->getOperand(0); + AliasSet &AS = addPointer(SI->getOperand(1), + AA.getTypeStoreSize(Val->getType()), + SI->getMetadata(LLVMContext::MD_tbaa), + ATy, NewPtr); + if (SI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(VAArgInst *VAAI) { + bool NewPtr; + addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize, + VAAI->getMetadata(LLVMContext::MD_tbaa), + AliasSet::ModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::addUnknown(Instruction *Inst) { + if (isa<DbgInfoIntrinsic>(Inst)) + return true; // Ignore DbgInfo Intrinsics. + if (!Inst->mayReadOrWriteMemory()) + return true; // doesn't alias anything + + AliasSet *AS = findAliasSetForUnknownInst(Inst); + if (AS) { + AS->addUnknownInst(Inst, AA); + return false; + } + AliasSets.push_back(new AliasSet()); + AS = &AliasSets.back(); + AS->addUnknownInst(Inst, AA); + return true; +} + +bool AliasSetTracker::add(Instruction *I) { + // Dispatch to one of the other add methods. 
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return add(LI); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return add(SI); + if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + return add(VAAI); + return addUnknown(I); +} + +void AliasSetTracker::add(BasicBlock &BB) { + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) + add(I); +} + +void AliasSetTracker::add(const AliasSetTracker &AST) { + assert(&AA == &AST.AA && + "Merging AliasSetTracker objects with different Alias Analyses!"); + + // Loop over all of the alias sets in AST, adding the pointers contained + // therein into the current alias sets. This can cause alias sets to be + // merged together in the current AST. + for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) { + if (I->Forward) continue; // Ignore forwarding alias sets + + AliasSet &AS = const_cast<AliasSet&>(*I); + + // If there are any call sites in the alias set, add them to this AST. + for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i) + add(AS.UnknownInsts[i]); + + // Loop over all of the pointers in this alias set. + bool X; + for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { + AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(), + ASI.getTBAAInfo(), + (AliasSet::AccessType)AS.AccessTy, X); + if (AS.isVolatile()) NewAS.setVolatile(); + } + } +} + +/// remove - Remove the specified (potentially non-empty) alias set from the +/// tracker. +void AliasSetTracker::remove(AliasSet &AS) { + // Drop all call sites. + AS.UnknownInsts.clear(); + + // Clear the alias set. + unsigned NumRefs = 0; + while (!AS.empty()) { + AliasSet::PointerRec *P = AS.PtrList; + + Value *ValToRemove = P->getValue(); + + // Unlink and delete entry from the list of values. + P->eraseFromList(); + + // Remember how many references need to be dropped. + ++NumRefs; + + // Finally, remove the entry. + PointerMap.erase(ValToRemove); + } + + // Stop using the alias set, removing it. 
+ AS.RefCount -= NumRefs; + if (AS.RefCount == 0) + AS.removeFromTracker(*this); +} + +bool +AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { + AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(LoadInst *LI) { + uint64_t Size = AA.getTypeStoreSize(LI->getType()); + const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa); + AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(StoreInst *SI) { + uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); + const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa); + AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(VAArgInst *VAAI) { + AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), + AliasAnalysis::UnknownSize, + VAAI->getMetadata(LLVMContext::MD_tbaa)); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::removeUnknown(Instruction *I) { + if (!I->mayReadOrWriteMemory()) + return false; // doesn't alias anything + + AliasSet *AS = findAliasSetForUnknownInst(I); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(Instruction *I) { + // Dispatch to one of the other remove methods... + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return remove(LI); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return remove(SI); + if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + return remove(VAAI); + return removeUnknown(I); +} + + +// deleteValue method - This method is used to remove a pointer value from the +// AliasSetTracker entirely. It should be used when an instruction is deleted +// from the program to update the AST. 
// copyValue - This method should be used whenever a preexisting value in the
// program is copied or cloned, introducing a new value. Note that it is ok for
// clients that use this method to introduce the same value multiple times: if
// the tracker already knows about a value, it will ignore the request.
//
// The underlying alias analysis is always notified.  If From is not tracked,
// nothing else happens; otherwise To is added to From's alias set with From's
// recorded size and TBAA info.
//
void AliasSetTracker::copyValue(Value *From, Value *To) {
  // Notify the alias analysis implementation that this value is copied.
  AA.copyValue(From, To);

  // First, look up the PointerRec for this pointer.
  PointerMapType::iterator I = PointerMap.find_as(From);
  if (I == PointerMap.end())
    return; // Noop
  assert(I->second->hasAliasSet() && "Dead entry?");

  AliasSet::PointerRec &Entry = getEntryFor(To);
  if (Entry.hasAliasSet()) return; // Already in the tracker!

  // Add it to the alias set it aliases...
  // NOTE(review): From is looked up a second time here, presumably because
  // getEntryFor(To) can insert into PointerMap and invalidate I -- confirm
  // against getEntryFor before simplifying this.
  I = PointerMap.find_as(From);
  AliasSet *AS = I->second->getAliasSet(*this);
  // The final argument is addPointer's KnownMustAlias flag: the copy is
  // treated as a must-alias of the original, so no alias query is made.
  AS->addPointer(*this, Entry, I->second->getSize(),
                 I->second->getTBAAInfo(),
                 true);
}
//===----------------------------------------------------------------------===//

/// deleted - Hands the value held by this handle to the owning
/// AliasSetTracker's deleteValue().
void AliasSetTracker::ASTCallbackVH::deleted() {
  assert(AST && "ASTCallbackVH called with a null AliasSetTracker!");
  AST->deleteValue(getValPtr());
  // this now dangles!
}

/// allUsesReplacedWith - Hands the value held by this handle, together with
/// its replacement V, to the owning AliasSetTracker's copyValue().
/// NOTE(review): unlike deleted(), AST is not asserted to be non-null here.
void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) {
  AST->copyValue(getValPtr(), V);
}

/// Construct a handle on V that reports back to the tracker 'ast'.
AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
  : CallbackVH(V), AST(ast) {}

/// operator= - Point this handle at V by assigning from a temporary handle
/// built with the same tracker.
AliasSetTracker::ASTCallbackVH &
AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
  return *this = ASTCallbackVH(V, AST);
}

//===----------------------------------------------------------------------===//
//                            AliasSetPrinter Pass
//===----------------------------------------------------------------------===//

namespace {
  /// AliasSetPrinter - Function pass that feeds every instruction of a
  /// function into a fresh AliasSetTracker and prints the tracker to errs().
  class AliasSetPrinter : public FunctionPass {
    // Only valid while runOnFunction is executing; it is deleted before
    // runOnFunction returns and is not reset afterwards.
    AliasSetTracker *Tracker;
  public:
    static char ID; // Pass identification, replacement for typeid
    AliasSetPrinter() : FunctionPass(ID) {
      initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
      AU.addRequired<AliasAnalysis>();
    }

    /// Builds the tracker, adds every instruction of F, prints the result and
    /// destroys the tracker.  Always returns false.
    virtual bool runOnFunction(Function &F) {
      Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());

      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
        Tracker->add(&*I);
      Tracker->print(errs());
      delete Tracker;
      return false;
    }
  };
}

char AliasSetPrinter::ID = 0;
INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
                "Alias Set Printer", false, true)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
                "Alias Set Printer", false, true)
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
new file mode 100644
index 000000000000..98f2a55a2fdd
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -0,0 +1,100 @@
//===-- Analysis.cpp ------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm-c/Analysis.h"
#include "llvm-c/Initialization.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/IR/Module.h"
#include "llvm/PassRegistry.h"
#include <cstring>

using namespace llvm;

/// initializeAnalysis - Initialize all passes linked into the Analysis library.
void llvm::initializeAnalysis(PassRegistry &Registry) {
  initializeAliasAnalysisAnalysisGroup(Registry);
  initializeAliasAnalysisCounterPass(Registry);
  initializeAAEvalPass(Registry);
  initializeAliasDebuggerPass(Registry);
  initializeAliasSetPrinterPass(Registry);
  initializeNoAAPass(Registry);
  initializeBasicAliasAnalysisPass(Registry);
  initializeBlockFrequencyInfoPass(Registry);
  initializeBranchProbabilityInfoPass(Registry);
  initializeCostModelAnalysisPass(Registry);
  initializeCFGViewerPass(Registry);
  initializeCFGPrinterPass(Registry);
  initializeCFGOnlyViewerPass(Registry);
  initializeCFGOnlyPrinterPass(Registry);
  initializeDependenceAnalysisPass(Registry);
  initializeDelinearizationPass(Registry);
  initializeDominanceFrontierPass(Registry);
  initializeDomViewerPass(Registry);
  initializeDomPrinterPass(Registry);
  initializeDomOnlyViewerPass(Registry);
  initializePostDomViewerPass(Registry);
  initializeDomOnlyPrinterPass(Registry);
  initializePostDomPrinterPass(Registry);
  initializePostDomOnlyViewerPass(Registry);
  initializePostDomOnlyPrinterPass(Registry);
  initializeIVUsersPass(Registry);
  initializeInstCountPass(Registry);
  initializeIntervalPartitionPass(Registry);
  initializeLazyValueInfoPass(Registry);
  initializeLibCallAliasAnalysisPass(Registry);
  initializeLintPass(Registry);
  initializeLoopInfoPass(Registry);
  initializeMemDepPrinterPass(Registry);
  initializeMemoryDependenceAnalysisPass(Registry);
  initializeModuleDebugInfoPrinterPass(Registry);
  initializePostDominatorTreePass(Registry);
  initializeRegionInfoPass(Registry);
  initializeRegionViewerPass(Registry);
  initializeRegionPrinterPass(Registry);
  initializeRegionOnlyViewerPass(Registry);
  initializeRegionOnlyPrinterPass(Registry);
  initializeScalarEvolutionPass(Registry);
  initializeScalarEvolutionAliasAnalysisPass(Registry);
  initializeTargetTransformInfoAnalysisGroup(Registry);
  initializeTypeBasedAliasAnalysisPass(Registry);
}

/// C API entry point: unwraps the registry handle and forwards to
/// initializeAnalysis().
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
  initializeAnalysis(*unwrap(R));
}

/// C API wrapper around verifyModule().  The verifier's text is collected only
/// when OutMessages is non-null; in that case *OutMessages receives a
/// strdup()'d copy of it (an empty string if nothing was reported), so the
/// buffer is heap-allocated and handed to the caller.
/// NOTE(review): presumably released with LLVMDisposeMessage -- confirm
/// against the llvm-c headers.
LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
                          char **OutMessages) {
  std::string Messages;

  LLVMBool Result = verifyModule(*unwrap(M),
                            static_cast<VerifierFailureAction>(Action),
                            OutMessages? &Messages : 0);

  if (OutMessages)
    *OutMessages = strdup(Messages.c_str());

  return Result;
}

/// C API wrapper around verifyFunction() for the function wrapped by Fn.
LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
  return verifyFunction(*unwrap<Function>(Fn),
                        static_cast<VerifierFailureAction>(Action));
}

/// C API wrapper: calls viewCFG() on the function wrapped by Fn.
void LLVMViewFunctionCFG(LLVMValueRef Fn) {
  Function *F = unwrap<Function>(Fn);
  F->viewCFG();
}

/// C API wrapper: calls viewCFGOnly() on the function wrapped by Fn.
void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) {
  Function *F = unwrap<Function>(Fn);
  F->viewCFGOnly();
}
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
new file mode 100644
index 000000000000..b2c20110e90e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -0,0 +1,1309 @@
//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the primary stateless implementation of the
// Alias Analysis interface that implements identities (two different
// globals cannot alias, etc), but does no stateful analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;

//===----------------------------------------------------------------------===//
// Useful predicates
//===----------------------------------------------------------------------===//

/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
/// object that never escapes from the function.
static bool isNonEscapingLocalObject(const Value *V) {
  // If this is a local allocation, check to see if it escapes.
  if (isa<AllocaInst>(V) || isNoAliasCall(V))
    // Set StoreCaptures to True so that we can assume in our callers that the
    // pointer is not the result of a load instruction. Currently
    // PointerMayBeCaptured doesn't have any special analysis for the
    // StoreCaptures=false case; if it did, our callers could be refined to be
    // more precise.
    return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);

  // If this is an argument that corresponds to a byval or noalias argument,
  // then it has not escaped before entering the function.  Check if it escapes
  // inside the function.
  if (const Argument *A = dyn_cast<Argument>(V))
    if (A->hasByValAttr() || A->hasNoAliasAttr())
      // Note even if the argument is marked nocapture we still need to check
      // for copies made inside the function. The nocapture attribute only
      // specifies that there are no copies made that outlive the function.
      return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);

  // Anything else is not recognised as a function-local object.
  return false;
}

/// isEscapeSource - Return true if the pointer is one which would have
/// been considered an escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
  if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
    return true;

  // The load case works because isNonEscapingLocalObject considers all
  // stores to be escapes (it passes true for the StoreCaptures argument
  // to PointerMayBeCaptured).
  if (isa<LoadInst>(V))
    return true;

  return false;
}

/// getObjectSize - Return the size of the object specified by V, or
/// UnknownSize if unknown.  RoundToAlign is passed straight through to the
/// five-argument getObjectSize() overload that does the actual work.
static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
                              const TargetLibraryInfo &TLI,
                              bool RoundToAlign = false) {
  uint64_t Size;
  if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
    return Size;
  return AliasAnalysis::UnknownSize;
}

/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, uint64_t Size, + const DataLayout &TD, + const TargetLibraryInfo &TLI) { + // Note that the meanings of the "object" are slightly different in the + // following contexts: + // c1: llvm::getObjectSize() + // c2: llvm.objectsize() intrinsic + // c3: isObjectSmallerThan() + // c1 and c2 share the same meaning; however, the meaning of "object" in c3 + // refers to the "entire object". + // + // Consider this example: + // char *p = (char*)malloc(100) + // char *q = p+80; + // + // In the context of c1 and c2, the "object" pointed by q refers to the + // stretch of memory of q[0:19]. So, getObjectSize(q) should return 20. + // + // However, in the context of c3, the "object" refers to the chunk of memory + // being allocated. So, the "object" has 100 bytes, and q points to the middle + // the "object". In case q is passed to isObjectSmallerThan() as the 1st + // parameter, before the llvm::getObjectSize() is called to get the size of + // entire object, we should: + // - either rewind the pointer q to the base-address of the object in + // question (in this case rewind to p), or + // - just give up. It is up to caller to make sure the pointer is pointing + // to the base address the object. + // + // We go for 2nd option for simplicity. + if (!isIdentifiedObject(V)) + return false; + + // This function needs to use the aligned object size because we allow + // reads a bit past the end given sufficient alignment. + uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true); + + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; +} + +/// isObjectSize - Return true if we can prove that the object specified +/// by V has size Size. 
+static bool isObjectSize(const Value *V, uint64_t Size, + const DataLayout &TD, const TargetLibraryInfo &TLI) { + uint64_t ObjectSize = getObjectSize(V, TD, TLI); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; +} + +/// isIdentifiedFunctionLocal - Return true if V is umabigously identified +/// at the function-level. Different IdentifiedFunctionLocals can't alias. +/// Further, an IdentifiedFunctionLocal can not alias with any function +/// arguments other than itself, which is not neccessarily true for +/// IdentifiedObjects. +static bool isIdentifiedFunctionLocal(const Value *V) +{ + return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V); +} + + +//===----------------------------------------------------------------------===// +// GetElementPtr Instruction Decomposition and Analysis +//===----------------------------------------------------------------------===// + +namespace { + enum ExtensionKind { + EK_NotExtended, + EK_SignExt, + EK_ZeroExt + }; + + struct VariableGEPIndex { + const Value *V; + ExtensionKind Extension; + int64_t Scale; + + bool operator==(const VariableGEPIndex &Other) const { + return V == Other.V && Extension == Other.Extension && + Scale == Other.Scale; + } + + bool operator!=(const VariableGEPIndex &Other) const { + return !operator==(Other); + } + }; +} + + +/// GetLinearExpression - Analyze the specified value as a linear expression: +/// "A*V + B", where A and B are constant integers. Return the scale and offset +/// values as APInts and return V as a Value*, and return whether we looked +/// through any sign or zero extends. The incoming Value is known to have +/// IntegerType and it may already be sign or zero extended. +/// +/// Note that this looks through extends, so the high bits may not be +/// represented in the result. 
+static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, + ExtensionKind &Extension, + const DataLayout &TD, unsigned Depth) { + assert(V->getType()->isIntegerTy() && "Not an integer value"); + + // Limit our recursion depth. + if (Depth == 6) { + Scale = 1; + Offset = 0; + return V; + } + + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + switch (BOp->getOpcode()) { + default: break; + case Instruction::Or: + // X|C == X+C if all the bits in C are unset in X. Otherwise we can't + // analyze it. + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD)) + break; + // FALL THROUGH. + case Instruction::Add: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset += RHSC->getValue(); + return V; + case Instruction::Mul: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset *= RHSC->getValue(); + Scale *= RHSC->getValue(); + return V; + case Instruction::Shl: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset <<= RHSC->getValue().getLimitedValue(); + Scale <<= RHSC->getValue().getLimitedValue(); + return V; + } + } + } + + // Since GEP indices are sign extended anyway, we don't care about the high + // bits of a sign or zero extended value - just scales and offsets. The + // extensions have to be consistent though. + if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) || + (isa<ZExtInst>(V) && Extension != EK_SignExt)) { + Value *CastOp = cast<CastInst>(V)->getOperand(0); + unsigned OldWidth = Scale.getBitWidth(); + unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); + Scale = Scale.trunc(SmallWidth); + Offset = Offset.trunc(SmallWidth); + Extension = isa<SExtInst>(V) ? 
EK_SignExt : EK_ZeroExt; + + Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, + TD, Depth+1); + Scale = Scale.zext(OldWidth); + Offset = Offset.zext(OldWidth); + + return Result; + } + + Scale = 1; + Offset = 0; + return V; +} + +/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it +/// into a base pointer with a constant offset and a number of scaled symbolic +/// offsets. +/// +/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in +/// the VarIndices vector) are Value*'s that are known to be scaled by the +/// specified amount, but which may have other unrepresented high bits. As such, +/// the gep cannot necessarily be reconstructed from its decomposed form. +/// +/// When DataLayout is around, this function is capable of analyzing everything +/// that GetUnderlyingObject can look through. When not, it just looks +/// through pointer casts. +/// +static const Value * +DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, + SmallVectorImpl<VariableGEPIndex> &VarIndices, + const DataLayout *TD) { + // Limit recursion depth to limit compile time in crazy cases. + unsigned MaxLookup = 6; + + BaseOffs = 0; + do { + // See if this is a bitcast or GEP. + const Operator *Op = dyn_cast<Operator>(V); + if (Op == 0) { + // The only non-operator case we can handle are GlobalAliases. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (!GA->mayBeOverridden()) { + V = GA->getAliasee(); + continue; + } + } + return V; + } + + if (Op->getOpcode() == Instruction::BitCast) { + V = Op->getOperand(0); + continue; + } + + const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); + if (GEPOp == 0) { + // If it's not a GEP, hand it off to SimplifyInstruction to see if it + // can come up with something. This matches what GetUnderlyingObject does. + if (const Instruction *I = dyn_cast<Instruction>(V)) + // TODO: Get a DominatorTree and use it here. 
+ if (const Value *Simplified = + SimplifyInstruction(const_cast<Instruction *>(I), TD)) { + V = Simplified; + continue; + } + + return V; + } + + // Don't attempt to analyze GEPs over unsized objects. + if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized()) + return V; + + // If we are lacking DataLayout information, we can't compute the offets of + // elements computed by GEPs. However, we can handle bitcast equivalent + // GEPs. + if (TD == 0) { + if (!GEPOp->hasAllZeroIndices()) + return V; + V = GEPOp->getOperand(0); + continue; + } + + unsigned AS = GEPOp->getPointerAddressSpace(); + // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. + gep_type_iterator GTI = gep_type_begin(GEPOp); + for (User::const_op_iterator I = GEPOp->op_begin()+1, + E = GEPOp->op_end(); I != E; ++I) { + Value *Index = *I; + // Compute the (potentially symbolic) offset in bytes for this index. + if (StructType *STy = dyn_cast<StructType>(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); + if (FieldNo == 0) continue; + + BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo); + continue; + } + + // For an array/pointer, add the element offset, explicitly scaled. + if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { + if (CIdx->isZero()) continue; + BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); + continue; + } + + uint64_t Scale = TD->getTypeAllocSize(*GTI); + ExtensionKind Extension = EK_NotExtended; + + // If the integer type is smaller than the pointer size, it is implicitly + // sign extended to pointer size. + unsigned Width = Index->getType()->getIntegerBitWidth(); + if (TD->getPointerSizeInBits(AS) > Width) + Extension = EK_SignExt; + + // Use GetLinearExpression to decompose the index into a C1*V+C2 form. 
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0); + Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, + *TD, 0); + + // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. + // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. + BaseOffs += IndexOffset.getSExtValue()*Scale; + Scale *= IndexScale.getSExtValue(); + + // If we already had an occurrence of this index variable, merge this + // scale into it. For example, we want to handle: + // A[x][x] -> x*16 + x*4 -> x*20 + // This also ensures that 'x' only appears in the index list once. + for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) { + if (VarIndices[i].V == Index && + VarIndices[i].Extension == Extension) { + Scale += VarIndices[i].Scale; + VarIndices.erase(VarIndices.begin()+i); + break; + } + } + + // Make sure that we have a scale that makes sense for this target's + // pointer size. + if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) { + Scale <<= ShiftBits; + Scale = (int64_t)Scale >> ShiftBits; + } + + if (Scale) { + VariableGEPIndex Entry = {Index, Extension, + static_cast<int64_t>(Scale)}; + VarIndices.push_back(Entry); + } + } + + // Analyze the base pointer next. + V = GEPOp->getOperand(0); + } while (--MaxLookup); + + // If the chain of expressions is too deep, just return early. + return V; +} + +/// GetIndexDifference - Dest and Src are the variable indices from two +/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base +/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic +/// difference between the two pointers. +static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src) { + if (Src.empty()) return; + + for (unsigned i = 0, e = Src.size(); i != e; ++i) { + const Value *V = Src[i].V; + ExtensionKind Extension = Src[i].Extension; + int64_t Scale = Src[i].Scale; + + // Find V in Dest. 
This is N^2, but pointer indices almost never have more + // than a few variable indexes. + for (unsigned j = 0, e = Dest.size(); j != e; ++j) { + if (Dest[j].V != V || Dest[j].Extension != Extension) continue; + + // If we found it, subtract off Scale V's from the entry in Dest. If it + // goes to zero, remove the entry. + if (Dest[j].Scale != Scale) + Dest[j].Scale -= Scale; + else + Dest.erase(Dest.begin()+j); + Scale = 0; + break; + } + + // If we didn't consume this entry, add it to the end of the Dest list. + if (Scale) { + VariableGEPIndex Entry = { V, Extension, -Scale }; + Dest.push_back(Entry); + } + } +} + +//===----------------------------------------------------------------------===// +// BasicAliasAnalysis Pass +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +static const Function *getParent(const Value *V) { + if (const Instruction *inst = dyn_cast<Instruction>(V)) + return inst->getParent()->getParent(); + + if (const Argument *arg = dyn_cast<Argument>(V)) + return arg->getParent(); + + return NULL; +} + +static bool notDifferentParent(const Value *O1, const Value *O2) { + + const Function *F1 = getParent(O1); + const Function *F2 = getParent(O2); + + return !F1 || !F2 || F1 == F2; +} +#endif + +namespace { + /// BasicAliasAnalysis - This is the primary alias analysis implementation. 
+ struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { + static char ID; // Class identification, replacement for typeinfo + BasicAliasAnalysis() : ImmutablePass(ID) { + initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); + } + + virtual AliasResult alias(const Location &LocA, + const Location &LocB) { + assert(AliasCache.empty() && "AliasCache must be cleared after use!"); + assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && + "BasicAliasAnalysis doesn't support interprocedural queries."); + AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, + LocB.Ptr, LocB.Size, LocB.TBAATag); + // AliasCache rarely has more than 1 or 2 elements, always use + // shrink_and_clear so it quickly returns to the inline capacity of the + // SmallDenseMap if it ever grows larger. + // FIXME: This should really be shrink_to_inline_capacity_and_clear(). + AliasCache.shrink_and_clear(); + return Alias; + } + + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // The AliasAnalysis base class has some smarts, lets use them. + return AliasAnalysis::getModRefInfo(CS1, CS2); + } + + /// pointsToConstantMemory - Chase pointers until we find a (constant + /// global) or not. + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + + /// getModRefBehavior - Return the behavior when calling the given + /// call site. + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + + /// getModRefBehavior - Return the behavior when calling the given function. + /// For use when the call site is not known. 
+ virtual ModRefBehavior getModRefBehavior(const Function *F); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + // AliasCache - Track alias queries to guard against recursion. + typedef std::pair<Location, Location> LocPair; + typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; + AliasCacheTy AliasCache; + + // Visited - Track instructions visited by pointsToConstantMemory. + SmallPtrSet<const Value*, 16> Visited; + + // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP + // instruction against another. + AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, + const MDNode *V1TBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo, + const Value *UnderlyingV1, const Value *UnderlyingV2); + + // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI + // instruction against another. + AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, + const MDNode *PNTBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo); + + /// aliasSelect - Disambiguate a Select instruction against another value. + AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, + const MDNode *SITBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo); + + AliasResult aliasCheck(const Value *V1, uint64_t V1Size, + const MDNode *V1TBAATag, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAATag); + }; +} // End of anonymous namespace + +// Register this pass... 
char BasicAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
                   "Basic Alias Analysis (stateless AA impl)",
                   false, true, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
                   "Basic Alias Analysis (stateless AA impl)",
                   false, true, false)


/// createBasicAliasAnalysisPass - Return a newly allocated BasicAliasAnalysis
/// pass.
ImmutablePass *llvm::createBasicAliasAnalysisPass() {
  return new BasicAliasAnalysis();
}

/// pointsToConstantMemory - Returns whether the given pointer value
/// points to memory that is local to the function, with global constants being
/// considered local to all functions.
///
/// Every exit path empties Visited again.  Whenever this implementation cannot
/// decide, the query is forwarded to AliasAnalysis::pointsToConstantMemory.
bool
BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
  assert(Visited.empty() && "Visited must be cleared after use!");

  // Bounds both the number of worklist iterations and the number of phi
  // operands that will be expanded.
  unsigned MaxLookup = 8;
  SmallVector<const Value *, 16> Worklist;
  Worklist.push_back(Loc.Ptr);
  do {
    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
    // Reaching the same underlying object twice is treated as "cannot decide".
    if (!Visited.insert(V)) {
      Visited.clear();
      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
    }

    // An alloca instruction defines local memory.
    if (OrLocal && isa<AllocaInst>(V))
      continue;

    // A global constant counts as local memory for our purposes.
    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
      // Note: this doesn't require GV to be "ODR" because it isn't legal for a
      // global to be marked constant in some modules and non-constant in
      // others.  GV may even be a declaration, not a definition.
      if (!GV->isConstant()) {
        Visited.clear();
        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
      }
      continue;
    }

    // If both select values point to local memory, then so does the select.
    if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    // If all values incoming to a phi node point to local memory, then so does
    // the phi.
    if (const PHINode *PN = dyn_cast<PHINode>(V)) {
      // Don't bother inspecting phi nodes with many operands.
      if (PN->getNumIncomingValues() > MaxLookup) {
        Visited.clear();
        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
      }
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        Worklist.push_back(PN->getIncomingValue(i));
      continue;
    }

    // Otherwise be conservative.
    Visited.clear();
    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);

  } while (!Worklist.empty() && --MaxLookup);

  // If the lookup budget ran out with work still pending, the answer is false.
  Visited.clear();
  return Worklist.empty();
}

/// getModRefBehavior - Return the behavior when calling the given call site.
AliasAnalysis::ModRefBehavior
BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
  if (CS.doesNotAccessMemory())
    // Can't do better than this.
    return DoesNotAccessMemory;

  ModRefBehavior Min = UnknownModRefBehavior;

  // If the callsite knows it only reads memory, don't return worse
  // than that.
  if (CS.onlyReadsMemory())
    Min = OnlyReadsMemory;

  // The AliasAnalysis base class has some smarts, let's use them.
  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
}

/// getModRefBehavior - Return the behavior when calling the given function.
/// For use when the call site is not known.
AliasAnalysis::ModRefBehavior
BasicAliasAnalysis::getModRefBehavior(const Function *F) {
  // If the function declares it doesn't access memory, we can't do better.
  if (F->doesNotAccessMemory())
    return DoesNotAccessMemory;

  // For intrinsics, we can check the table.
  // The included fragment is generated code that reads 'iid'.
  // NOTE(review): presumably it returns directly for intrinsics it knows --
  // confirm against the generated Intrinsics.gen.
  if (unsigned iid = F->getIntrinsicID()) {
#define GET_INTRINSIC_MODREF_BEHAVIOR
#include "llvm/IR/Intrinsics.gen"
#undef GET_INTRINSIC_MODREF_BEHAVIOR
  }

  ModRefBehavior Min = UnknownModRefBehavior;

  // If the function declares it only reads memory, go with that.
  if (F->onlyReadsMemory())
    Min = OnlyReadsMemory;

  // Otherwise be conservative.
  return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
}

/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object.  Since we only look at local properties of this
/// function, we really can't say much about this query.  We do, however, use
/// simple "address taken" analysis on local objects.
///
/// The local reasoning below either returns NoModRef outright or narrows
/// 'Min'; the final answer is the base-class result masked with Min.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
                                  const Location &Loc) {
  assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
         "AliasAnalysis query involving multiple functions!");

  const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);

  // If this is a tail call and Loc.Ptr points to a stack location, we know that
  // the tail call cannot access or modify the local stack.
  // We cannot exclude byval arguments here; these belong to the caller of
  // the current function not to the current function, and a tail callee
  // may reference them.
  if (isa<AllocaInst>(Object))
    if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
      if (CI->isTailCall())
        return NoModRef;

  // If the pointer is to a locally allocated object that does not escape,
  // then the call can not mod/ref the pointer unless the call takes the pointer
  // as an argument, and itself doesn't capture it.
  if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
      isNonEscapingLocalObject(Object)) {
    bool PassedAsArg = false;
    unsigned ArgNo = 0;
    for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
         CI != CE; ++CI, ++ArgNo) {
      // Only look at the no-capture or byval pointer arguments.  If this
      // pointer were passed to arguments that were neither of these, then it
      // couldn't be no-capture.
      if (!(*CI)->getType()->isPointerTy() ||
          (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
        continue;

      // If this is a no-capture pointer argument, see if we can tell that it
      // is impossible to alias the pointer we're checking.  If not, we have to
      // assume that the call could touch the pointer, even though it doesn't
      // escape.
      if (!isNoAlias(Location(*CI), Location(Object))) {
        PassedAsArg = true;
        break;
      }
    }

    if (!PassedAsArg)
      return NoModRef;
  }

  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
  ModRefResult Min = ModRef;

  // Finally, handle specific knowledge of intrinsics.
  // Note the unbraced "if (II != 0) switch {...} else if (...)" shape: the
  // memset_pattern16 branch further down is the else-arm of this if, so it is
  // only considered when the call site is not an intrinsic call.
  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
  if (II != 0)
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memcpy:
    case Intrinsic::memmove: {
      uint64_t Len = UnknownSize;
      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
        Len = LenCI->getZExtValue();
      Value *Dest = II->getArgOperand(0);
      Value *Src = II->getArgOperand(1);
      // If it can't overlap the source dest, then it doesn't modref the loc.
      if (isNoAlias(Location(Dest, Len), Loc)) {
        if (isNoAlias(Location(Src, Len), Loc))
          return NoModRef;
        // If it can't overlap the dest, then worst case it reads the loc.
        Min = Ref;
      } else if (isNoAlias(Location(Src, Len), Loc)) {
        // If it can't overlap the source, then worst case it mutates the loc.
        Min = Mod;
      }
      break;
    }
    case Intrinsic::memset:
      // Since memset is 'accesses arguments' only, the AliasAnalysis base class
      // will handle it for the variable length case.
      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
        uint64_t Len = LenCI->getZExtValue();
        Value *Dest = II->getArgOperand(0);
        if (isNoAlias(Location(Dest, Len), Loc))
          return NoModRef;
      }
      // We know that memset doesn't load anything.
      Min = Mod;
      break;
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start: {
      // For these intrinsics operand 0 is the constant size and operand 1 the
      // pointer.
      uint64_t PtrSize =
        cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
      if (isNoAlias(Location(II->getArgOperand(1),
                             PtrSize,
                             II->getMetadata(LLVMContext::MD_tbaa)),
                    Loc))
        return NoModRef;
      break;
    }
    case Intrinsic::invariant_end: {
      // Here the size is operand 1 and the pointer operand 2.
      uint64_t PtrSize =
        cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
      if (isNoAlias(Location(II->getArgOperand(2),
                             PtrSize,
                             II->getMetadata(LLVMContext::MD_tbaa)),
                    Loc))
        return NoModRef;
      break;
    }
    case Intrinsic::arm_neon_vld1: {
      // LLVM's vld1 and vst1 intrinsics currently only support a single
      // vector register.
      uint64_t Size =
        TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
      if (isNoAlias(Location(II->getArgOperand(0), Size,
                             II->getMetadata(LLVMContext::MD_tbaa)),
                    Loc))
        return NoModRef;
      break;
    }
    case Intrinsic::arm_neon_vst1: {
      uint64_t Size =
        TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
      if (isNoAlias(Location(II->getArgOperand(0), Size,
                             II->getMetadata(LLVMContext::MD_tbaa)),
                    Loc))
        return NoModRef;
      break;
    }
    }

  // We can bound the aliasing properties of memset_pattern16 just as we can
  // for memcpy/memset.  This is particularly important because the
  // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
  // whenever possible.
  else if (TLI.has(LibFunc::memset_pattern16) &&
           CS.getCalledFunction() &&
           CS.getCalledFunction()->getName() == "memset_pattern16") {
    const Function *MS = CS.getCalledFunction();
    FunctionType *MemsetType = MS->getFunctionType();
    // Only trust the name when the prototype is (pointer, pointer, integer)
    // and not variadic.
    if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
        isa<PointerType>(MemsetType->getParamType(0)) &&
        isa<PointerType>(MemsetType->getParamType(1)) &&
        isa<IntegerType>(MemsetType->getParamType(2))) {
      uint64_t Len = UnknownSize;
      if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
        Len = LenCI->getZExtValue();
      const Value *Dest = CS.getArgument(0);
      const Value *Src = CS.getArgument(1);
      // If it can't overlap the source dest, then it doesn't modref the loc.
      if (isNoAlias(Location(Dest, Len), Loc)) {
        // Always reads 16 bytes of the source.
        if (isNoAlias(Location(Src, 16), Loc))
          return NoModRef;
        // If it can't overlap the dest, then worst case it reads the loc.
        Min = Ref;
      // Always reads 16 bytes of the source.
      } else if (isNoAlias(Location(Src, 16), Loc)) {
        // If it can't overlap the source, then worst case it mutates the loc.
        Min = Mod;
      }
    }
  }

  // The AliasAnalysis base class has some smarts, let's use them.
  return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
}

/// areVarIndicesEqual - Return true when both index lists have the same length
/// and compare equal element by element, in order.
static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1,
                               SmallVectorImpl<VariableGEPIndex> &Indices2) {
  unsigned Size1 = Indices1.size();
  unsigned Size2 = Indices2.size();

  if (Size1 != Size2)
    return false;

  for (unsigned I = 0; I != Size1; ++I)
    if (Indices1[I] != Indices2[I])
      return false;

  return true;
}

/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer.  We know that V1 is a GEP, but we don't know
/// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
/// UnderlyingV2 is the same for V2.
+/// +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, + const MDNode *V1TBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo, + const Value *UnderlyingV1, + const Value *UnderlyingV2) { + int64_t GEP1BaseOffset; + SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; + + // If we have two gep instructions with must-alias or not-alias'ing base + // pointers, figure out if the indexes to the GEP tell us anything about the + // derived pointer. + if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { + // Do the base pointers alias? + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, + UnderlyingV2, UnknownSize, 0); + + // Check for geps of non-aliasing underlying pointers where the offsets are + // identical. + if ((BaseAlias == MayAlias) && V1Size == V2Size) { + // Do the base pointers alias assuming type and size. + AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, + V1TBAAInfo, UnderlyingV2, + V2Size, V2TBAAInfo); + if (PreciseBaseAlias == NoAlias) { + // See if the computed offset from the common pointer tells us about the + // relation of the resulting pointer. + int64_t GEP2BaseOffset; + SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; + const Value *GEP2BasePtr = + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no DataLayout. + if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + // Same offsets. 
+ if (GEP1BaseOffset == GEP2BaseOffset && + areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices)) + return NoAlias; + GEP1VariableIndices.clear(); + } + } + + // If we get a No or May, then return it immediately, no amount of analysis + // will improve this situation. + if (BaseAlias != MustAlias) return BaseAlias; + + // Otherwise, we have a MustAlias. Since the base pointers alias each other + // exactly, see if the computed offset from the common pointer tells us + // about the relation of the resulting pointer. + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + + int64_t GEP2BaseOffset; + SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; + const Value *GEP2BasePtr = + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no DataLayout. + if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + + // Subtract the GEP2 pointer from the GEP1 pointer to find out their + // symbolic difference. + GEP1BaseOffset -= GEP2BaseOffset; + GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices); + + } else { + // Check to see if these two pointers are related by the getelementptr + // instruction. If one pointer is a GEP with a non-zero index of the other + // pointer, we know they cannot alias. + + // If both accesses are unknown size, we can't do anything useful here. + if (V1Size == UnknownSize && V2Size == UnknownSize) + return MayAlias; + + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0, + V2, V2Size, V2TBAAInfo); + if (R != MustAlias) + // If V2 may alias GEP base pointer, conservatively returns MayAlias. 
+ // If V2 is known not to alias GEP base pointer, then the two values + // cannot alias per GEP semantics: "A pointer value formed from a + // getelementptr instruction is associated with the addresses associated + // with the first operand of the getelementptr". + return R; + + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no DataLayout. + if (GEP1BasePtr != UnderlyingV1) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + } + + // In the two GEP case, if there is no difference in the offsets of the + // computed pointers, the resultant pointers are a must alias. This + // happens when we have two lexically identical GEP's (for example). + // + // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 + // must aliases the GEP, the end result is a must alias also. + if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) + return MustAlias; + + // If there is a constant difference between the pointers, but the difference + // is less than the size of the associated memory object, then we know + // that the objects are partially overlapping. If the difference is + // greater, we know they do not overlap. + if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) { + if (GEP1BaseOffset >= 0) { + if (V2Size != UnknownSize) { + if ((uint64_t)GEP1BaseOffset < V2Size) + return PartialAlias; + return NoAlias; + } + } else { + if (V1Size != UnknownSize) { + // The offset is negative here; negating it as an unsigned value yields + // its magnitude for the comparison against V1Size. + if (-(uint64_t)GEP1BaseOffset < V1Size) + return PartialAlias; + return NoAlias; + } + } + } + + // Try to distinguish something like &A[i][1] against &A[42][0]. + // Grab the least significant bit set in any of the scales. 
+ if (!GEP1VariableIndices.empty()) { + uint64_t Modulo = 0; + for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) + Modulo |= (uint64_t)GEP1VariableIndices[i].Scale; + Modulo = Modulo ^ (Modulo & (Modulo - 1)); + + // Modulo now holds only the lowest bit set in the OR of all the scales. + // We can compute the difference between the two addresses + // mod Modulo. Check whether that difference guarantees that the + // two locations do not alias. + uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1); + if (V1Size != UnknownSize && V2Size != UnknownSize && + ModOffset >= V2Size && V1Size <= Modulo - ModOffset) + return NoAlias; + } + + // Statically, we can see that the base objects are the same, but the + // pointers have dynamic offsets which we can't resolve. And none of our + // little tricks above worked. + // + // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the + // practical effect of this is protecting TBAA in the case of dynamic + // indices into arrays of unions or malloc'd memory. + return PartialAlias; +} + +/// MergeAliasResults - Combine two alias results into a single result. Equal +/// results are returned unchanged, a mix of PartialAlias and MustAlias yields +/// PartialAlias, and every other combination yields MayAlias. +static AliasAnalysis::AliasResult +MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) { + // If the results agree, take it. + if (A == B) + return A; + // A mix of PartialAlias and MustAlias is PartialAlias. + if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) || + (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias)) + return AliasAnalysis::PartialAlias; + // Otherwise, we don't know anything. + return AliasAnalysis::MayAlias; +} + +/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select +/// instruction against another. +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, + const MDNode *SITBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // If the values are Selects with the same condition, we can do a more precise + // check: just check for aliases between the values on corresponding arms. 
+ if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) + if (SI->getCondition() == SI2->getCondition()) { + AliasResult Alias = + aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo, + SI2->getTrueValue(), V2Size, V2TBAAInfo); + if (Alias == MayAlias) + return MayAlias; + AliasResult ThisAlias = + aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo, + SI2->getFalseValue(), V2Size, V2TBAAInfo); + return MergeAliasResults(ThisAlias, Alias); + } + + // If both arms of the Select node NoAlias or MustAlias V2, then returns + // NoAlias / MustAlias. Otherwise, returns MayAlias. + AliasResult Alias = + aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo); + if (Alias == MayAlias) + return MayAlias; + + AliasResult ThisAlias = + aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo); + return MergeAliasResults(ThisAlias, Alias); +} + +// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction +// against another. +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, + const MDNode *PNTBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // If the values are PHIs in the same block, we can do a more precise + // as well as efficient check: just check for aliases between the values + // on corresponding edges. + if (const PHINode *PN2 = dyn_cast<PHINode>(V2)) + if (PN2->getParent() == PN->getParent()) { + LocPair Locs(Location(PN, PNSize, PNTBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + if (PN > V2) + std::swap(Locs.first, Locs.second); + // Analyse the PHIs' inputs under the assumption that the PHIs are + // NoAlias. + // If the PHIs are May/MustAlias there must be (recursively) an input + // operand from outside the PHIs' cycle that is MayAlias/MustAlias or + // there must be an operation on the PHIs within the PHIs' value cycle + // that causes a MayAlias. + // Pretend the phis do not alias. 
+ AliasResult Alias = NoAlias; + assert(AliasCache.count(Locs) && + "There must exist an entry for the phi node"); + AliasResult OrigAliasResult = AliasCache[Locs]; + AliasCache[Locs] = NoAlias; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + AliasResult ThisAlias = + aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, + PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), + V2Size, V2TBAAInfo); + Alias = MergeAliasResults(ThisAlias, Alias); + if (Alias == MayAlias) + break; + } + + // Reset if speculation failed. + if (Alias != NoAlias) + AliasCache[Locs] = OrigAliasResult; + + return Alias; + } + + SmallPtrSet<Value*, 4> UniqueSrc; + SmallVector<Value*, 4> V1Srcs; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PV1 = PN->getIncomingValue(i); + if (isa<PHINode>(PV1)) + // If any of the source itself is a PHI, return MayAlias conservatively + // to avoid compile time explosion. The worst possible case is if both + // sides are PHI nodes. In which case, this is O(m x n) time where 'm' + // and 'n' are the number of PHI sources. + return MayAlias; + if (UniqueSrc.insert(PV1)) + V1Srcs.push_back(PV1); + } + + AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo, + V1Srcs[0], PNSize, PNTBAAInfo); + // Early exit if the check of the first PHI source against V2 is MayAlias. + // Other results are not possible. + if (Alias == MayAlias) + return MayAlias; + + // If all sources of the PHI node NoAlias or MustAlias V2, then returns + // NoAlias / MustAlias. Otherwise, returns MayAlias. + for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { + Value *V = V1Srcs[i]; + + AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, + V, PNSize, PNTBAAInfo); + Alias = MergeAliasResults(ThisAlias, Alias); + if (Alias == MayAlias) + break; + } + + return Alias; +} + +// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases, +// such as array references. 
+// +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, + const MDNode *V1TBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // If either of the memory references is empty, it doesn't matter what the + // pointer values are. + if (V1Size == 0 || V2Size == 0) + return NoAlias; + + // Strip off any casts if they exist. + V1 = V1->stripPointerCasts(); + V2 = V2->stripPointerCasts(); + + // Are we checking for alias of the same value? + if (V1 == V2) return MustAlias; + + if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) + return NoAlias; // Scalars cannot alias each other + + // Figure out what objects these things are pointing to if we can. + const Value *O1 = GetUnderlyingObject(V1, TD); + const Value *O2 = GetUnderlyingObject(V2, TD); + + // Null values in the default address space don't point to any object, so they + // don't alias any other pointer. + if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1)) + if (CPN->getType()->getAddressSpace() == 0) + return NoAlias; + if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2)) + if (CPN->getType()->getAddressSpace() == 0) + return NoAlias; + + if (O1 != O2) { + // If V1/V2 point to two different objects we know that we have no alias. + if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) + return NoAlias; + + // Constant pointers can't alias with non-const isIdentifiedObject objects. + if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) || + (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1))) + return NoAlias; + + // Function arguments can't alias with things that are known to be + // unambiguously identified at the function level. + if ((isa<Argument>(O1) && isIdentifiedFunctionLocal(O2)) || + (isa<Argument>(O2) && isIdentifiedFunctionLocal(O1))) + return NoAlias; + + // Most objects can't alias null. 
+ if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) || + (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2))) + return NoAlias; + + // If one pointer is the result of a call/invoke or load and the other is a + // non-escaping local object within the same function, then we know the + // object couldn't escape to a point where the call could return it. + // + // Note that if the pointers are in different functions, there are a + // variety of complications. A call with a nocapture argument may still + // temporarily store the nocapture argument's value in a temporary memory + // location if that memory location doesn't escape. Or it may pass a + // nocapture value to other functions as long as they don't capture it. + if (isEscapeSource(O1) && isNonEscapingLocalObject(O2)) + return NoAlias; + if (isEscapeSource(O2) && isNonEscapingLocalObject(O1)) + return NoAlias; + } + + // If the size of one access is larger than the entire object on the other + // side, then we know such behavior is undefined and can assume no alias. + if (TD) + if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) || + (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI))) + return NoAlias; + + // Check the cache before climbing up use-def chains. This also terminates + // otherwise infinitely recursive queries. The entry is seeded with MayAlias, + // so a query for a pair that is already in the cache returns the cached + // value instead of recursing. + LocPair Locs(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + if (V1 > V2) + std::swap(Locs.first, Locs.second); + std::pair<AliasCacheTy::iterator, bool> Pair = + AliasCache.insert(std::make_pair(Locs, MayAlias)); + if (!Pair.second) + return Pair.first->second; + + // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the + // GEP can't simplify, we don't even look at the PHI cases. 
+ if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + std::swap(O1, O2); + std::swap(V1TBAAInfo, V2TBAAInfo); + } + if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { + AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2); + if (Result != MayAlias) return AliasCache[Locs] = Result; + } + + if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + std::swap(V1TBAAInfo, V2TBAAInfo); + } + if (const PHINode *PN = dyn_cast<PHINode>(V1)) { + AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo, + V2, V2Size, V2TBAAInfo); + if (Result != MayAlias) return AliasCache[Locs] = Result; + } + + if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + std::swap(V1TBAAInfo, V2TBAAInfo); + } + if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { + AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo, + V2, V2Size, V2TBAAInfo); + if (Result != MayAlias) return AliasCache[Locs] = Result; + } + + // If both pointers are pointing into the same object and one of the + // accesses covers the entire object, then the accesses must + // overlap in some way. 
+ if (TD && O1 == O2) + if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD, *TLI)) || + (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD, *TLI))) + return AliasCache[Locs] = PartialAlias; + + AliasResult Result = + AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + return AliasCache[Locs] = Result; +} diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp new file mode 100644 index 000000000000..62f3ab16ca7c --- /dev/null +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -0,0 +1,161 @@ +//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +#ifndef NDEBUG +enum GVDAGType { + GVDT_None, + GVDT_Fraction, + GVDT_Integer +}; + +static cl::opt<GVDAGType> +ViewBlockFreqPropagationDAG("view-block-freq-propagation-dags", cl::Hidden, + cl::desc("Pop up a window to show a dag displaying how block " + "frequencies propagation through the CFG."), + cl::values( + clEnumValN(GVDT_None, "none", + "do not display graphs."), + clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " + "fractional block frequency 
representation."), + clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " + "integer fractional block frequency representation."), + clEnumValEnd)); + +namespace llvm { + +template <> +struct GraphTraits<BlockFrequencyInfo *> { + typedef const BasicBlock NodeType; + typedef succ_const_iterator ChildIteratorType; + typedef Function::const_iterator nodes_iterator; + + static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + static ChildIteratorType child_begin(const NodeType *N) { + return succ_begin(N); + } + static ChildIteratorType child_end(const NodeType *N) { + return succ_end(N); + } + static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + static nodes_iterator nodes_end(const BlockFrequencyInfo *G) { + return G->getFunction()->end(); + } +}; + +template<> +struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const BlockFrequencyInfo *G) { + return G->getFunction()->getName(); + } + + std::string getNodeLabel(const BasicBlock *Node, + const BlockFrequencyInfo *Graph) { + std::string Result; + raw_string_ostream OS(Result); + + OS << Node->getName().str() << ":"; + switch (ViewBlockFreqPropagationDAG) { + case GVDT_Fraction: + Graph->getBlockFreq(Node).print(OS); + break; + case GVDT_Integer: + OS << Graph->getBlockFreq(Node).getFrequency(); + break; + case GVDT_None: + llvm_unreachable("If we are not supposed to render a graph we should " + "never reach this point."); + } + + return Result; + } +}; + +} // end namespace llvm +#endif + +INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", + "Block Frequency Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) +INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", + "Block Frequency Analysis", true, true) + +char 
BlockFrequencyInfo::ID = 0; + + +BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) { + initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); + BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>(); +} + +BlockFrequencyInfo::~BlockFrequencyInfo() { + delete BFI; +} + +void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<BranchProbabilityInfo>(); + AU.setPreservesAll(); +} + +bool BlockFrequencyInfo::runOnFunction(Function &F) { + BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); + BFI->doFunction(&F, &BPI); +#ifndef NDEBUG + if (ViewBlockFreqPropagationDAG != GVDT_None) + view(); +#endif + return false; +} + +void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { + if (BFI) BFI->print(O); +} + +BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { + return BFI->getBlockFreq(BB); +} + +/// Pop up a ghostview window with the current block frequency propagation +/// rendered using dot. +void BlockFrequencyInfo::view() const { +// This code is only for debugging. +#ifndef NDEBUG + ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs"); +#else + errs() << "BlockFrequencyInfo::view is only available in debug builds on " + "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +const Function *BlockFrequencyInfo::getFunction() const { + return BFI->Fn; +} diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp new file mode 100644 index 000000000000..86560ca33d0c --- /dev/null +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -0,0 +1,651 @@ +//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", + "Branch Probability Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", + "Branch Probability Analysis", false, true) + +char BranchProbabilityInfo::ID = 0; + +// Weights are for internal use only. They are used by heuristics to help to +// estimate edges' probability. Example: +// +// Using "Loop Branch Heuristics" we predict weights of edges for the +// block BB2. +// ... +// | +// V +// BB1<-+ +// | | +// | | (Weight = 124) +// V | +// BB2--+ +// | +// | (Weight = 4) +// V +// BB3 +// +// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 +// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 +static const uint32_t LBH_TAKEN_WEIGHT = 124; +static const uint32_t LBH_NONTAKEN_WEIGHT = 4; + +/// \brief Unreachable-terminating branch taken weight. +/// +/// This is the weight for a branch being taken to a block that terminates +/// (eventually) in unreachable. These are predicted as unlikely as possible. +static const uint32_t UR_TAKEN_WEIGHT = 1; + +/// \brief Unreachable-terminating branch not-taken weight. +/// +/// This is the weight for a branch not being taken toward a block that +/// terminates (eventually) in unreachable. Such a branch is essentially never +/// taken. 
Set the weight to an absurdly high value so that nested loops don't +/// easily subsume it. +static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; + +/// \brief Weight for a branch taken going into a cold block. +/// +/// This is the weight for a branch taken toward a block marked +/// cold. A block is marked cold if it's postdominated by a +/// block containing a call to a cold function. Cold functions +/// are those marked with attribute 'cold'. +static const uint32_t CC_TAKEN_WEIGHT = 4; + +/// \brief Weight for a branch not-taken into a cold block. +/// +/// This is the weight for a branch not taken toward a block marked +/// cold. +static const uint32_t CC_NONTAKEN_WEIGHT = 64; + +/// Weights that calcPointerHeuristics assigns to the edge it predicts as +/// taken and to the other edge. +static const uint32_t PH_TAKEN_WEIGHT = 20; +static const uint32_t PH_NONTAKEN_WEIGHT = 12; + +/// Weights that calcZeroHeuristics assigns to the edge it predicts as taken +/// and to the other edge. +static const uint32_t ZH_TAKEN_WEIGHT = 20; +static const uint32_t ZH_NONTAKEN_WEIGHT = 12; + +/// Taken / not-taken weights for the floating point heuristics. +/// NOTE(review): presumably consumed by calcFloatingPointHeuristics -- confirm. +static const uint32_t FPH_TAKEN_WEIGHT = 20; +static const uint32_t FPH_NONTAKEN_WEIGHT = 12; + +/// \brief Invoke-terminating normal branch taken weight. +/// +/// This is the weight for branching to the normal destination of an invoke +/// instruction. We expect this to happen most of the time. Set the weight to an +/// absurdly high value so that nested loops subsume it. +static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; + +/// \brief Invoke-terminating normal branch not-taken weight. +/// +/// This is the weight for branching to the unwind destination of an invoke +/// instruction. This is essentially never taken. +static const uint32_t IH_NONTAKEN_WEIGHT = 1; + +// Standard weight value. Used when none of the heuristics set weight for +// the edge. +static const uint32_t NORMAL_WEIGHT = 16; + +// Minimum weight of an edge. Please note, that weight is NEVER 0. 
+static const uint32_t MIN_WEIGHT = 1; + +static uint32_t getMaxWeightFor(BasicBlock *BB) { + return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); +} + + +/// \brief Calculate edge weights for successors that lead to unreachable. +/// +/// Predict a successor which necessarily leads to an +/// unreachable-terminated block as extremely unlikely. +/// +/// Returns true if edge weights were set for BB, false otherwise. +bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa<UnreachableInst>(TI)) + PostDominatedByUnreachable.insert(BB); + return false; + } + + SmallVector<unsigned, 4> UnreachableEdges; + SmallVector<unsigned, 4> ReachableEdges; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + if (PostDominatedByUnreachable.count(*I)) + UnreachableEdges.push_back(I.getSuccessorIndex()); + else + ReachableEdges.push_back(I.getSuccessorIndex()); + } + + // If all successors are in the set of blocks post-dominated by unreachable, + // this block is too. + if (UnreachableEdges.size() == TI->getNumSuccessors()) + PostDominatedByUnreachable.insert(BB); + + // Skip probabilities if this block has a single successor or if all were + // reachable. 
+ if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) + return false; + + uint32_t UnreachableWeight = + std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT); + for (SmallVectorImpl<unsigned>::iterator I = UnreachableEdges.begin(), + E = UnreachableEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, UnreachableWeight); + + if (ReachableEdges.empty()) + return true; + uint32_t ReachableWeight = + std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(), + NORMAL_WEIGHT); + for (SmallVectorImpl<unsigned>::iterator I = ReachableEdges.begin(), + E = ReachableEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, ReachableWeight); + + return true; +} + +// Propagate existing explicit probabilities from either profile data or +// 'expect' intrinsic processing. The weights are read from the MD_prof +// metadata on BB's terminator; returns true only if a weight was set for +// every successor. +bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 1) + return false; + if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) + return false; + + MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); + if (!WeightsNode) + return false; + + // Ensure there are weights for all of the successors. Note that the first + // operand to the metadata node is a name, not a weight. + if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1) + return false; + + // Build up the final weights that will be used in a temporary buffer, but + // don't add them until all weights are present. Each weight value is clamped + // to [1, getMaxWeightFor(BB)]. 
+ uint32_t WeightLimit = getMaxWeightFor(BB); + SmallVector<uint32_t, 2> Weights; + Weights.reserve(TI->getNumSuccessors()); + for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { + ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i)); + if (!Weight) + return false; + Weights.push_back( + std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit))); + } + assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeWeight(BB, i, Weights[i]); + + return true; +} + +/// \brief Calculate edge weights for edges leading to cold blocks. +/// +/// A cold block is one post-dominated by a block with a call to a +/// cold function. Those edges are unlikely to be taken, so we give +/// them relatively low weight. +/// +/// Return true if we could compute the weights for cold edges. +/// Return false, otherwise. +bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) + return false; + + // Determine which successors are post-dominated by a cold block. + SmallVector<unsigned, 4> ColdEdges; + SmallVector<unsigned, 4> NormalEdges; + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + if (PostDominatedByColdCall.count(*I)) + ColdEdges.push_back(I.getSuccessorIndex()); + else + NormalEdges.push_back(I.getSuccessorIndex()); + + // If all successors are in the set of blocks post-dominated by cold calls, + // this block is in the set post-dominated by cold calls. + if (ColdEdges.size() == TI->getNumSuccessors()) + PostDominatedByColdCall.insert(BB); + else { + // Otherwise, if the block itself contains a call to a cold function, add + // it to the set of blocks postdominated by a cold call. 
+ assert(!PostDominatedByColdCall.count(BB)); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) + if (CI->hasFnAttr(Attribute::Cold)) { + PostDominatedByColdCall.insert(BB); + break; + } + } + + // Skip probabilities if this block has a single successor or if none of + // its successors is cold. + if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) + return false; + + uint32_t ColdWeight = + std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT); + for (SmallVectorImpl<unsigned>::iterator I = ColdEdges.begin(), + E = ColdEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, ColdWeight); + + if (NormalEdges.empty()) + return true; + uint32_t NormalWeight = std::max( + CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT); + for (SmallVectorImpl<unsigned>::iterator I = NormalEdges.begin(), + E = NormalEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, NormalWeight); + + return true; +} + +// Calculate Edge Weights using "Pointer Heuristics". Predict that an equality +// comparison between two pointers, or between a pointer and NULL, will fail. +bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { + BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI || !CI->isEquality()) + return false; + + Value *LHS = CI->getOperand(0); + + if (!LHS->getType()->isPointerTy()) + return false; + + assert(CI->getOperand(1)->getType()->isPointerTy()); + + // p != 0 -> isProb = true + // p == 0 -> isProb = false + // p != q -> isProb = true + // p == q -> isProb = false; + unsigned TakenIdx = 0, NonTakenIdx = 1; + bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE; + if (!isProb) + std::swap(TakenIdx, NonTakenIdx); + + setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT); + return true; +} + +// Calculate Edge Weights using "Loop Branch Heuristics". 
Predict backedges +// as taken, exiting edges as not-taken. +bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { + Loop *L = LI->getLoopFor(BB); + if (!L) + return false; + + SmallVector<unsigned, 8> BackEdges; + SmallVector<unsigned, 8> ExitingEdges; + SmallVector<unsigned, 8> InEdges; // Edges from header to the loop. + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + if (!L->contains(*I)) + ExitingEdges.push_back(I.getSuccessorIndex()); + else if (L->getHeader() == *I) + BackEdges.push_back(I.getSuccessorIndex()); + else + InEdges.push_back(I.getSuccessorIndex()); + } + + if (uint32_t numBackEdges = BackEdges.size()) { + uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; + if (backWeight < NORMAL_WEIGHT) + backWeight = NORMAL_WEIGHT; + + for (SmallVectorImpl<unsigned>::iterator EI = BackEdges.begin(), + EE = BackEdges.end(); EI != EE; ++EI) { + setEdgeWeight(BB, *EI, backWeight); + } + } + + if (uint32_t numInEdges = InEdges.size()) { + uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges; + if (inWeight < NORMAL_WEIGHT) + inWeight = NORMAL_WEIGHT; + + for (SmallVectorImpl<unsigned>::iterator EI = InEdges.begin(), + EE = InEdges.end(); EI != EE; ++EI) { + setEdgeWeight(BB, *EI, inWeight); + } + } + + if (uint32_t numExitingEdges = ExitingEdges.size()) { + uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges; + if (exitWeight < MIN_WEIGHT) + exitWeight = MIN_WEIGHT; + + for (SmallVectorImpl<unsigned>::iterator EI = ExitingEdges.begin(), + EE = ExitingEdges.end(); EI != EE; ++EI) { + setEdgeWeight(BB, *EI, exitWeight); + } + } + + return true; +} + +bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { + BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI) + return false; + + Value *RHS = CI->getOperand(1); + ConstantInt *CV = 
dyn_cast<ConstantInt>(RHS); + if (!CV) + return false; + + bool isProb; + if (CV->isZero()) { + switch (CI->getPredicate()) { + case CmpInst::ICMP_EQ: + // X == 0 -> Unlikely + isProb = false; + break; + case CmpInst::ICMP_NE: + // X != 0 -> Likely + isProb = true; + break; + case CmpInst::ICMP_SLT: + // X < 0 -> Unlikely + isProb = false; + break; + case CmpInst::ICMP_SGT: + // X > 0 -> Likely + isProb = true; + break; + default: + return false; + } + } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) { + // InstCombine canonicalizes X <= 0 into X < 1. + // X <= 0 -> Unlikely + isProb = false; + } else if (CV->isAllOnesValue()) { + switch (CI->getPredicate()) { + case CmpInst::ICMP_EQ: + // X == -1 -> Unlikely + isProb = false; + break; + case CmpInst::ICMP_NE: + // X != -1 -> Likely + isProb = true; + break; + case CmpInst::ICMP_SGT: + // InstCombine canonicalizes X >= 0 into X > -1. + // X >= 0 -> Likely + isProb = true; + break; + default: + return false; + } + } else { + return false; + } + + unsigned TakenIdx = 0, NonTakenIdx = 1; + + if (!isProb) + std::swap(TakenIdx, NonTakenIdx); + + setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); + + return true; +} + +bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + Value *Cond = BI->getCondition(); + FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond); + if (!FCmp) + return false; + + bool isProb; + if (FCmp->isEquality()) { + // f1 == f2 -> Unlikely + // f1 != f2 -> Likely + isProb = !FCmp->isTrueWhenEqual(); + } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { + // !isnan -> Likely + isProb = true; + } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { + // isnan -> Unlikely + isProb = false; + } else { + return false; + } + + unsigned TakenIdx = 0, NonTakenIdx = 1; + + if (!isProb) + std::swap(TakenIdx, 
NonTakenIdx); + + setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT); + + return true; +} + +bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { + InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()); + if (!II) + return false; + + setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT); + setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT); + return true; +} + +void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.setPreservesAll(); +} + +bool BranchProbabilityInfo::runOnFunction(Function &F) { + LastF = &F; // Store the last function we ran on for printing. + LI = &getAnalysis<LoopInfo>(); + assert(PostDominatedByUnreachable.empty()); + assert(PostDominatedByColdCall.empty()); + + // Walk the basic blocks in post-order so that we can build up state about + // the successors of a block iteratively. + for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()), + E = po_end(&F.getEntryBlock()); + I != E; ++I) { + DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n"); + if (calcUnreachableHeuristics(*I)) + continue; + if (calcMetadataWeights(*I)) + continue; + if (calcColdCallHeuristics(*I)) + continue; + if (calcLoopBranchHeuristics(*I)) + continue; + if (calcPointerHeuristics(*I)) + continue; + if (calcZeroHeuristics(*I)) + continue; + if (calcFloatingPointHeuristics(*I)) + continue; + calcInvokeHeuristics(*I); + } + + PostDominatedByUnreachable.clear(); + PostDominatedByColdCall.clear(); + return false; +} + +void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { + OS << "---- Branch Probabilities ----\n"; + // We print the probabilities from the last function the analysis ran over, + // or the function it is currently running over. 
+ assert(LastF && "Cannot print prior to running over a function"); + for (Function::const_iterator BI = LastF->begin(), BE = LastF->end(); + BI != BE; ++BI) { + for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI); + SI != SE; ++SI) { + printEdgeProbability(OS << " ", BI, *SI); + } + } +} + +uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { + uint32_t Sum = 0; + + for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex()); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + } + + return Sum; +} + +bool BranchProbabilityInfo:: +isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + // FIXME: Compare against a static "hot" BranchProbability. + return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); +} + +BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { + uint32_t Sum = 0; + uint32_t MaxWeight = 0; + BasicBlock *MaxSucc = 0; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = Succ; + } + } + + // Hot probability is at least 4/5 = 80% + if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) + return MaxSucc; + + return 0; +} + +/// Get the raw edge weight for the edge. If can't find it, return +/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index +/// to the successors. 
+uint32_t BranchProbabilityInfo:: +getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { + DenseMap<Edge, uint32_t>::const_iterator I = + Weights.find(std::make_pair(Src, IndexInSuccessors)); + + if (I != Weights.end()) + return I->second; + + return DEFAULT_WEIGHT; +} + +/// Get the raw edge weight calculated for the block pair. This returns the sum +/// of all raw edge weights from Src to Dst. +uint32_t BranchProbabilityInfo:: +getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { + uint32_t Weight = 0; + DenseMap<Edge, uint32_t>::const_iterator MapI; + for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) + if (*I == Dst) { + MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex())); + if (MapI != Weights.end()) + Weight += MapI->second; + } + return (Weight == 0) ? DEFAULT_WEIGHT : Weight; +} + +/// Set the edge weight for a given edge specified by PredBlock and an index +/// to the successors. +void BranchProbabilityInfo:: +setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, + uint32_t Weight) { + Weights[std::make_pair(Src, IndexInSuccessors)] = Weight; + DEBUG(dbgs() << "set edge " << Src->getName() << " -> " + << IndexInSuccessors << " successor weight to " + << Weight << "\n"); +} + +/// Get an edge's probability, relative to other out-edges from Src. +BranchProbability BranchProbabilityInfo:: +getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const { + uint32_t N = getEdgeWeight(Src, IndexInSuccessors); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +/// Get the probability of going from Src to Dst. It returns the sum of all +/// probabilities for edges from Src to Dst. 
+BranchProbability BranchProbabilityInfo:: +getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { + + uint32_t N = getEdgeWeight(Src, Dst); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +raw_ostream & +BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, + const BasicBlock *Src, + const BasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge " << Src->getName() << " -> " << Dst->getName() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp new file mode 100644 index 000000000000..c3f32d3a840c --- /dev/null +++ b/contrib/llvm/lib/Analysis/CFG.cpp @@ -0,0 +1,245 @@ +//===-- CFG.cpp - BasicBlock analysis --------------------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions performs analyses on basic blocks, and instructions +// contained within basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CFG.h" + +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" + +using namespace llvm; + +/// FindFunctionBackedges - Analyze the specified function to find all of the +/// loop backedges in the function and return them. This is a relatively cheap +/// (compared to computing dominators and loop info) analysis. +/// +/// The output is added to Result, as pairs of <from,to> edge info. 
+void llvm::FindFunctionBackedges(const Function &F, + SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) { + const BasicBlock *BB = &F.getEntryBlock(); + if (succ_begin(BB) == succ_end(BB)) + return; + + SmallPtrSet<const BasicBlock*, 8> Visited; + SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack; + SmallPtrSet<const BasicBlock*, 8> InStack; + + Visited.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + InStack.insert(BB); + do { + std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back(); + const BasicBlock *ParentBB = Top.first; + succ_const_iterator &I = Top.second; + + bool FoundNew = false; + while (I != succ_end(ParentBB)) { + BB = *I++; + if (Visited.insert(BB)) { + FoundNew = true; + break; + } + // Successor is in VisitStack, it's a back edge. + if (InStack.count(BB)) + Result.push_back(std::make_pair(ParentBB, BB)); + } + + if (FoundNew) { + // Go down one level if there is a unvisited successor. + InStack.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + } else { + // Go up one level. + InStack.erase(VisitStack.pop_back_val().first); + } + } while (!VisitStack.empty()); +} + +/// GetSuccessorNumber - Search for the specified successor of basic block BB +/// and return its position in the terminator instruction's list of +/// successors. It is an error to call this with a block that is not a +/// successor. +unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { + TerminatorInst *Term = BB->getTerminator(); +#ifndef NDEBUG + unsigned e = Term->getNumSuccessors(); +#endif + for (unsigned i = 0; ; ++i) { + assert(i != e && "Didn't find edge?"); + if (Term->getSuccessor(i) == Succ) + return i; + } +} + +/// isCriticalEdge - Return true if the specified edge is a critical edge. +/// Critical edges are edges from a block with multiple successors to a block +/// with multiple predecessors. 
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, + bool AllowIdenticalEdges) { + assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + if (TI->getNumSuccessors() == 1) return false; + + const BasicBlock *Dest = TI->getSuccessor(SuccNum); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); + + // If there is more than one predecessor, this is a critical edge... + assert(I != E && "No preds, but we have an edge to the block?"); + const BasicBlock *FirstPred = *I; + ++I; // Skip one edge due to the incoming arc from TI. + if (!AllowIdenticalEdges) + return I != E; + + // If AllowIdenticalEdges is true, then we allow this edge to be considered + // non-critical iff all preds come from TI's block. + while (I != E) { + const BasicBlock *P = *I; + if (P != FirstPred) + return true; + // Note: leave this as is until no one ever compiles with either gcc 4.0.1 + // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 + E = pred_end(P); + ++I; + } + return false; +} + +// LoopInfo contains a mapping from basic block to the innermost loop. Find +// the outermost loop in the loop nest that contains BB. +static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) { + const Loop *L = LI->getLoopFor(BB); + if (L) { + while (const Loop *Parent = L->getParentLoop()) + L = Parent; + } + return L; +} + +// True if there is a loop which contains both BB1 and BB2. +static bool loopContainsBoth(const LoopInfo *LI, + const BasicBlock *BB1, const BasicBlock *BB2) { + const Loop *L1 = getOutermostLoop(LI, BB1); + const Loop *L2 = getOutermostLoop(LI, BB2); + return L1 != NULL && L1 == L2; +} + +static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist, + BasicBlock *StopBB, + const DominatorTree *DT, + const LoopInfo *LI) { + // When the stop block is unreachable, it's dominated from everywhere, + // regardless of whether there's a path between the two blocks. 
+ if (DT && !DT->isReachableFromEntry(StopBB)) + DT = 0; + + // Limit the number of blocks we visit. The goal is to avoid run-away compile + // times on large CFGs without hampering sensible code. Arbitrarily chosen. + unsigned Limit = 32; + SmallSet<const BasicBlock*, 64> Visited; + do { + BasicBlock *BB = Worklist.pop_back_val(); + if (!Visited.insert(BB)) + continue; + if (BB == StopBB) + return true; + if (DT && DT->dominates(BB, StopBB)) + return true; + if (LI && loopContainsBoth(LI, BB, StopBB)) + return true; + + if (!--Limit) { + // We haven't been able to prove it one way or the other. Conservatively + // answer true -- that there is potentially a path. + return true; + } + + if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) { + // All blocks in a single loop are reachable from all other blocks. From + // any of these blocks, we can skip directly to the exits of the loop, + // ignoring any other blocks inside the loop body. + Outer->getExitBlocks(Worklist); + } else { + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + Worklist.push_back(*I); + } + } while (!Worklist.empty()); + + // We have exhausted all possible paths and are certain that 'To' can not be + // reached from 'From'. 
+ return false; +} + +bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B, + const DominatorTree *DT, const LoopInfo *LI) { + assert(A->getParent() == B->getParent() && + "This analysis is function-local!"); + + SmallVector<BasicBlock*, 32> Worklist; + Worklist.push_back(const_cast<BasicBlock*>(A)); + + return isPotentiallyReachableInner(Worklist, const_cast<BasicBlock*>(B), + DT, LI); +} + +bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, + const DominatorTree *DT, const LoopInfo *LI) { + assert(A->getParent()->getParent() == B->getParent()->getParent() && + "This analysis is function-local!"); + + SmallVector<BasicBlock*, 32> Worklist; + + if (A->getParent() == B->getParent()) { + // The same block case is special because it's the only time we're looking + // within a single block to see which instruction comes first. Once we + // start looking at multiple blocks, the first instruction of the block is + // reachable, so we only need to determine reachability between whole + // blocks. + BasicBlock *BB = const_cast<BasicBlock *>(A->getParent()); + + // If the block is in a loop then we can reach any instruction in the block + // from any other instruction in the block by going around a backedge. + if (LI && LI->getLoopFor(BB) != 0) + return true; + + // Linear scan, start at 'A', see whether we hit 'B' or the end first. + for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) { + if (&*I == B) + return true; + } + + // Can't be in a loop if it's the entry block -- the entry block may not + // have predecessors. + if (BB == &BB->getParent()->getEntryBlock()) + return false; + + // Otherwise, continue doing the normal per-BB CFG walk. + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + Worklist.push_back(*I); + + if (Worklist.empty()) { + // We've proven that there's no path! 
+ return false; + } + } else { + Worklist.push_back(const_cast<BasicBlock*>(A->getParent())); + } + + if (A->getParent() == &A->getParent()->getParent()->getEntryBlock()) + return true; + if (B->getParent() == &A->getParent()->getParent()->getEntryBlock()) + return false; + + return isPotentiallyReachableInner(Worklist, + const_cast<BasicBlock*>(B->getParent()), + DT, LI); +} diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp new file mode 100644 index 000000000000..9b6879a42ed4 --- /dev/null +++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp @@ -0,0 +1,164 @@ +//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a '-dot-cfg' analysis pass, which emits the +// cfg.<fnname>.dot file for each function in the program, with a graph of the +// CFG for that function. +// +// The other main feature of this file is that it implements the +// Function::viewCFG method, which is useful for debugging passes which operate +// on the CFG. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CFGPrinter.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + struct CFGViewer : public FunctionPass { + static char ID; // Pass identifcation, replacement for typeid + CFGViewer() : FunctionPass(ID) { + initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + F.viewCFG(); + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGViewer::ID = 0; +INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true) + +namespace { + struct CFGOnlyViewer : public FunctionPass { + static char ID; // Pass identifcation, replacement for typeid + CFGOnlyViewer() : FunctionPass(ID) { + initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + F.viewCFGOnly(); + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGOnlyViewer::ID = 0; +INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only", + "View CFG of function (with no function bodies)", false, true) + +namespace { + struct CFGPrinter : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CFGPrinter() : FunctionPass(ID) { + initializeCFGPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + std::string Filename = "cfg." 
+ F.getName().str() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) + WriteGraph(File, (const Function*)&F); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGPrinter::ID = 0; +INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file", + false, true) + +namespace { + struct CFGOnlyPrinter : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CFGOnlyPrinter() : FunctionPass(ID) { + initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + std::string Filename = "cfg." + F.getName().str() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) + WriteGraph(File, (const Function*)&F, true); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + return false; + } + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGOnlyPrinter::ID = 0; +INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", + "Print CFG of function to 'dot' file (with no function bodies)", + false, true) + +/// viewCFG - This function is meant for use from the debugger. You can just +/// say 'call F->viewCFG()' and a ghostview window should pop up from the +/// program, displaying the CFG of the current function. This depends on there +/// being a 'dot' and 'gv' program in your path. +/// +void Function::viewCFG() const { + ViewGraph(this, "cfg" + getName()); +} + +/// viewCFGOnly - This function is meant for use from the debugger. 
/// It works just like viewCFG, but it does not include the contents of basic
/// blocks into the nodes, just the label.  If you are only interested in the
/// CFG this can make the graph smaller.
///
void Function::viewCFGOnly() const {
  ViewGraph(this, "cfg" + getName(), true);
}

/// Create a new instance of the CFGPrinter pass. The returned object is
/// allocated with new.
FunctionPass *llvm::createCFGPrinterPass () {
  return new CFGPrinter();
}

/// Create a new instance of the CFGOnlyPrinter pass. The returned object is
/// allocated with new.
FunctionPass *llvm::createCFGOnlyPrinterPass () {
  return new CFGOnlyPrinter();
}

diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 000000000000..79fab1be4413
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,186 @@
//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help determine which pointers are captured.
// A pointer value is captured if the function makes a copy of any part of the
// pointer that outlives the call.  Not being captured means, more or less, that
// the pointer is only dereferenced and not stored in a global.  Returning part
// of the pointer as the function return value may or may not count as capturing
// the pointer, depending on the context.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CallSite.h" + +using namespace llvm; + +CaptureTracker::~CaptureTracker() {} + +bool CaptureTracker::shouldExplore(Use *U) { return true; } + +namespace { + struct SimpleCaptureTracker : public CaptureTracker { + explicit SimpleCaptureTracker(bool ReturnCaptures) + : ReturnCaptures(ReturnCaptures), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool captured(Use *U) { + if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures) + return false; + + Captured = true; + return true; + } + + bool ReturnCaptures; + + bool Captured; + }; +} + +/// PointerMayBeCaptured - Return true if this pointer value may be captured +/// by the enclosing function (which is required to exist). This routine can +/// be expensive, so consider caching the results. The boolean ReturnCaptures +/// specifies whether returning the value (or part of it) from the function +/// counts as capturing it or not. The boolean StoreCaptures specified whether +/// storing the value (or part of it) into memory anywhere automatically +/// counts as capturing it or not. +bool llvm::PointerMayBeCaptured(const Value *V, + bool ReturnCaptures, bool StoreCaptures) { + assert(!isa<GlobalValue>(V) && + "It doesn't make sense to ask whether a global is captured."); + + // TODO: If StoreCaptures is not true, we could do Fancy analysis + // to determine whether this store is not actually an escape point. + // In that case, BasicAliasAnalysis should be updated as well to + // take advantage of this. 
+ (void)StoreCaptures; + + SimpleCaptureTracker SCT(ReturnCaptures); + PointerMayBeCaptured(V, &SCT); + return SCT.Captured; +} + +/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep +/// a cache. Then we can move the code from BasicAliasAnalysis into +/// that path, and remove this threshold. +static int const Threshold = 20; + +void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { + assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); + SmallVector<Use*, Threshold> Worklist; + SmallSet<Use*, Threshold> Visited; + int Count = 0; + + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + // If there are lots of uses, conservatively say that the value + // is captured to avoid taking too much compile time. + if (Count++ >= Threshold) + return Tracker->tooManyUses(); + + Use *U = &UI.getUse(); + if (!Tracker->shouldExplore(U)) continue; + Visited.insert(U); + Worklist.push_back(U); + } + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast<Instruction>(U->getUser()); + V = U->get(); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(I); + // Not captured if the callee is readonly, doesn't return a copy through + // its return value and doesn't unwind (a readonly function can leak bits + // by throwing an exception or not depending on the input value). + if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) + break; + + // Not captured if only passed via 'nocapture' arguments. Note that + // calling a function pointer does not in itself cause the pointer to + // be captured. This is a subtle point considering that (for example) + // the callee might return its own address. 
It is analogous to saying + // that loading a value from a pointer does not cause the pointer to be + // captured, even though the loaded value might be the pointer itself + // (think of self-referential objects). + CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); + for (CallSite::arg_iterator A = B; A != E; ++A) + if (A->get() == V && !CS.doesNotCapture(A - B)) + // The parameter is not marked 'nocapture' - captured. + if (Tracker->captured(U)) + return; + break; + } + case Instruction::Load: + // Loading from a pointer does not cause it to be captured. + break; + case Instruction::VAArg: + // "va-arg" from a pointer does not cause it to be captured. + break; + case Instruction::Store: + if (V == I->getOperand(0)) + // Stored the pointer - conservatively assume it may be captured. + if (Tracker->captured(U)) + return; + // Storing to the pointee does not cause the pointer to be captured. + break; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + // The original value is not captured via this if the new value isn't. + Count = 0; + for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + // If there are lots of uses, conservatively say that the value + // is captured to avoid taking too much compile time. + if (Count++ >= Threshold) + return Tracker->tooManyUses(); + + Use *U = &UI.getUse(); + if (Visited.insert(U)) + if (Tracker->shouldExplore(U)) + Worklist.push_back(U); + } + break; + case Instruction::ICmp: + // Don't count comparisons of a no-alias return value against null as + // captures. This allows us to ignore comparisons of malloc results + // with null, for example. + if (ConstantPointerNull *CPN = + dyn_cast<ConstantPointerNull>(I->getOperand(1))) + if (CPN->getType()->getAddressSpace() == 0) + if (isNoAliasCall(V->stripPointerCasts())) + break; + // Otherwise, be conservative. 
There are crazy ways to capture pointers + // using comparisons. + if (Tracker->captured(U)) + return; + break; + default: + // Something else - be conservative and say it is captured. + if (Tracker->captured(U)) + return; + break; + } + } + + // All uses examined. +} diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp new file mode 100644 index 000000000000..8cda01a24c0d --- /dev/null +++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp @@ -0,0 +1,96 @@ +//===- CodeMetrics.cpp - Code cost measurements ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements code cost measurement utilities. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" + +using namespace llvm; + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, + const TargetTransformInfo &TTI) { + ++NumBlocks; + unsigned NumInstsBeforeThisBB = NumInsts; + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + // Special handling for calls. + if (isa<CallInst>(II) || isa<InvokeInst>(II)) { + ImmutableCallSite CS(cast<Instruction>(II)); + + if (const Function *F = CS.getCalledFunction()) { + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably + // exposed by an interleaved devirtualization pass). 
+ if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse()) + ++NumInlineCandidates; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + isRecursive = true; + + if (TTI.isLoweredToCall(F)) + ++NumCalls; + } else { + // We don't want inline asm to count as a call - that would prevent loop + // unrolling. The argument setup cost is still real, though. + if (!isa<InlineAsm>(CS.getCalledValue())) + ++NumCalls; + } + } + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) + ++NumVectorInsts; + + if (const CallInst *CI = dyn_cast<CallInst>(II)) + if (CI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II)) + if (InvI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + NumInsts += TTI.getUserCost(&*II); + } + + if (isa<ReturnInst>(BB->getTerminator())) + ++NumRets; + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this indirect + // jump would jump from the inlined copy of the function into the original + // function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions + // with indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably + // don't want to inline this function. 
+ notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator()); + + // Remember NumInsts for this BB. + NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; +} diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp new file mode 100644 index 000000000000..3d32232dacf9 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -0,0 +1,1640 @@ +//===-- ConstantFolding.cpp - Fold instructions into constants ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines routines for folding instructions into constants. +// +// Also, to supplement the basic IR ConstantExpr simplifications, +// this file defines some additional folding routines that can make use of +// DataLayout information. These functions cannot go in IR due to library +// dependency issues. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FEnv.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include <cerrno> +#include <cmath> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Constant Folding internal helper functions +//===----------------------------------------------------------------------===// + +/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with +/// DataLayout. This always returns a non-null constant, but it may be a +/// ConstantExpr if unfoldable. +static Constant *FoldBitCast(Constant *C, Type *DestTy, + const DataLayout &TD) { + // Catch the obvious splat cases. + if (C->isNullValue() && !DestTy->isX86_MMXTy()) + return Constant::getNullValue(DestTy); + if (C->isAllOnesValue() && !DestTy->isX86_MMXTy()) + return Constant::getAllOnesValue(DestTy); + + // Handle a vector->integer cast. + if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) { + VectorType *VTy = dyn_cast<VectorType>(C->getType()); + if (VTy == 0) + return ConstantExpr::getBitCast(C, DestTy); + + unsigned NumSrcElts = VTy->getNumElements(); + Type *SrcEltTy = VTy->getElementType(); + + // If the vector is a vector of floating point, convert it to vector of int + // to simplify things. 
+ if (SrcEltTy->isFloatingPointTy()) { + unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); + Type *SrcIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); + // Ask IR to do the conversion now that #elts line up. + C = ConstantExpr::getBitCast(C, SrcIVTy); + } + + ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); + if (CDV == 0) + return ConstantExpr::getBitCast(C, DestTy); + + // Now that we know that the input value is a vector of integers, just shift + // and insert them into our result. + unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); + APInt Result(IT->getBitWidth(), 0); + for (unsigned i = 0; i != NumSrcElts; ++i) { + Result <<= BitShift; + if (TD.isLittleEndian()) + Result |= CDV->getElementAsInteger(NumSrcElts-i-1); + else + Result |= CDV->getElementAsInteger(i); + } + + return ConstantInt::get(IT, Result); + } + + // The code below only handles casts to vectors currently. + VectorType *DestVTy = dyn_cast<VectorType>(DestTy); + if (DestVTy == 0) + return ConstantExpr::getBitCast(C, DestTy); + + // If this is a scalar -> vector cast, convert the input into a <1 x scalar> + // vector so the code below can handle it uniformly. + if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) { + Constant *Ops = C; // don't take the address of C! + return FoldBitCast(ConstantVector::get(Ops), DestTy, TD); + } + + // If this is a bitcast from constant vector -> vector, fold it. + if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C)) + return ConstantExpr::getBitCast(C, DestTy); + + // If the element types match, IR can fold it. 
+ unsigned NumDstElt = DestVTy->getNumElements(); + unsigned NumSrcElt = C->getType()->getVectorNumElements(); + if (NumDstElt == NumSrcElt) + return ConstantExpr::getBitCast(C, DestTy); + + Type *SrcEltTy = C->getType()->getVectorElementType(); + Type *DstEltTy = DestVTy->getElementType(); + + // Otherwise, we're changing the number of elements in a vector, which + // requires endianness information to do the right thing. For example, + // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) + // folds to (little endian): + // <4 x i32> <i32 0, i32 0, i32 1, i32 0> + // and to (big endian): + // <4 x i32> <i32 0, i32 0, i32 0, i32 1> + + // First thing is first. We only want to think about integer here, so if + // we have something in FP form, recast it as integer. + if (DstEltTy->isFloatingPointTy()) { + // Fold to an vector of integers with same size as our FP type. + unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); + Type *DestIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); + // Recursively handle this integer conversion, if possible. + C = FoldBitCast(C, DestIVTy, TD); + + // Finally, IR can handle this now that #elts line up. + return ConstantExpr::getBitCast(C, DestTy); + } + + // Okay, we know the destination is integer, if the input is FP, convert + // it to integer first. + if (SrcEltTy->isFloatingPointTy()) { + unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); + Type *SrcIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); + // Ask IR to do the conversion now that #elts line up. + C = ConstantExpr::getBitCast(C, SrcIVTy); + // If IR wasn't able to fold it, bail out. + if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector. + !isa<ConstantDataVector>(C)) + return C; + } + + // Now we know that the input and output vectors are both integer vectors + // of the same size, and that their #elements is not the same. 
Do the + // conversion here, which depends on whether the input or output has + // more elements. + bool isLittleEndian = TD.isLittleEndian(); + + SmallVector<Constant*, 32> Result; + if (NumDstElt < NumSrcElt) { + // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>) + Constant *Zero = Constant::getNullValue(DstEltTy); + unsigned Ratio = NumSrcElt/NumDstElt; + unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits(); + unsigned SrcElt = 0; + for (unsigned i = 0; i != NumDstElt; ++i) { + // Build each element of the result. + Constant *Elt = Zero; + unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); + + // Zero extend the element to the right size. + Src = ConstantExpr::getZExt(Src, Elt->getType()); + + // Shift it to the right place, depending on endianness. + Src = ConstantExpr::getShl(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + + // Mix it in. + Elt = ConstantExpr::getOr(Elt, Src); + } + Result.push_back(Elt); + } + return ConstantVector::get(Result); + } + + // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) + unsigned Ratio = NumDstElt/NumSrcElt; + unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + + // Loop over each source value, expanding into multiple results. + for (unsigned i = 0; i != NumSrcElt; ++i) { + Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); + + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + // Shift the piece of the value into the right place, depending on + // endianness. 
+ Constant *Elt = ConstantExpr::getLShr(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; + + // Truncate and remember this piece. + Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); + } + } + + return ConstantVector::get(Result); +} + + +/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset +/// from a global, return the global and the constant. Because of +/// constantexprs, this function is recursive. +static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, + APInt &Offset, const DataLayout &TD) { + // Trivial case, constant is the global. + if ((GV = dyn_cast<GlobalValue>(C))) { + unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType()); + Offset = APInt(BitWidth, 0); + return true; + } + + // Otherwise, if this isn't a constant expr, bail out. + ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) return false; + + // Look through ptr->int and ptr->ptr casts. + if (CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::BitCast) + return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); + + // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) + GEPOperator *GEP = dyn_cast<GEPOperator>(CE); + if (!GEP) + return false; + + unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType()); + APInt TmpOffset(BitWidth, 0); + + // If the base isn't a global+constant, we aren't either. + if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD)) + return false; + + // Otherwise, add any offset that our operands provide. + if (!GEP->accumulateConstantOffset(TD, TmpOffset)) + return false; + + Offset = TmpOffset; + return true; +} + +/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the +/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the +/// pointer to copy results into and BytesLeft is the number of bytes left in +/// the CurPtr buffer. 
TD is the target data. +static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, + unsigned char *CurPtr, unsigned BytesLeft, + const DataLayout &TD) { + assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) && + "Out of range access"); + + // If this element is zero or undefined, we can just return since *CurPtr is + // zero initialized. + if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) + return true; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { + if (CI->getBitWidth() > 64 || + (CI->getBitWidth() & 7) != 0) + return false; + + uint64_t Val = CI->getZExtValue(); + unsigned IntBytes = unsigned(CI->getBitWidth()/8); + + for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { + int n = ByteOffset; + if (!TD.isLittleEndian()) + n = IntBytes - n - 1; + CurPtr[i] = (unsigned char)(Val >> (n * 8)); + ++ByteOffset; + } + return true; + } + + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + if (CFP->getType()->isDoubleTy()) { + C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + if (CFP->getType()->isFloatTy()){ + C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + if (CFP->getType()->isHalfTy()){ + C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + return false; + } + + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { + const StructLayout *SL = TD.getStructLayout(CS->getType()); + unsigned Index = SL->getElementContainingOffset(ByteOffset); + uint64_t CurEltOffset = SL->getElementOffset(Index); + ByteOffset -= CurEltOffset; + + while (1) { + // If the element access is to the element itself and not to tail padding, + // read the bytes from the element. 
+ uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType()); + + if (ByteOffset < EltSize && + !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, + BytesLeft, TD)) + return false; + + ++Index; + + // Check to see if we read from the last struct element, if so we're done. + if (Index == CS->getType()->getNumElements()) + return true; + + // If we read all of the bytes we needed from this element we're done. + uint64_t NextEltOffset = SL->getElementOffset(Index); + + if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset) + return true; + + // Move to the next element of the struct. + CurPtr += NextEltOffset - CurEltOffset - ByteOffset; + BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset; + ByteOffset = 0; + CurEltOffset = NextEltOffset; + } + // not reached. + } + + if (isa<ConstantArray>(C) || isa<ConstantVector>(C) || + isa<ConstantDataSequential>(C)) { + Type *EltTy = C->getType()->getSequentialElementType(); + uint64_t EltSize = TD.getTypeAllocSize(EltTy); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + uint64_t NumElts; + if (ArrayType *AT = dyn_cast<ArrayType>(C->getType())) + NumElts = AT->getNumElements(); + else + NumElts = C->getType()->getVectorNumElements(); + + for (; Index != NumElts; ++Index) { + if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + + uint64_t BytesWritten = EltSize - Offset; + assert(BytesWritten <= EltSize && "Not indexing into this element?"); + if (BytesWritten >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= BytesWritten; + CurPtr += BytesWritten; + } + return true; + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + if (CE->getOpcode() == Instruction::IntToPtr && + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) { + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + BytesLeft, TD); + } + } + + // Otherwise, unknown initializer type. 
+ return false; +} + +static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, + const DataLayout &TD) { + PointerType *PTy = cast<PointerType>(C->getType()); + Type *LoadTy = PTy->getElementType(); + IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); + + // If this isn't an integer load we can't fold it directly. + if (!IntType) { + unsigned AS = PTy->getAddressSpace(); + + // If this is a float/double load, we can try folding it as an int32/64 load + // and then bitcast the result. This can be useful for union cases. Note + // that address spaces don't matter here since we're not going to result in + // an actual new load. + Type *MapTy; + if (LoadTy->isHalfTy()) + MapTy = Type::getInt16PtrTy(C->getContext(), AS); + else if (LoadTy->isFloatTy()) + MapTy = Type::getInt32PtrTy(C->getContext(), AS); + else if (LoadTy->isDoubleTy()) + MapTy = Type::getInt64PtrTy(C->getContext(), AS); + else if (LoadTy->isVectorTy()) { + MapTy = PointerType::getIntNPtrTy(C->getContext(), + TD.getTypeAllocSizeInBits(LoadTy), + AS); + } else + return 0; + + C = FoldBitCast(C, MapTy, TD); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) + return FoldBitCast(Res, LoadTy, TD); + return 0; + } + + unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; + if (BytesLoaded > 32 || BytesLoaded == 0) + return 0; + + GlobalValue *GVal; + APInt Offset; + if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) + return 0; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || + !GV->getInitializer()->getType()->isSized()) + return 0; + + // If we're loading off the beginning of the global, some bytes may be valid, + // but we don't try to handle this. + if (Offset.isNegative()) + return 0; + + // If we're not accessing anything in this constant, the result is undefined. 
+ if (Offset.getZExtValue() >= + TD.getTypeAllocSize(GV->getInitializer()->getType())) + return UndefValue::get(IntType); + + unsigned char RawBytes[32] = {0}; + if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, + BytesLoaded, TD)) + return 0; + + APInt ResultVal = APInt(IntType->getBitWidth(), 0); + if (TD.isLittleEndian()) { + ResultVal = RawBytes[BytesLoaded - 1]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[BytesLoaded - 1 - i]; + } + } else { + ResultVal = RawBytes[0]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[i]; + } + } + + return ConstantInt::get(IntType->getContext(), ResultVal); +} + +/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would +/// produce if it is constant and determinable. If this is not determinable, +/// return null. +Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, + const DataLayout *TD) { + // First, try the easy cases: + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + + // If the loaded value isn't a constant expr, we can't handle it. + ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) + return 0; + + if (CE->getOpcode() == Instruction::GetElementPtr) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { + if (Constant *V = + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) + return V; + } + } + } + + // Instead of loading constant c string, use corresponding integer value + // directly if string length is small enough. 
+ StringRef Str; + if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) { + unsigned StrLen = Str.size(); + Type *Ty = cast<PointerType>(CE->getType())->getElementType(); + unsigned NumBits = Ty->getPrimitiveSizeInBits(); + // Replace load with immediate integer if the result is an integer or fp + // value. + if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 && + (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) { + APInt StrVal(NumBits, 0); + APInt SingleChar(NumBits, 0); + if (TD->isLittleEndian()) { + for (signed i = StrLen-1; i >= 0; i--) { + SingleChar = (uint64_t) Str[i] & UCHAR_MAX; + StrVal = (StrVal << 8) | SingleChar; + } + } else { + for (unsigned i = 0; i < StrLen; i++) { + SingleChar = (uint64_t) Str[i] & UCHAR_MAX; + StrVal = (StrVal << 8) | SingleChar; + } + // Append NULL at the end. + SingleChar = 0; + StrVal = (StrVal << 8) | SingleChar; + } + + Constant *Res = ConstantInt::get(CE->getContext(), StrVal); + if (Ty->isFloatingPointTy()) + Res = ConstantExpr::getBitCast(Res, Ty); + return Res; + } + } + + // If this load comes from anywhere in a constant global, and if the global + // is all undef or zero, we know what it loads. + if (GlobalVariable *GV = + dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { + Type *ResTy = cast<PointerType>(C->getType())->getElementType(); + if (GV->getInitializer()->isNullValue()) + return Constant::getNullValue(ResTy); + if (isa<UndefValue>(GV->getInitializer())) + return UndefValue::get(ResTy); + } + } + + // Try hard to fold loads from bitcasted strange and non-type-safe things. 
+ if (TD) + return FoldReinterpretLoadFromConstPtr(CE, *TD); + return 0; +} + +static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ + if (LI->isVolatile()) return 0; + + if (Constant *C = dyn_cast<Constant>(LI->getOperand(0))) + return ConstantFoldLoadFromConstPtr(C, TD); + + return 0; +} + +/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. +/// Attempt to symbolically evaluate the result of a binary operator merging +/// these together. If target data info is available, it is provided as DL, +/// otherwise DL is null. +static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, + Constant *Op1, const DataLayout *DL){ + // SROA + + // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. + // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute + // bits. + + + if (Opc == Instruction::And && DL) { + unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType()); + APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); + APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); + ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); + ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + // All the bits of Op0 that the 'and' could be masking are already zero. + return Op0; + } + if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + // All the bits of Op1 that the 'and' could be masking are already zero. + return Op1; + } + + APInt KnownZero = KnownZero0 | KnownZero1; + APInt KnownOne = KnownOne0 & KnownOne1; + if ((KnownZero | KnownOne).isAllOnesValue()) { + return ConstantInt::get(Op0->getType(), KnownOne); + } + } + + // If the constant expr is something like &A[123] - &A[4].f, fold this into a + // constant. This happens frequently when iterating over a global array. 
+ if (Opc == Instruction::Sub && DL) { + GlobalValue *GV1, *GV2; + APInt Offs1, Offs2; + + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && + GV1 == GV2) { + unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); + + // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. + // PtrToInt may change the bitwidth so we have convert to the right size + // first. + return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) - + Offs2.zextOrTrunc(OpSize)); + } + } + + return 0; +} + +/// CastGEPIndices - If array indices are not pointer-sized integers, +/// explicitly cast them so that they aren't implicitly casted by the +/// getelementptr. +static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, + Type *ResultTy, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + if (!TD) + return 0; + + Type *IntPtrTy = TD->getIntPtrType(ResultTy); + + bool Any = false; + SmallVector<Constant*, 32> NewIdxs; + for (unsigned i = 1, e = Ops.size(); i != e; ++i) { + if ((i == 1 || + !isa<StructType>(GetElementPtrInst::getIndexedType( + Ops[0]->getType(), + Ops.slice(1, i - 1)))) && + Ops[i]->getType() != IntPtrTy) { + Any = true; + NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], + true, + IntPtrTy, + true), + Ops[i], IntPtrTy)); + } else + NewIdxs.push_back(Ops[i]); + } + + if (!Any) + return 0; + + Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) + C = Folded; + } + + return C; +} + +/// Strip the pointer casts, but preserve the address space information. 
+static Constant* StripPtrCastKeepAS(Constant* Ptr) { + assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); + PointerType *OldPtrTy = cast<PointerType>(Ptr->getType()); + Ptr = cast<Constant>(Ptr->stripPointerCasts()); + PointerType *NewPtrTy = cast<PointerType>(Ptr->getType()); + + // Preserve the address space number of the pointer. + if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) { + NewPtrTy = NewPtrTy->getElementType()->getPointerTo( + OldPtrTy->getAddressSpace()); + Ptr = ConstantExpr::getPointerCast(Ptr, NewPtrTy); + } + return Ptr; +} + +/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP +/// constant expression, do so. +static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, + Type *ResultTy, const DataLayout *TD, + const TargetLibraryInfo *TLI) { + Constant *Ptr = Ops[0]; + if (!TD || !Ptr->getType()->getPointerElementType()->isSized() || + !Ptr->getType()->isPointerTy()) + return 0; + + Type *IntPtrTy = TD->getIntPtrType(Ptr->getType()); + Type *ResultElementTy = ResultTy->getPointerElementType(); + + // If this is a constant expr gep that is effectively computing an + // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + if (!isa<ConstantInt>(Ops[i])) { + + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: + // "inttoptr (sub (ptrtoint Ptr), V)" + if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]); + assert((CE == 0 || CE->getType() == IntPtrTy) && + "CastGEPIndices didn't canonicalize index types!"); + if (CE && CE->getOpcode() == Instruction::Sub && + CE->getOperand(0)->isNullValue()) { + Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); + Res = ConstantExpr::getSub(Res, CE->getOperand(1)); + Res = ConstantExpr::getIntToPtr(Res, ResultTy); + if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res)) + Res = 
ConstantFoldConstantExpression(ResCE, TD, TLI); + return Res; + } + } + return 0; + } + + unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); + APInt Offset = + APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), + makeArrayRef((Value *const*) + Ops.data() + 1, + Ops.size() - 1))); + Ptr = StripPtrCastKeepAS(Ptr); + + // If this is a GEP of a GEP, fold it all into a single GEP. + while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { + SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end()); + + // Do not try the incorporate the sub-GEP if some index is not a number. + bool AllConstantInt = true; + for (unsigned i = 0, e = NestedOps.size(); i != e; ++i) + if (!isa<ConstantInt>(NestedOps[i])) { + AllConstantInt = false; + break; + } + if (!AllConstantInt) + break; + + Ptr = cast<Constant>(GEP->getOperand(0)); + Offset += APInt(BitWidth, + TD->getIndexedOffset(Ptr->getType(), NestedOps)); + Ptr = StripPtrCastKeepAS(Ptr); + } + + // If the base value for this address is a literal integer value, fold the + // getelementptr to the resulting integer value casted to the pointer type. + APInt BasePtr(BitWidth, 0); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + if (CE->getOpcode() == Instruction::IntToPtr) { + if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) + BasePtr = Base->getValue().zextOrTrunc(BitWidth); + } + } + + if (Ptr->isNullValue() || BasePtr != 0) { + Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr); + return ConstantExpr::getIntToPtr(C, ResultTy); + } + + // Otherwise form a regular getelementptr. Recompute the indices so that + // we eliminate over-indexing of the notional static type array bounds. + // This makes it easy to determine if the getelementptr is "inbounds". + // Also, this helps GlobalOpt do SROA on GlobalVariables. 
+ Type *Ty = Ptr->getType(); + assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type"); + SmallVector<Constant *, 32> NewIdxs; + + do { + if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { + if (ATy->isPointerTy()) { + // The only pointer indexing we'll do is on the first index of the GEP. + if (!NewIdxs.empty()) + break; + + // Only handle pointers to sized types, not pointers to functions. + if (!ATy->getElementType()->isSized()) + return 0; + } + + // Determine which element of the array the offset points into. + APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); + if (ElemSize == 0) + // The element size is 0. This may be [0 x Ty]*, so just use a zero + // index for this level and proceed to the next level to see if it can + // accommodate the offset. + NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0)); + else { + // The element size is non-zero divide the offset by the element + // size (rounding down), to compute the index at this level. + APInt NewIdx = Offset.udiv(ElemSize); + Offset -= NewIdx * ElemSize; + NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx)); + } + Ty = ATy->getElementType(); + } else if (StructType *STy = dyn_cast<StructType>(Ty)) { + // If we end up with an offset that isn't valid for this struct type, we + // can't re-form this GEP in a regular form, so bail out. The pointer + // operand likely went through casts that are necessary to make the GEP + // sensible. + const StructLayout &SL = *TD->getStructLayout(STy); + if (Offset.uge(SL.getSizeInBytes())) + break; + + // Determine which field of the struct the offset points into. The + // getZExtValue is fine as we've already ensured that the offset is + // within the range representable by the StructLayout API. 
+ unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue()); + NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + ElIdx)); + Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx)); + Ty = STy->getTypeAtIndex(ElIdx); + } else { + // We've reached some non-indexable type. + break; + } + } while (Ty != ResultElementTy); + + // If we haven't used up the entire offset by descending the static + // type, then the offset is pointing into the middle of an indivisible + // member, so we can't simplify it. + if (Offset != 0) + return 0; + + // Create a GEP. + Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); + assert(C->getType()->getPointerElementType() == Ty && + "Computed GetElementPtr has unexpected type!"); + + // If we ended up indexing a member with a type that doesn't match + // the type of what the original indices indexed, add a cast. + if (Ty != ResultElementTy) + C = FoldBitCast(C, ResultTy, *TD); + + return C; +} + + + +//===----------------------------------------------------------------------===// +// Constant Folding public APIs +//===----------------------------------------------------------------------===// + +/// ConstantFoldInstruction - Try to constant fold the specified instruction. +/// If successful, the constant result is returned, if not, null is returned. +/// Note that this fails if not all of the operands are constant. Otherwise, +/// this function can only fail when attempting to fold instructions like loads +/// and stores, which have no constant expression form. +Constant *llvm::ConstantFoldInstruction(Instruction *I, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + // Handle PHI nodes quickly here... + if (PHINode *PN = dyn_cast<PHINode>(I)) { + Constant *CommonValue = 0; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PN->getIncomingValue(i); + // If the incoming value is undef then skip it. 
Note that while we could + // skip the value if it is equal to the phi node itself we choose not to + // because that would break the rule that constant folding only applies if + // all operands are constants. + if (isa<UndefValue>(Incoming)) + continue; + // If the incoming value is not a constant, then give up. + Constant *C = dyn_cast<Constant>(Incoming); + if (!C) + return 0; + // Fold the PHI's operands. + if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C)) + C = ConstantFoldConstantExpression(NewC, TD, TLI); + // If the incoming value is a different constant to + // the one we saw previously, then give up. + if (CommonValue && C != CommonValue) + return 0; + CommonValue = C; + } + + + // If we reach here, all incoming values are the same constant or undef. + return CommonValue ? CommonValue : UndefValue::get(PN->getType()); + } + + // Scan the operand list, checking to see if they are all constants, if so, + // hand off to ConstantFoldInstOperands. + SmallVector<Constant*, 8> Ops; + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { + Constant *Op = dyn_cast<Constant>(*i); + if (!Op) + return 0; // All operands not constant! + + // Fold the Instruction's operands. 
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op)) + Op = ConstantFoldConstantExpression(NewCE, TD, TLI); + + Ops.push_back(Op); + } + + if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], + TD, TLI); + + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + return ConstantFoldLoadInst(LI, TD); + + if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) { + return ConstantExpr::getInsertValue( + cast<Constant>(IVI->getAggregateOperand()), + cast<Constant>(IVI->getInsertedValueOperand()), + IVI->getIndices()); + } + + if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) { + return ConstantExpr::getExtractValue( + cast<Constant>(EVI->getAggregateOperand()), + EVI->getIndices()); + } + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI); +} + +static Constant * +ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD, + const TargetLibraryInfo *TLI, + SmallPtrSet<ConstantExpr *, 4> &FoldedOps) { + SmallVector<Constant *, 8> Ops; + for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; + ++i) { + Constant *NewC = cast<Constant>(*i); + // Recursively fold the ConstantExpr's operands. If we have already folded + // a ConstantExpr, we don't have to process it again. + if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) { + if (FoldedOps.insert(NewCE)) + NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps); + } + Ops.push_back(NewC); + } + + if (CE->isCompare()) + return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], + TD, TLI); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI); +} + +/// ConstantFoldConstantExpression - Attempt to fold the constant expression +/// using the specified DataLayout. If successful, the constant result is +/// result is returned, if not, null is returned. 
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + SmallPtrSet<ConstantExpr *, 4> FoldedOps; + return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps); +} + +/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the +/// specified opcode and operands. If successful, the constant result is +/// returned, if not, null is returned. Note that this function can fail when +/// attempting to fold instructions like loads and stores, which have no +/// constant expression form. +/// +/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc +/// information, due to only being passed an opcode and operands. Constant +/// folding using this function strips this information. +/// +Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, + ArrayRef<Constant *> Ops, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + // Handle easy binops first. + if (Instruction::isBinaryOp(Opcode)) { + if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) { + if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) + return C; + } + + return ConstantExpr::get(Opcode, Ops[0], Ops[1]); + } + + switch (Opcode) { + default: return 0; + case Instruction::ICmp: + case Instruction::FCmp: llvm_unreachable("Invalid for compares"); + case Instruction::Call: + if (Function *F = dyn_cast<Function>(Ops.back())) + if (canConstantFoldCallTo(F)) + return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); + return 0; + case Instruction::PtrToInt: + // If the input is a inttoptr, eliminate the pair. This requires knowing + // the width of a pointer, so it can't be done in ConstantExpr::getCast. 
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { + if (TD && CE->getOpcode() == Instruction::IntToPtr) { + Constant *Input = CE->getOperand(0); + unsigned InWidth = Input->getType()->getScalarSizeInBits(); + unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType()); + if (PtrWidth < InWidth) { + Constant *Mask = + ConstantInt::get(CE->getContext(), + APInt::getLowBitsSet(InWidth, PtrWidth)); + Input = ConstantExpr::getAnd(Input, Mask); + } + // Do a zext or trunc to get to the dest size. + return ConstantExpr::getIntegerCast(Input, DestTy, false); + } + } + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::IntToPtr: + // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if + // the int size is >= the ptr size and the address spaces are the same. + // This requires knowing the width of a pointer, so it can't be done in + // ConstantExpr::getCast. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { + if (TD && CE->getOpcode() == Instruction::PtrToInt) { + Constant *SrcPtr = CE->getOperand(0); + unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType()); + unsigned MidIntSize = CE->getType()->getScalarSizeInBits(); + + if (MidIntSize >= SrcPtrSize) { + unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace(); + if (SrcAS == DestTy->getPointerAddressSpace()) + return FoldBitCast(CE->getOperand(0), DestTy, *TD); + } + } + } + + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::AddrSpaceCast: + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::BitCast: + if (TD) + return FoldBitCast(Ops[0], DestTy, *TD); + return ConstantExpr::getBitCast(Ops[0], DestTy); + case Instruction::Select: + return 
ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); + case Instruction::ExtractElement: + return ConstantExpr::getExtractElement(Ops[0], Ops[1]); + case Instruction::InsertElement: + return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); + case Instruction::ShuffleVector: + return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); + case Instruction::GetElementPtr: + if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI)) + return C; + if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI)) + return C; + + return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); + } +} + +/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare +/// instruction (icmp/fcmp) with the specified operands. If it fails, it +/// returns a constant expression of the specified operands. +/// +Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, + Constant *Ops0, Constant *Ops1, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + // fold: icmp (inttoptr x), null -> icmp x, 0 + // fold: icmp (ptrtoint x), 0 -> icmp x, null + // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y + // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y + // + // ConstantExpr::getCompare cannot do this, because it doesn't have TD + // around to know if bit truncation is happening. + if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { + if (TD && Ops1->isNullValue()) { + if (CE0->getOpcode() == Instruction::IntToPtr) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); + // Convert the integer value to the right size to ensure we get the + // proper extension or truncation. 
+ Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), + IntPtrTy, false); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + } + + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. + if (CE0->getOpcode() == Instruction::PtrToInt) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + if (CE0->getType() == IntPtrTy) { + Constant *C = CE0->getOperand(0); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + } + } + } + + if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { + if (TD && CE0->getOpcode() == CE1->getOpcode()) { + if (CE0->getOpcode() == Instruction::IntToPtr) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); + + // Convert the integer value to the right size to ensure we get the + // proper extension or truncation. + Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), + IntPtrTy, false); + Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), + IntPtrTy, false); + return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI); + } + + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. 
+ if (CE0->getOpcode() == Instruction::PtrToInt) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + if (CE0->getType() == IntPtrTy && + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) { + return ConstantFoldCompareInstOperands(Predicate, + CE0->getOperand(0), + CE1->getOperand(0), + TD, + TLI); + } + } + } + } + + // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0) + // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) + if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && + CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { + Constant *LHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1, + TD, TLI); + Constant *RHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1, + TD, TLI); + unsigned OpC = + Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; + Constant *Ops[] = { LHS, RHS }; + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI); + } + } + + return ConstantExpr::getCompare(Predicate, Ops0, Ops1); +} + + +/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a +/// getelementptr constantexpr, return the constant value being addressed by the +/// constant expression, or null if something is funny and we can't decide. +Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, + ConstantExpr *CE) { + if (!CE->getOperand(1)->isNullValue()) + return 0; // Do not allow stepping over the value! + + // Loop over all of the operands, tracking down which value we are + // addressing. 
+ for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { + C = C->getAggregateElement(CE->getOperand(i)); + if (C == 0) + return 0; + } + return C; +} + +/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr +/// indices (with an *implied* zero pointer index that is not in the list), +/// return the constant value being addressed by a virtual load, or null if +/// something is funny and we can't decide. +Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, + ArrayRef<Constant*> Indices) { + // Loop over all of the operands, tracking down which value we are + // addressing. + for (unsigned i = 0, e = Indices.size(); i != e; ++i) { + C = C->getAggregateElement(Indices[i]); + if (C == 0) + return 0; + } + return C; +} + + +//===----------------------------------------------------------------------===// +// Constant Folding for Calls +// + +/// canConstantFoldCallTo - Return true if its even possible to fold a call to +/// the specified function. +bool llvm::canConstantFoldCallTo(const Function *F) { + switch (F->getIntrinsicID()) { + case Intrinsic::fabs: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::floor: + case Intrinsic::sqrt: + case Intrinsic::pow: + case Intrinsic::powi: + case Intrinsic::bswap: + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::convert_from_fp16: + case Intrinsic::convert_to_fp16: + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case 
Intrinsic::x86_sse2_cvttsd2si64: + return true; + default: + return false; + case 0: break; + } + + if (!F->hasName()) + return false; + StringRef Name = F->getName(); + + // In these cases, the check of the length is required. We don't want to + // return true for a name like "cos\0blah" which strcmp would return equal to + // "cos", but has length 8. + switch (Name[0]) { + default: return false; + case 'a': + return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; + case 'c': + return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; + case 'e': + return Name == "exp" || Name == "exp2"; + case 'f': + return Name == "fabs" || Name == "fmod" || Name == "floor"; + case 'l': + return Name == "log" || Name == "log10"; + case 'p': + return Name == "pow"; + case 's': + return Name == "sin" || Name == "sinh" || Name == "sqrt" || + Name == "sinf" || Name == "sqrtf"; + case 't': + return Name == "tan" || Name == "tanh"; + } +} + +static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, + Type *Ty) { + sys::llvm_fenv_clearexcept(); + V = NativeFP(V); + if (sys::llvm_fenv_testexcept()) { + sys::llvm_fenv_clearexcept(); + return 0; + } + + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } + if (Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat(V)); + llvm_unreachable("Can only constant fold half/float/double"); +} + +static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), + double V, double W, Type *Ty) { + sys::llvm_fenv_clearexcept(); + V = NativeFP(V, W); + if (sys::llvm_fenv_testexcept()) { + sys::llvm_fenv_clearexcept(); + return 0; + } + + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + 
return ConstantFP::get(Ty->getContext(), APF); + } + if (Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat(V)); + llvm_unreachable("Can only constant fold half/float/double"); +} + +/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer +/// conversion of a constant floating point. If roundTowardZero is false, the +/// default IEEE rounding is used (toward nearest, ties to even). This matches +/// the behavior of the non-truncating SSE instructions in the default rounding +/// mode. The desired integer type Ty is used to select how many bits are +/// available for the result. Returns null if the conversion cannot be +/// performed, otherwise returns the Constant value resulting from the +/// conversion. +static Constant *ConstantFoldConvertToInt(const APFloat &Val, + bool roundTowardZero, Type *Ty) { + // All of these conversion intrinsics form an integer of at most 64bits. + unsigned ResultWidth = Ty->getIntegerBitWidth(); + assert(ResultWidth <= 64 && + "Can only constant fold conversions to 64 and 32 bit ints"); + + uint64_t UIntVal; + bool isExact = false; + APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero + : APFloat::rmNearestTiesToEven; + APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth, + /*isSigned=*/true, mode, + &isExact); + if (status != APFloat::opOK && status != APFloat::opInexact) + return 0; + return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true); +} + +/// ConstantFoldCall - Attempt to constant fold a call to the specified function +/// with the specified arguments, returning null if unsuccessful. 
+Constant * +llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI) { + if (!F->hasName()) + return 0; + StringRef Name = F->getName(); + + Type *Ty = F->getReturnType(); + if (Operands.size() == 1) { + if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { + if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) { + APFloat Val(Op->getValueAPF()); + + bool lost = false; + Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost); + + return ConstantInt::get(F->getContext(), Val.bitcastToAPInt()); + } + if (!TLI) + return 0; + + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) + return 0; + + /// We only fold functions with finite arguments. Folding NaN and inf is + /// likely to be aborted with an exception anyway, and some host libms + /// have known errors raising exceptions. + if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) + return 0; + + /// Currently APFloat versions of these functions do not exist, so we use + /// the host native double versions. Float versions are not called + /// directly but for all these it is true (float)(f((double)arg)) == + /// f(arg). Long double not supported yet. 
+ double V; + if (Ty->isFloatTy()) + V = Op->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + V = Op->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + V = APF.convertToDouble(); + } + + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: + return ConstantFoldFP(fabs, V, Ty); +#if HAVE_LOG2 + case Intrinsic::log2: + return ConstantFoldFP(log2, V, Ty); +#endif +#if HAVE_LOG + case Intrinsic::log: + return ConstantFoldFP(log, V, Ty); +#endif +#if HAVE_LOG10 + case Intrinsic::log10: + return ConstantFoldFP(log10, V, Ty); +#endif +#if HAVE_EXP + case Intrinsic::exp: + return ConstantFoldFP(exp, V, Ty); +#endif +#if HAVE_EXP2 + case Intrinsic::exp2: + return ConstantFoldFP(exp2, V, Ty); +#endif + case Intrinsic::floor: + return ConstantFoldFP(floor, V, Ty); + } + + switch (Name[0]) { + case 'a': + if (Name == "acos" && TLI->has(LibFunc::acos)) + return ConstantFoldFP(acos, V, Ty); + else if (Name == "asin" && TLI->has(LibFunc::asin)) + return ConstantFoldFP(asin, V, Ty); + else if (Name == "atan" && TLI->has(LibFunc::atan)) + return ConstantFoldFP(atan, V, Ty); + break; + case 'c': + if (Name == "ceil" && TLI->has(LibFunc::ceil)) + return ConstantFoldFP(ceil, V, Ty); + else if (Name == "cos" && TLI->has(LibFunc::cos)) + return ConstantFoldFP(cos, V, Ty); + else if (Name == "cosh" && TLI->has(LibFunc::cosh)) + return ConstantFoldFP(cosh, V, Ty); + else if (Name == "cosf" && TLI->has(LibFunc::cosf)) + return ConstantFoldFP(cos, V, Ty); + break; + case 'e': + if (Name == "exp" && TLI->has(LibFunc::exp)) + return ConstantFoldFP(exp, V, Ty); + + if (Name == "exp2" && TLI->has(LibFunc::exp2)) { + // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a + // C99 library. 
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty); + } + break; + case 'f': + if (Name == "fabs" && TLI->has(LibFunc::fabs)) + return ConstantFoldFP(fabs, V, Ty); + else if (Name == "floor" && TLI->has(LibFunc::floor)) + return ConstantFoldFP(floor, V, Ty); + break; + case 'l': + if (Name == "log" && V > 0 && TLI->has(LibFunc::log)) + return ConstantFoldFP(log, V, Ty); + else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) + return ConstantFoldFP(log10, V, Ty); + else if (F->getIntrinsicID() == Intrinsic::sqrt && + (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { + if (V >= -0.0) + return ConstantFoldFP(sqrt, V, Ty); + else // Undefined + return Constant::getNullValue(Ty); + } + break; + case 's': + if (Name == "sin" && TLI->has(LibFunc::sin)) + return ConstantFoldFP(sin, V, Ty); + else if (Name == "sinh" && TLI->has(LibFunc::sinh)) + return ConstantFoldFP(sinh, V, Ty); + else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) + return ConstantFoldFP(sqrt, V, Ty); + else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)) + return ConstantFoldFP(sqrt, V, Ty); + else if (Name == "sinf" && TLI->has(LibFunc::sinf)) + return ConstantFoldFP(sin, V, Ty); + break; + case 't': + if (Name == "tan" && TLI->has(LibFunc::tan)) + return ConstantFoldFP(tan, V, Ty); + else if (Name == "tanh" && TLI->has(LibFunc::tanh)) + return ConstantFoldFP(tanh, V, Ty); + break; + default: + break; + } + return 0; + } + + if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { + switch (F->getIntrinsicID()) { + case Intrinsic::bswap: + return ConstantInt::get(F->getContext(), Op->getValue().byteSwap()); + case Intrinsic::ctpop: + return ConstantInt::get(Ty, Op->getValue().countPopulation()); + case Intrinsic::convert_from_fp16: { + APFloat Val(APFloat::IEEEhalf, Op->getValue()); + + bool lost = false; + APFloat::opStatus status = + Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); + + // Conversion is always precise. 
+ (void)status; + assert(status == APFloat::opOK && !lost && + "Precision lost during fp16 constfolding"); + + return ConstantFP::get(F->getContext(), Val); + } + default: + return 0; + } + } + + // Support ConstantVector in case we have an Undef in the top. + if (isa<ConstantVector>(Operands[0]) || + isa<ConstantDataVector>(Operands[0])) { + Constant *Op = cast<Constant>(Operands[0]); + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/false, Ty); + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/true, Ty); + } + } + + if (isa<UndefValue>(Operands[0])) { + if (F->getIntrinsicID() == Intrinsic::bswap) + return Operands[0]; + return 0; + } + + return 0; + } + + if (Operands.size() == 2) { + if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) + return 0; + double Op1V; + if (Ty->isFloatTy()) + Op1V = Op1->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op1V = Op1->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op1->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op1V = APF.convertToDouble(); + } + + if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { + if (Op2->getType() != Op1->getType()) + return 0; + + double Op2V; + if (Ty->isFloatTy()) + Op2V = Op2->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op2V = 
Op2->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op2->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op2V = APF.convertToDouble(); + } + + if (F->getIntrinsicID() == Intrinsic::pow) { + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + } + if (!TLI) + return 0; + if (Name == "pow" && TLI->has(LibFunc::pow)) + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + if (Name == "fmod" && TLI->has(LibFunc::fmod)) + return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); + if (Name == "atan2" && TLI->has(LibFunc::atan2)) + return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy()) + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy()) + return ConstantFP::get(F->getContext(), + APFloat((double)std::pow((double)Op1V, + (int)Op2C->getZExtValue()))); + } + return 0; + } + + if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { + if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: { + APInt Res; + bool Overflow; + switch (F->getIntrinsicID()) { + default: llvm_unreachable("Invalid case"); + case Intrinsic::sadd_with_overflow: + Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::uadd_with_overflow: + Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow); + 
break; + case Intrinsic::ssub_with_overflow: + Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::usub_with_overflow: + Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::smul_with_overflow: + Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::umul_with_overflow: + Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow); + break; + } + Constant *Ops[] = { + ConstantInt::get(F->getContext(), Res), + ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow) + }; + return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops); + } + case Intrinsic::cttz: + if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef. + return UndefValue::get(Ty); + return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros()); + case Intrinsic::ctlz: + if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef. + return UndefValue::get(Ty); + return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); + } + } + + return 0; + } + return 0; + } + return 0; +} diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp new file mode 100644 index 000000000000..f9432584691d --- /dev/null +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -0,0 +1,518 @@ +//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the cost model analysis. It provides a very basic cost +// estimation for LLVM-IR. This analysis uses the services of the codegen +// to approximate the cost of any IR instruction when lowered to machine +// instructions. 
The cost results are unit-less and the cost number represents +// the throughput of the machine assuming that all loads hit the cache, all +// branches are predicted, etc. The cost numbers can be added in order to +// compare two or more transformation alternatives. +// +//===----------------------------------------------------------------------===// + +#define CM_NAME "cost-model" +#define DEBUG_TYPE CM_NAME +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), + cl::Hidden, + cl::desc("Recognize reduction patterns.")); + +namespace { + class CostModelAnalysis : public FunctionPass { + + public: + static char ID; // Class identification, replacement for typeinfo + CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) { + initializeCostModelAnalysisPass( + *PassRegistry::getPassRegistry()); + } + + /// Returns the expected cost of the instruction. + /// Returns -1 if the cost is unknown. + /// Note, this method does not cache the cost calculation and it + /// can be expensive in some cases. + unsigned getInstructionCost(const Instruction *I) const; + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual void print(raw_ostream &OS, const Module*) const; + + /// The function that we analyze. + Function *F; + /// Target information. + const TargetTransformInfo *TTI; + }; +} // End of anonymous namespace + +// Register this pass. 
+char CostModelAnalysis::ID = 0; +static const char cm_name[] = "Cost Model Analysis"; +INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) +INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) + +FunctionPass *llvm::createCostModelAnalysisPass() { + return new CostModelAnalysis(); +} + +void +CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +bool +CostModelAnalysis::runOnFunction(Function &F) { + this->F = &F; + TTI = getAnalysisIfAvailable<TargetTransformInfo>(); + + return false; +} + +static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { + for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) + if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) + return false; + return true; +} + +static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { + TargetTransformInfo::OperandValueKind OpInfo = + TargetTransformInfo::OK_AnyValue; + + // Check for a splat of a constant. + ConstantDataVector *CDV = 0; + if ((CDV = dyn_cast<ConstantDataVector>(V))) + if (CDV->getSplatValue() != NULL) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + ConstantVector *CV = 0; + if ((CV = dyn_cast<ConstantVector>(V))) + if (CV->getSplatValue() != NULL) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + + return OpInfo; +} + +static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) { + if (M1.size() != M2.size()) + return false; + + for (unsigned i = 0, e = M1.size(); i != e; ++i) + if (M1[i] != M2[i]) + return false; + + return true; +} + +static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, + unsigned Level) { + // We don't need a shuffle if we just want to have element 0 in position 0 of + // the vector. + if (!SI && Level == 0 && IsLeft) + return true; + else if (!SI) + return false; + + SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); + + // Build a mask of 0, 2, ... (left) or 1, 3, ... 
(right) depending on whether + // we look at the left or right side. + for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) + Mask[i] = val; + + SmallVector<int, 16> ActualMask = SI->getShuffleMask(); + if (!matchMask(Mask, ActualMask)) + return false; + + return true; +} + +static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, + unsigned Level, unsigned NumLevels) { + // Match one level of pairwise operations. + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + if (BinOp == 0) + return false; + + assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); + + unsigned Opcode = BinOp->getOpcode(); + Value *L = BinOp->getOperand(0); + Value *R = BinOp->getOperand(1); + + ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); + if (!LS && Level) + return false; + ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); + if (!RS && Level) + return false; + + // On level 0 we can omit one shufflevector instruction. + if (!Level && !RS && !LS) + return false; + + // Shuffle inputs must match. + Value *NextLevelOpL = LS ? LS->getOperand(0) : 0; + Value *NextLevelOpR = RS ? RS->getOperand(0) : 0; + Value *NextLevelOp = 0; + if (NextLevelOpR && NextLevelOpL) { + // If we have two shuffles their operands must match. + if (NextLevelOpL != NextLevelOpR) + return false; + + NextLevelOp = NextLevelOpL; + } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { + // On the first level we can omit the shufflevector <0, undef,...>. So the + // input to the other shufflevector <1, undef> must match with one of the + // inputs to the current binary operation. 
+ // Example: + // %NextLevelOpL = shufflevector %R, <1, undef ...> + // %BinOp = fadd %NextLevelOpL, %R + if (NextLevelOpL && NextLevelOpL != R) + return false; + else if (NextLevelOpR && NextLevelOpR != L) + return false; + + NextLevelOp = NextLevelOpL ? R : L; + } else + return false; + + // Check that the next levels binary operation exists and matches with the + // current one. + BinaryOperator *NextLevelBinOp = 0; + if (Level + 1 != NumLevels) { + if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) + return false; + else if (NextLevelBinOp->getOpcode() != Opcode) + return false; + } + + // Shuffle mask for pairwise operation must match. + if (matchPairwiseShuffleMask(LS, true, Level)) { + if (!matchPairwiseShuffleMask(RS, false, Level)) + return false; + } else if (matchPairwiseShuffleMask(RS, true, Level)) { + if (!matchPairwiseShuffleMask(LS, false, Level)) + return false; + } else + return false; + + if (++Level == NumLevels) + return true; + + // Match next level. + return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); +} + +static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return false; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return false; + + BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); + if (!RdxStart) + return false; + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return false; + + // We look for a sequence of shuffle,shuffle,add triples like the following + // that builds a pairwise reduction tree. 
+ // + // (X0, X1, X2, X3) + // (X0 + X1, X2 + X3, undef, undef) + // ((X0 + X1) + (X2 + X3), undef, undef, undef) + // + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) + return false; + + Opcode = RdxStart->getOpcode(); + Ty = VecTy; + + return true; +} + +static std::pair<Value *, ShuffleVectorInst *> +getShuffleAndOtherOprd(BinaryOperator *B) { + + Value *L = B->getOperand(0); + Value *R = B->getOperand(1); + ShuffleVectorInst *S = 0; + + if ((S = dyn_cast<ShuffleVectorInst>(L))) + return std::make_pair(R, S); + + S = dyn_cast<ShuffleVectorInst>(R); + return std::make_pair(L, S); +} + +static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return false; + + // Need to extract the first element. 
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return false; + + BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); + if (!RdxStart) + return false; + unsigned RdxOpcode = RdxStart->getOpcode(); + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return false; + + // We look for a sequence of shuffles and adds like the following matching one + // fadd, shuffle vector pair at a time. + // + // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf + // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + + unsigned MaskStart = 1; + Value *RdxOp = RdxStart; + SmallVector<int, 32> ShuffleMask(NumVecElems, 0); + unsigned NumVecElemsRemain = NumVecElems; + while (NumVecElemsRemain - 1) { + // Check for the right reduction operation. + BinaryOperator *BinOp; + if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) + return false; + if (BinOp->getOpcode() != RdxOpcode) + return false; + + Value *NextRdxOp; + ShuffleVectorInst *Shuffle; + tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); + + // Check the current reduction operation and the shuffle use the same value. + if (Shuffle == 0) + return false; + if (Shuffle->getOperand(0) != NextRdxOp) + return false; + + // Check that shuffle masks matches. + for (unsigned j = 0; j != MaskStart; ++j) + ShuffleMask[j] = MaskStart + j; + // Fill the rest of the mask with -1 for undef. 
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); + + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + if (!matchMask(ShuffleMask, Mask)) + return false; + + RdxOp = NextRdxOp; + NumVecElemsRemain /= 2; + MaskStart *= 2; + } + + Opcode = RdxOpcode; + Ty = VecTy; + return true; +} + +unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { + if (!TTI) + return -1; + + switch (I->getOpcode()) { + case Instruction::GetElementPtr:{ + Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); + return TTI->getAddressComputationCost(ValTy); + } + + case Instruction::Ret: + case Instruction::PHI: + case Instruction::Br: { + return TTI->getCFInstrCost(I->getOpcode()); + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + TargetTransformInfo::OperandValueKind Op1VK = + getOperandInfo(I->getOperand(0)); + TargetTransformInfo::OperandValueKind Op2VK = + getOperandInfo(I->getOperand(1)); + return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, + Op2VK); + } + case Instruction::Select: { + const SelectInst *SI = cast<SelectInst>(I); + Type *CondTy = SI->getCondition()->getType(); + return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + } + case Instruction::Store: { + const StoreInst *SI = cast<StoreInst>(I); + Type *ValTy = SI->getValueOperand()->getType(); + return TTI->getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), + 
SI->getPointerAddressSpace()); + } + case Instruction::Load: { + const LoadInst *LI = cast<LoadInst>(I); + return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), + LI->getAlignment(), + LI->getPointerAddressSpace()); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + Type *SrcTy = I->getOperand(0)->getType(); + return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + } + case Instruction::ExtractElement: { + const ExtractElementInst * EEI = cast<ExtractElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + + // Try to match a reduction sequence (series of shufflevector and vector + // adds followed by a extractelement). 
+ unsigned ReduxOpCode; + Type *ReduxType; + + if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) + return TTI->getReductionCost(ReduxOpCode, ReduxType, false); + else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) + return TTI->getReductionCost(ReduxOpCode, ReduxType, true); + + return TTI->getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); + } + case Instruction::InsertElement: { + const InsertElementInst * IE = cast<InsertElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return TTI->getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); + } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); + Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); + unsigned NumVecElems = VecTypOp0->getVectorNumElements(); + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + + if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, + 0); + return -1; + } + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + SmallVector<Type*, 4> Tys; + for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) + Tys.push_back(II->getArgOperand(J)->getType()); + + return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), + Tys); + } + return -1; + default: + // We don't have any information on this instruction. 
+ return -1; + } +} + +void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { + if (!F) + return; + + for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { + for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { + Instruction *Inst = it; + unsigned Cost = getInstructionCost(Inst); + if (Cost != (unsigned)-1) + OS << "Cost Model: Found an estimated cost of " << Cost; + else + OS << "Cost Model: Unknown cost"; + + OS << " for instruction: "<< *Inst << "\n"; + } + } +} diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp new file mode 100644 index 000000000000..3ed0609cf38d --- /dev/null +++ b/contrib/llvm/lib/Analysis/Delinearization.cpp @@ -0,0 +1,133 @@ +//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements an analysis pass that tries to delinearize all GEP +// instructions in all loops using the SCEV analysis functionality. This pass is +// only used for testing purposes: if your pass needs delinearization, please +// use the on-demand SCEVAddRecExpr::delinearize() function. 
+// +//===----------------------------------------------------------------------===// + +#define DL_NAME "delinearize" +#define DEBUG_TYPE DL_NAME +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/IR/Type.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +class Delinearization : public FunctionPass { + Delinearization(const Delinearization &); // do not implement +protected: + Function *F; + LoopInfo *LI; + ScalarEvolution *SE; + +public: + static char ID; // Pass identification, replacement for typeid + + Delinearization() : FunctionPass(ID) { + initializeDelinearizationPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual void print(raw_ostream &O, const Module *M = 0) const; +}; + +} // end anonymous namespace + +void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); +} + +bool Delinearization::runOnFunction(Function &F) { + this->F = &F; + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfo>(); + return false; +} + +static Value *getPointerOperand(Instruction &Inst) { + if (LoadInst *Load = dyn_cast<LoadInst>(&Inst)) + return Load->getPointerOperand(); + else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst)) + return Store->getPointerOperand(); + else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst)) + return Gep->getPointerOperand(); + return NULL; +} + +void 
Delinearization::print(raw_ostream &O, const Module *) const { + O << "Delinearization on function " << F->getName() << ":\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + Instruction *Inst = &(*I); + + // Only analyze loads, stores, and GEPs. + if (!isa<StoreInst>(Inst) && !isa<LoadInst>(Inst) && + !isa<GetElementPtrInst>(Inst)) + continue; + + const BasicBlock *BB = Inst->getParent(); + // Delinearize the memory access as analyzed in all the surrounding loops. + // Do not analyze memory accesses outside loops. + for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) { + const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn); + + // Do not try to delinearize memory accesses that are not AddRecs; this + // also stops the walk outward to the enclosing loops. + if (!AR) + break; + + O << "AddRec: " << *AR << "\n"; + + SmallVector<const SCEV *, 3> Subscripts, Sizes; + const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes); + int Size = Subscripts.size(); + if (Res == AR || Size == 0) { + O << "failed to delinearize\n"; + continue; + } + O << "Base offset: " << *Res << "\n"; + O << "ArrayDecl[UnknownSize]"; + for (int i = 0; i < Size - 1; i++) + O << "[" << *Sizes[i] << "]"; + O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; + + O << "ArrayRef"; + for (int i = 0; i < Size; i++) + O << "[" << *Subscripts[i] << "]"; + O << "\n"; + } + } +} + +char Delinearization::ID = 0; +static const char delinearization_name[] = "Delinearization"; +INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true, + true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true) + +FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; } diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp new file mode 100644 index 000000000000..3b3e2ef155a0 --- /dev/null +++ 
b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -0,0 +1,3909 @@ +//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// DependenceAnalysis is an LLVM pass that analyses dependences between memory +// accesses. Currently, it is an (incomplete) implementation of the approach +// described in +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +// +// There's a single entry point that analyzes the dependence between a pair +// of memory references in a function, returning either NULL, for no dependence, +// or a more-or-less detailed description of the dependence between them. +// +// Currently, the implementation cannot propagate constraints between +// coupled RDIV subscripts and lacks a multi-subscript MIV test. +// Both of these are conservative weaknesses; +// that is, not a source of correctness problems. +// +// The implementation depends on the GEP instruction to differentiate +// subscripts. Since Clang linearizes some array subscripts, the dependence +// analysis is using SCEV->delinearize to recover the representation of multiple +// subscripts, and thus avoid the more expensive and less precise MIV tests. The +// delinearization is controlled by the flag -da-delinearize. +// +// We should pay some careful attention to the possibility of integer overflow +// in the implementation of the various tests. This could happen with Add, +// Subtract, or Multiply, with both APInt's and SCEV's. +// +// Some non-linear subscript pairs can be handled by the GCD test +// (and perhaps other tests). +// Should explore how often these things occur. 
+// +// Finally, it seems like certain test cases expose weaknesses in the SCEV +// simplification, especially in the handling of sign and zero extensions. +// It could be useful to spend time exploring these. +// +// Please note that this is work in progress and the interface is subject to +// change. +// +//===----------------------------------------------------------------------===// +// // +// In memory of Ken Kennedy, 1945 - 2007 // +// // +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "da" + +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// statistics + +STATISTIC(TotalArrayPairs, "Array pairs tested"); +STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); +STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); +STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); +STATISTIC(ZIVapplications, "ZIV applications"); +STATISTIC(ZIVindependence, "ZIV independence"); +STATISTIC(StrongSIVapplications, "Strong SIV applications"); +STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); +STATISTIC(StrongSIVindependence, "Strong SIV independence"); +STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); +STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); +STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); +STATISTIC(ExactSIVapplications, "Exact SIV 
applications"); +STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); +STATISTIC(ExactSIVindependence, "Exact SIV independence"); +STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); +STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); +STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); +STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); +STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); +STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); +STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); +STATISTIC(DeltaApplications, "Delta applications"); +STATISTIC(DeltaSuccesses, "Delta successes"); +STATISTIC(DeltaIndependence, "Delta independence"); +STATISTIC(DeltaPropagations, "Delta propagations"); +STATISTIC(GCDapplications, "GCD applications"); +STATISTIC(GCDsuccesses, "GCD successes"); +STATISTIC(GCDindependence, "GCD independence"); +STATISTIC(BanerjeeApplications, "Banerjee applications"); +STATISTIC(BanerjeeIndependence, "Banerjee independence"); +STATISTIC(BanerjeeSuccesses, "Banerjee successes"); + +static cl::opt<bool> +Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Try to delinearize array references.")); + +//===----------------------------------------------------------------------===// +// basics + +INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", + "Dependence Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(DependenceAnalysis, "da", + "Dependence Analysis", true, true) + +char DependenceAnalysis::ID = 0; + + +FunctionPass *llvm::createDependenceAnalysisPass() { + return new DependenceAnalysis(); +} + + +bool DependenceAnalysis::runOnFunction(Function &F) { + this->F = &F; + AA = &getAnalysis<AliasAnalysis>(); + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfo>(); + return false; +} + + +void 
DependenceAnalysis::releaseMemory() { +} + + +void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<ScalarEvolution>(); + AU.addRequiredTransitive<LoopInfo>(); +} + + +// Used to test the dependence analyzer. +// Looks through the function, noting loads and stores. +// Calls depends() on every pair (Src, Dst) in which Dst is at or after Src in +// iteration order (so each access is also paired with itself) and prints the +// result, or reports that there is none when depends() returns NULL. +// Each returned Dependence is deleted here after it has been printed. +// Ignores all other instructions. +static +void dumpExampleDependence(raw_ostream &OS, Function *F, + DependenceAnalysis *DA) { + for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); + SrcI != SrcE; ++SrcI) { + if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { + for (inst_iterator DstI = SrcI, DstE = inst_end(F); + DstI != DstE; ++DstI) { + if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { + OS << "da analyze - "; + if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) { + D->dump(OS); + for (unsigned Level = 1; Level <= D->getLevels(); Level++) { + if (D->isSplitable(Level)) { + OS << "da analyze - split level = " << Level; + OS << ", iteration = " << *DA->getSplitIteration(D, Level); + OS << "!\n"; + } + } + delete D; + } + else + OS << "none!\n"; + } + } + } + } +} + + +void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { + dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); +} + +//===----------------------------------------------------------------------===// +// Dependence methods + +// Returns true if this is an input dependence. +bool Dependence::isInput() const { + return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); +} + + +// Returns true if this is an output dependence. +bool Dependence::isOutput() const { + return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); +} + + +// Returns true if this is a flow (aka true) dependence. 
+bool Dependence::isFlow() const { + return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); +} + + +// Returns true if this is an anti dependence. +bool Dependence::isAnti() const { + return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); +} + + +// Returns true if a particular level is scalar; that is, +// if no subscript in the source or destination mention the induction +// variable associated with the loop at this level. +// Leave this out of line, so it will serve as a virtual method anchor +bool Dependence::isScalar(unsigned level) const { + return false; +} + + +//===----------------------------------------------------------------------===// +// FullDependence methods + +FullDependence::FullDependence(Instruction *Source, + Instruction *Destination, + bool PossiblyLoopIndependent, + unsigned CommonLevels) : + Dependence(Source, Destination), + Levels(CommonLevels), + LoopIndependent(PossiblyLoopIndependent) { + Consistent = true; + DV = CommonLevels ? new DVEntry[CommonLevels] : NULL; +} + +// The rest are simple getters that hide the implementation. + +// getDirection - Returns the direction associated with a particular level. +unsigned FullDependence::getDirection(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Direction; +} + + +// Returns the distance (or NULL) associated with a particular level. +const SCEV *FullDependence::getDistance(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Distance; +} + + +// Returns true if a particular level is scalar; that is, +// if no subscript in the source or destination mention the induction +// variable associated with the loop at this level. 
+bool FullDependence::isScalar(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Scalar; +} + + +// Returns true if peeling the first iteration from this loop +// will break this dependence. +bool FullDependence::isPeelFirst(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].PeelFirst; +} + + +// Returns true if peeling the last iteration from this loop +// will break this dependence. +bool FullDependence::isPeelLast(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].PeelLast; +} + + +// Returns true if splitting this loop will break the dependence. +bool FullDependence::isSplitable(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Splitable; +} + + +//===----------------------------------------------------------------------===// +// DependenceAnalysis::Constraint methods + +// If constraint is a point <X, Y>, returns X. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getX() const { + assert(Kind == Point && "Kind should be Point"); + return A; +} + + +// If constraint is a point <X, Y>, returns Y. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getY() const { + assert(Kind == Point && "Kind should be Point"); + return B; +} + + +// If constraint is a line AX + BY = C, returns A. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getA() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return A; +} + + +// If constraint is a line AX + BY = C, returns B. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getB() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return B; +} + + +// If constraint is a line AX + BY = C, returns C. +// Otherwise assert. 
+const SCEV *DependenceAnalysis::Constraint::getC() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return C; +} + + +// If constraint is a distance, returns D. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getD() const { + assert(Kind == Distance && "Kind should be Distance"); + return SE->getNegativeSCEV(C); +} + + +// Returns the loop associated with this constraint. +const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { + assert((Kind == Distance || Kind == Line || Kind == Point) && + "Kind should be Distance, Line, or Point"); + return AssociatedLoop; +} + + +void DependenceAnalysis::Constraint::setPoint(const SCEV *X, + const SCEV *Y, + const Loop *CurLoop) { + Kind = Point; + A = X; + B = Y; + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setLine(const SCEV *AA, + const SCEV *BB, + const SCEV *CC, + const Loop *CurLoop) { + Kind = Line; + A = AA; + B = BB; + C = CC; + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setDistance(const SCEV *D, + const Loop *CurLoop) { + Kind = Distance; + A = SE->getConstant(D->getType(), 1); + B = SE->getNegativeSCEV(A); + C = SE->getNegativeSCEV(D); + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setEmpty() { + Kind = Empty; +} + + +void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { + SE = NewSE; + Kind = Any; +} + + +// For debugging purposes. Dumps the constraint out to OS. 
+void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { + if (isEmpty()) + OS << " Empty\n"; + else if (isAny()) + OS << " Any\n"; + else if (isPoint()) + OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; + else if (isDistance()) + OS << " Distance is " << *getD() << + " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; + else if (isLine()) + OS << " Line is " << *getA() << "*X + " << + *getB() << "*Y = " << *getC() << "\n"; + else + llvm_unreachable("unknown constraint type in Constraint::dump"); +} + + +// Updates X with the intersection +// of the Constraints X and Y. Returns true if X has changed. +// Corresponds to Figure 4 from the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +bool DependenceAnalysis::intersectConstraints(Constraint *X, + const Constraint *Y) { + ++DeltaApplications; + DEBUG(dbgs() << "\tintersect constraints\n"); + DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); + DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); + assert(!Y->isPoint() && "Y must not be a Point"); + if (X->isAny()) { + if (Y->isAny()) + return false; + *X = *Y; + return true; + } + if (X->isEmpty()) + return false; + if (Y->isEmpty()) { + X->setEmpty(); + return true; + } + + if (X->isDistance() && Y->isDistance()) { + DEBUG(dbgs() << "\t intersect 2 distances\n"); + if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + // Hmmm, interesting situation. + // I guess if either is constant, keep it and ignore the other. + if (isa<SCEVConstant>(Y->getD())) { + *X = *Y; + return true; + } + return false; + } + + // At this point, the pseudo-code in Figure 4 of the paper + // checks if (X->isPoint() && Y->isPoint()). 
+ // This case can't occur in our implementation, + // since a Point can only arise as the result of intersecting + // two Line constraints, and the right-hand value, Y, is never + // the result of an intersection. + assert(!(X->isPoint() && Y->isPoint()) && + "We shouldn't ever see X->isPoint() && Y->isPoint()"); + + if (X->isLine() && Y->isLine()) { + DEBUG(dbgs() << "\t intersect 2 lines\n"); + const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); + const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); + if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { + // slopes are equal, so lines are parallel + DEBUG(dbgs() << "\t\tsame slope\n"); + Prod1 = SE->getMulExpr(X->getC(), Y->getB()); + Prod2 = SE->getMulExpr(X->getB(), Y->getC()); + if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + return false; + } + if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { + // slopes differ, so lines intersect + DEBUG(dbgs() << "\t\tdifferent slopes\n"); + const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); + const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); + const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); + const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); + const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); + const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); + const SCEVConstant *C1A2_C2A1 = + dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); + const SCEVConstant *C1B2_C2B1 = + dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); + const SCEVConstant *A1B2_A2B1 = + dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); + const SCEVConstant *A2B1_A1B2 = + dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); + if (!C1B2_C2B1 || !C1A2_C2A1 || + !A1B2_A2B1 || !A2B1_A1B2) + return false; + APInt Xtop = C1B2_C2B1->getValue()->getValue(); + APInt Xbot = A1B2_A2B1->getValue()->getValue(); + 
APInt Ytop = C1A2_C2A1->getValue()->getValue(); + APInt Ybot = A2B1_A1B2->getValue()->getValue(); + DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); + DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); + DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); + DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); + APInt Xq = Xtop; // these need to be initialized, even + APInt Xr = Xtop; // though they're just going to be overwritten + APInt::sdivrem(Xtop, Xbot, Xq, Xr); + APInt Yq = Ytop; + APInt Yr = Ytop; + APInt::sdivrem(Ytop, Ybot, Yq, Yr); + if (Xr != 0 || Yr != 0) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); + if (Xq.slt(0) || Yq.slt(0)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + if (const SCEVConstant *CUB = + collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { + APInt UpperBound = CUB->getValue()->getValue(); + DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); + if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + } + X->setPoint(SE->getConstant(Xq), + SE->getConstant(Yq), + X->getAssociatedLoop()); + ++DeltaSuccesses; + return true; + } + return false; + } + + // if (X->isLine() && Y->isPoint()) This case can't occur. 
+ assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); + + if (X->isPoint() && Y->isLine()) { + DEBUG(dbgs() << "\t intersect Point and Line\n"); + const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); + const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); + const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); + if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + return false; + } + + llvm_unreachable("shouldn't reach the end of Constraint intersection"); + return false; +} + + +//===----------------------------------------------------------------------===// +// DependenceAnalysis methods + +// For debugging purposes. Dumps a dependence to OS. +void Dependence::dump(raw_ostream &OS) const { + bool Splitable = false; + if (isConfused()) + OS << "confused"; + else { + if (isConsistent()) + OS << "consistent "; + if (isFlow()) + OS << "flow"; + else if (isOutput()) + OS << "output"; + else if (isAnti()) + OS << "anti"; + else if (isInput()) + OS << "input"; + unsigned Levels = getLevels(); + OS << " ["; + for (unsigned II = 1; II <= Levels; ++II) { + if (isSplitable(II)) + Splitable = true; + if (isPeelFirst(II)) + OS << 'p'; + const SCEV *Distance = getDistance(II); + if (Distance) + OS << *Distance; + else if (isScalar(II)) + OS << "S"; + else { + unsigned Direction = getDirection(II); + if (Direction == DVEntry::ALL) + OS << "*"; + else { + if (Direction & DVEntry::LT) + OS << "<"; + if (Direction & DVEntry::EQ) + OS << "="; + if (Direction & DVEntry::GT) + OS << ">"; + } + } + if (isPeelLast(II)) + OS << 'p'; + if (II < Levels) + OS << " "; + } + if (isLoopIndependent()) + OS << "|<"; + OS << "]"; + if (Splitable) + OS << " splitable"; + } + OS << "!\n"; +} + + + +static +AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, + const Value *A, + const Value *B) { + const Value *AObj = 
GetUnderlyingObject(A); + const Value *BObj = GetUnderlyingObject(B); + return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), + BObj, AA->getTypeStoreSize(BObj->getType())); +} + + +// Returns true if the load or store can be analyzed. Atomic and volatile +// operations have properties which this analysis does not understand. +static +bool isLoadOrStore(const Instruction *I) { + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->isUnordered(); + else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->isUnordered(); + return false; +} + + +static +Value *getPointerOperand(Instruction *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getPointerOperand(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerOperand(); + llvm_unreachable("Value is not load or store instruction"); + return 0; +} + + +// Examines the loop nesting of the Src and Dst +// instructions and establishes their shared loops. Sets the variables +// CommonLevels, SrcLevels, and MaxLevels. +// The source and destination instructions needn't be contained in the same +// loop. The routine establishNestingLevels finds the level of most deeply +// nested loop that contains them both, CommonLevels. An instruction that's +// not contained in a loop is at level = 0. MaxLevels is equal to the level +// of the source plus the level of the destination, minus CommonLevels. +// This lets us allocate vectors MaxLevels in length, with room for every +// distinct loop referenced in both the source and destination subscripts. +// The variable SrcLevels is the nesting depth of the source instruction. +// It's used to help calculate distinct loops referenced by the destination. +// Here's the map from loops to levels: +// 0 - unused +// 1 - outermost common loop +// ... - other common loops +// CommonLevels - innermost common loop +// ... - loops containing Src but not Dst +// SrcLevels - innermost loop containing Src but not Dst +// ... 
- loops containing Dst but not Src +// MaxLevels - innermost loops containing Dst but not Src +// Consider the follow code fragment: +// for (a = ...) { +// for (b = ...) { +// for (c = ...) { +// for (d = ...) { +// A[] = ...; +// } +// } +// for (e = ...) { +// for (f = ...) { +// for (g = ...) { +// ... = A[]; +// } +// } +// } +// } +// } +// If we're looking at the possibility of a dependence between the store +// to A (the Src) and the load from A (the Dst), we'll note that they +// have 2 loops in common, so CommonLevels will equal 2 and the direction +// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. +// A map from loop names to loop numbers would look like +// a - 1 +// b - 2 = CommonLevels +// c - 3 +// d - 4 = SrcLevels +// e - 5 +// f - 6 +// g - 7 = MaxLevels +void DependenceAnalysis::establishNestingLevels(const Instruction *Src, + const Instruction *Dst) { + const BasicBlock *SrcBlock = Src->getParent(); + const BasicBlock *DstBlock = Dst->getParent(); + unsigned SrcLevel = LI->getLoopDepth(SrcBlock); + unsigned DstLevel = LI->getLoopDepth(DstBlock); + const Loop *SrcLoop = LI->getLoopFor(SrcBlock); + const Loop *DstLoop = LI->getLoopFor(DstBlock); + SrcLevels = SrcLevel; + MaxLevels = SrcLevel + DstLevel; + while (SrcLevel > DstLevel) { + SrcLoop = SrcLoop->getParentLoop(); + SrcLevel--; + } + while (DstLevel > SrcLevel) { + DstLoop = DstLoop->getParentLoop(); + DstLevel--; + } + while (SrcLoop != DstLoop) { + SrcLoop = SrcLoop->getParentLoop(); + DstLoop = DstLoop->getParentLoop(); + SrcLevel--; + } + CommonLevels = SrcLevel; + MaxLevels -= CommonLevels; +} + + +// Given one of the loops containing the source, return +// its level index in our numbering scheme. +unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { + return SrcLoop->getLoopDepth(); +} + + +// Given one of the loops containing the destination, +// return its level index in our numbering scheme. 
+unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { + unsigned D = DstLoop->getLoopDepth(); + if (D > CommonLevels) + return D - CommonLevels + SrcLevels; + else + return D; +} + + +// Returns true if Expression is loop invariant in LoopNest. +bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, + const Loop *LoopNest) const { + if (!LoopNest) + return true; + return SE->isLoopInvariant(Expression, LoopNest) && + isLoopInvariant(Expression, LoopNest->getParentLoop()); +} + + + +// Finds the set of loops from the LoopNest that +// have a level <= CommonLevels and are referred to by the SCEV Expression. +void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, + const Loop *LoopNest, + SmallBitVector &Loops) const { + while (LoopNest) { + unsigned Level = LoopNest->getLoopDepth(); + if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) + Loops.set(Level); + LoopNest = LoopNest->getParentLoop(); + } +} + + +// removeMatchingExtensions - Examines a subscript pair. +// If the source and destination are identically sign (or zero) +// extended, it strips off the extension in an effect to simplify +// the actual analysis. +void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { + const SCEV *Src = Pair->Src; + const SCEV *Dst = Pair->Dst; + if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || + (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { + const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); + const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); + if (SrcCast->getType() == DstCast->getType()) { + Pair->Src = SrcCast->getOperand(); + Pair->Dst = DstCast->getOperand(); + } + } +} + + +// Examine the scev and return true iff it's linear. +// Collect any loops mentioned in the set of "Loops". 
+bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, + const Loop *LoopNest, + SmallBitVector &Loops) { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); + if (!AddRec) + return isLoopInvariant(Src, LoopNest); + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*SE); + if (!isLoopInvariant(Step, LoopNest)) + return false; + Loops.set(mapSrcLoop(AddRec->getLoop())); + return checkSrcSubscript(Start, LoopNest, Loops); +} + + + +// Examine the scev and return true iff it's linear. +// Collect any loops mentioned in the set of "Loops". +bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, + const Loop *LoopNest, + SmallBitVector &Loops) { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); + if (!AddRec) + return isLoopInvariant(Dst, LoopNest); + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*SE); + if (!isLoopInvariant(Step, LoopNest)) + return false; + Loops.set(mapDstLoop(AddRec->getLoop())); + return checkDstSubscript(Start, LoopNest, Loops); +} + + +// Examines the subscript pair (the Src and Dst SCEVs) +// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. +// Collects the associated loops in a set. 
+DependenceAnalysis::Subscript::ClassificationKind +DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, + const SCEV *Dst, const Loop *DstLoopNest, + SmallBitVector &Loops) { + SmallBitVector SrcLoops(MaxLevels + 1); + SmallBitVector DstLoops(MaxLevels + 1); + if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) + return Subscript::NonLinear; + if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) + return Subscript::NonLinear; + Loops = SrcLoops; + Loops |= DstLoops; + unsigned N = Loops.count(); + if (N == 0) + return Subscript::ZIV; + if (N == 1) + return Subscript::SIV; + if (N == 2 && (SrcLoops.count() == 0 || + DstLoops.count() == 0 || + (SrcLoops.count() == 1 && DstLoops.count() == 1))) + return Subscript::RDIV; + return Subscript::MIV; +} + + +// A wrapper around SCEV::isKnownPredicate. +// Looks for cases where we're interested in comparing for equality. +// If both X and Y have been identically sign or zero extended, +// it strips off the (confusing) extensions before invoking +// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package +// will be similarly updated. +// +// If SCEV::isKnownPredicate can't prove the predicate, +// we try simple subtraction, which seems to help in some cases +// involving symbolics. 
+bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *X, + const SCEV *Y) const { + if (Pred == CmpInst::ICMP_EQ || + Pred == CmpInst::ICMP_NE) { + if ((isa<SCEVSignExtendExpr>(X) && + isa<SCEVSignExtendExpr>(Y)) || + (isa<SCEVZeroExtendExpr>(X) && + isa<SCEVZeroExtendExpr>(Y))) { + const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); + const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); + const SCEV *Xop = CX->getOperand(); + const SCEV *Yop = CY->getOperand(); + if (Xop->getType() == Yop->getType()) { + X = Xop; + Y = Yop; + } + } + } + if (SE->isKnownPredicate(Pred, X, Y)) + return true; + // If SE->isKnownPredicate can't prove the condition, + // we try the brute-force approach of subtracting + // and testing the difference. + // By testing with SE->isKnownPredicate first, we avoid + // the possibility of overflow when the arguments are constants. + const SCEV *Delta = SE->getMinusSCEV(X, Y); + switch (Pred) { + case CmpInst::ICMP_EQ: + return Delta->isZero(); + case CmpInst::ICMP_NE: + return SE->isKnownNonZero(Delta); + case CmpInst::ICMP_SGE: + return SE->isKnownNonNegative(Delta); + case CmpInst::ICMP_SLE: + return SE->isKnownNonPositive(Delta); + case CmpInst::ICMP_SGT: + return SE->isKnownPositive(Delta); + case CmpInst::ICMP_SLT: + return SE->isKnownNegative(Delta); + default: + llvm_unreachable("unexpected predicate in isKnownPredicate"); + } +} + + +// All subscripts are all the same type. +// Loop bound may be smaller (e.g., a char). +// Should zero extend loop bound, since it's always >= 0. +// This routine collects upper bound and extends if needed. +// Return null if no bound available. +const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, + Type *T) const { + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + const SCEV *UB = SE->getBackedgeTakenCount(L); + return SE->getNoopOrZeroExtend(UB, T); + } + return NULL; +} + + +// Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 
+// If the cast fails, returns NULL. +const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, + Type *T + ) const { + if (const SCEV *UB = collectUpperBound(L, T)) + return dyn_cast<SCEVConstant>(UB); + return NULL; +} + + +// testZIV - +// When we have a pair of subscripts of the form [c1] and [c2], +// where c1 and c2 are both loop invariant, we attack it using +// the ZIV test. Basically, we test by comparing the two values, +// but there are actually three possible results: +// 1) the values are equal, so there's a dependence +// 2) the values are different, so there's no dependence +// 3) the values might be equal, so we have to assume a dependence. +// +// Return true if dependence disproved. +bool DependenceAnalysis::testZIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + ++ZIVapplications; + if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { + DEBUG(dbgs() << " provably dependent\n"); + return false; // provably dependent + } + if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { + DEBUG(dbgs() << " provably independent\n"); + ++ZIVindependence; + return true; // provably independent + } + DEBUG(dbgs() << " possibly dependent\n"); + Result.Consistent = false; + return false; // possibly dependent +} + + +// strongSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.1 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the Strong SIV test. +// +// Can prove independence. Failing that, can compute distance (and direction). +// In the presence of symbolic terms, we can sometimes make progress. 
+// +// If there's a dependence, +// +// c1 + a*i = c2 + a*i' +// +// The dependence distance is +// +// d = i' - i = (c1 - c2)/a +// +// A dependence only exists if d is an integer and abs(d) <= U, where U is the +// loop's upper bound. If a dependence exists, the dependence direction is +// defined as +// +// { < if d > 0 +// direction = { = if d = 0 +// { > if d < 0 +// +// Return true if dependence disproved. +bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + DEBUG(dbgs() << "\tStrong SIV test\n"); + DEBUG(dbgs() << "\t Coeff = " << *Coeff); + DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); + DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst); + DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); + ++StrongSIVapplications; + assert(0 < Level && Level <= CommonLevels && "level out of range"); + Level--; + + const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); + DEBUG(dbgs() << "\t Delta = " << *Delta); + DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); + + // check that |Delta| < iteration count + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); + DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); + const SCEV *AbsDelta = + SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); + const SCEV *AbsCoeff = + SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); + const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); + if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { + // Distance greater than trip count - no dependence + ++StrongSIVindependence; + ++StrongSIVsuccesses; + return true; + } + } + + // Can we compute distance? 
+ if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { + APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); + APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); + APInt Distance = ConstDelta; // these need to be initialized + APInt Remainder = ConstDelta; + APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); + DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + // Make sure Coeff divides Delta exactly + if (Remainder != 0) { + // Coeff doesn't divide Distance, no dependence + ++StrongSIVindependence; + ++StrongSIVsuccesses; + return true; + } + Result.DV[Level].Distance = SE->getConstant(Distance); + NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); + if (Distance.sgt(0)) + Result.DV[Level].Direction &= Dependence::DVEntry::LT; + else if (Distance.slt(0)) + Result.DV[Level].Direction &= Dependence::DVEntry::GT; + else + Result.DV[Level].Direction &= Dependence::DVEntry::EQ; + ++StrongSIVsuccesses; + } + else if (Delta->isZero()) { + // since 0/X == 0 + Result.DV[Level].Distance = Delta; + NewConstraint.setDistance(Delta, CurLoop); + Result.DV[Level].Direction &= Dependence::DVEntry::EQ; + ++StrongSIVsuccesses; + } + else { + if (Coeff->isOne()) { + DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); + Result.DV[Level].Distance = Delta; // since X/1 == X + NewConstraint.setDistance(Delta, CurLoop); + } + else { + Result.Consistent = false; + NewConstraint.setLine(Coeff, + SE->getNegativeSCEV(Coeff), + SE->getNegativeSCEV(Delta), CurLoop); + } + + // maybe we can get a useful direction + bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); + bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); + bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); + bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); + bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); + // The double negatives above are confusing. 
+ // It helps to read !SE->isKnownNonZero(Delta) + // as "Delta might be Zero" + unsigned NewDirection = Dependence::DVEntry::NONE; + if ((DeltaMaybePositive && CoeffMaybePositive) || + (DeltaMaybeNegative && CoeffMaybeNegative)) + NewDirection = Dependence::DVEntry::LT; + if (DeltaMaybeZero) + NewDirection |= Dependence::DVEntry::EQ; + if ((DeltaMaybeNegative && CoeffMaybePositive) || + (DeltaMaybePositive && CoeffMaybeNegative)) + NewDirection |= Dependence::DVEntry::GT; + if (NewDirection < Result.DV[Level].Direction) + ++StrongSIVsuccesses; + Result.DV[Level].Direction &= NewDirection; + } + return false; +} + + +// weakCrossingSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.2 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the +// Weak-Crossing SIV test. +// +// Given c1 + a*i = c2 - a*i', we can look for the intersection of +// the two lines, where i = i', yielding +// +// c1 + a*i = c2 - a*i +// 2a*i = c2 - c1 +// i = (c2 - c1)/2a +// +// If i < 0, there is no dependence. +// If i > upperbound, there is no dependence. +// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. +// If i = upperbound, there's a dependence with distance = 0. +// If i is integral, there's a dependence (all directions). +// If the non-integer part = 1/2, there's a dependence (<> directions). +// Otherwise, there's no dependence. +// +// Can prove independence. Failing that, +// can sometimes refine the directions. +// Can determine iteration for splitting. +// +// Return true if dependence disproved. 
+bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const { + DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); + DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++WeakCrossingSIVapplications; + assert(0 < Level && Level <= CommonLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); + if (Delta->isZero()) { + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); + ++WeakCrossingSIVsuccesses; + if (!Result.DV[Level].Direction) { + ++WeakCrossingSIVindependence; + return true; + } + Result.DV[Level].Distance = Delta; // = 0 + return false; + } + const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); + if (!ConstCoeff) + return false; + + Result.DV[Level].Splitable = true; + if (SE->isKnownNegative(ConstCoeff)) { + ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); + assert(ConstCoeff && + "dynamic cast of negative of ConstCoeff should yield constant"); + Delta = SE->getNegativeSCEV(Delta); + } + assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); + + // compute SplitIter for use by DependenceAnalysis::getSplitIteration() + SplitIter = + SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), + Delta), + SE->getMulExpr(SE->getConstant(Delta->getType(), 2), + ConstCoeff)); + DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); + + const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); + if (!ConstDelta) + return false; + + // We're 
certain that ConstCoeff > 0; therefore, + // if Delta < 0, then no dependence. + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); + if (SE->isKnownNegative(Delta)) { + // No dependence, Delta < 0 + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + + // We're certain that Delta > 0 and ConstCoeff > 0. + // Check Delta/(2*ConstCoeff) against upper loop bound + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); + const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), + ConstantTwo); + DEBUG(dbgs() << "\t ML = " << *ML << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { + // Delta too big, no dependence + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { + // i = i' = UB + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); + ++WeakCrossingSIVsuccesses; + if (!Result.DV[Level].Direction) { + ++WeakCrossingSIVindependence; + return true; + } + Result.DV[Level].Splitable = false; + Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); + return false; + } + } + + // check that Coeff divides Delta + APInt APDelta = ConstDelta->getValue()->getValue(); + APInt APCoeff = ConstCoeff->getValue()->getValue(); + APInt Distance = APDelta; // these need to be initialzed + APInt Remainder = APDelta; + APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + if (Remainder != 0) { + // Coeff doesn't divide Delta, no dependence + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); + + // 
if 2*Coeff doesn't divide Delta, then the equal direction isn't possible + APInt Two = APInt(Distance.getBitWidth(), 2, true); + Remainder = Distance.srem(Two); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + if (Remainder != 0) { + // Equal direction isn't possible + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); + ++WeakCrossingSIVsuccesses; + } + return false; +} + + +// Kirch's algorithm, from +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// MIT Press, 1989 +// +// Program 2.1, page 29. +// Computes the GCD of AM and BM. +// Also finds a solution to the equation ax - by = gdc(a, b). +// Returns true iff the gcd divides Delta. +static +bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, + APInt &G, APInt &X, APInt &Y) { + APInt A0(Bits, 1, true), A1(Bits, 0, true); + APInt B0(Bits, 0, true), B1(Bits, 1, true); + APInt G0 = AM.abs(); + APInt G1 = BM.abs(); + APInt Q = G0; // these need to be initialized + APInt R = G0; + APInt::sdivrem(G0, G1, Q, R); + while (R != 0) { + APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; + APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; + G0 = G1; G1 = R; + APInt::sdivrem(G0, G1, Q, R); + } + G = G1; + DEBUG(dbgs() << "\t GCD = " << G << "\n"); + X = AM.slt(0) ? -A1 : A1; + Y = BM.slt(0) ? 
B1 : -B1; + + // make sure gcd divides Delta + R = Delta.srem(G); + if (R != 0) + return true; // gcd doesn't divide Delta, no dependence + Q = Delta.sdiv(G); + X *= Q; + Y *= Q; + return false; +} + + +static +APInt floorOfQuotient(APInt A, APInt B) { + APInt Q = A; // these need to be initialized + APInt R = A; + APInt::sdivrem(A, B, Q, R); + if (R == 0) + return Q; + if ((A.sgt(0) && B.sgt(0)) || + (A.slt(0) && B.slt(0))) + return Q; + else + return Q - 1; +} + + +static +APInt ceilingOfQuotient(APInt A, APInt B) { + APInt Q = A; // these need to be initialized + APInt R = A; + APInt::sdivrem(A, B, Q, R); + if (R == 0) + return Q; + if ((A.sgt(0) && B.sgt(0)) || + (A.slt(0) && B.slt(0))) + return Q + 1; + else + return Q; +} + + +static +APInt maxAPInt(APInt A, APInt B) { + return A.sgt(B) ? A : B; +} + + +static +APInt minAPInt(APInt A, APInt B) { + return A.slt(B) ? A : B; +} + + +// exactSIVtest - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], +// where i is an induction variable, c1 and c2 are loop invariant, and a1 +// and a2 are constant, we can solve it exactly using an algorithm developed +// by Banerjee and Wolfe. See Section 2.5.3 in +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// MIT Press, 1989 +// +// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), +// so use them if possible. They're also a bit better with symbolics and, +// in the case of the strong SIV test, can compute Distances. +// +// Return true if dependence disproved. 
+bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + DEBUG(dbgs() << "\tExact SIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); + DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++ExactSIVapplications; + assert(0 < Level && Level <= CommonLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), + Delta, CurLoop); + const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); + const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); + const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); + if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) + return false; + + // find gcd + APInt G, X, Y; + APInt AM = ConstSrcCoeff->getValue()->getValue(); + APInt BM = ConstDstCoeff->getValue()->getValue(); + unsigned Bits = AM.getBitWidth(); + if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + // gcd doesn't divide Delta, no dependence + ++ExactSIVindependence; + ++ExactSIVsuccesses; + return true; + } + + DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); + + // since SCEV construction normalizes, LM = 0 + APInt UM(Bits, 1, true); + bool UMvalid = false; + // UM is perhaps unavailable, let's check + if (const SCEVConstant *CUB = + collectConstantUpperBound(CurLoop, Delta->getType())) { + UM = CUB->getValue()->getValue(); + DEBUG(dbgs() << "\t UM = " << UM << "\n"); + UMvalid = true; + } + + APInt TU(APInt::getSignedMaxValue(Bits)); + APInt TL(APInt::getSignedMinValue(Bits)); + + // 
test(BM/G, LM-X) and test(-BM/G, X-UM) + APInt TMUL = BM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (UMvalid) { + TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (UMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + + // test(AM/G, LM-Y) and test(-AM/G, Y-UM) + TMUL = AM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (UMvalid) { + TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (UMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + if (TL.sgt(TU)) { + ++ExactSIVindependence; + ++ExactSIVsuccesses; + return true; + } + + // explore directions + unsigned NewDirection = Dependence::DVEntry::NONE; + + // less than + APInt SaveTU(TU); // save these + APInt SaveTL(TL); + DEBUG(dbgs() << "\t exploring LT direction\n"); + TMUL = AM - BM; + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); + DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + } + else { + TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); + DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + } + if (TL.sle(TU)) { + NewDirection |= Dependence::DVEntry::LT; + ++ExactSIVsuccesses; + } + + // equal + TU = SaveTU; // restore + TL = SaveTL; + DEBUG(dbgs() << "\t exploring EQ direction\n"); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); + DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + } + else { + TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); + 
DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + } + TMUL = BM - AM; + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); + DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + } + else { + TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); + DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + } + if (TL.sle(TU)) { + NewDirection |= Dependence::DVEntry::EQ; + ++ExactSIVsuccesses; + } + + // greater than + TU = SaveTU; // restore + TL = SaveTL; + DEBUG(dbgs() << "\t exploring GT direction\n"); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); + DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); + } + else { + TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); + DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); + } + if (TL.sle(TU)) { + NewDirection |= Dependence::DVEntry::GT; + ++ExactSIVsuccesses; + } + + // finished + Result.DV[Level].Direction &= NewDirection; + if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) + ++ExactSIVindependence; + return Result.DV[Level].Direction == Dependence::DVEntry::NONE; +} + + + +// Return true if the divisor evenly divides the dividend. +static +bool isRemainderZero(const SCEVConstant *Dividend, + const SCEVConstant *Divisor) { + APInt ConstDividend = Dividend->getValue()->getValue(); + APInt ConstDivisor = Divisor->getValue()->getValue(); + return ConstDividend.srem(ConstDivisor) == 0; +} + + +// weakZeroSrcSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.2 +// +// When we have a pair of subscripts of the form [c1] and [c2 + a*i], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the +// Weak-Zero SIV test. +// +// Given +// +// c1 = c2 + a*i +// +// we get +// +// (c1 - c2)/a = i +// +// If i is not an integer, there's no dependence. +// If i < 0 or > UB, there's no dependence. +// If i = 0, the direction is <= and peeling the +// 1st iteration will break the dependence. 
+// If i = UB, the direction is >= and peeling the +// last iteration will break the dependence. +// Otherwise, the direction is *. +// +// Can prove independence. Failing that, we can sometimes refine +// the directions. Can sometimes show that first or last +// iteration carries all the dependences (so worth peeling). +// +// (see also weakZeroDstSIVtest) +// +// Return true if dependence disproved. +bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + // For the WeakSIV test, it's possible the loop isn't common to + // the Src and Dst loops. If it isn't, then there's no need to + // record a direction. + DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); + DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++WeakZeroSIVapplications; + assert(0 < Level && Level <= MaxLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); + NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), + DstCoeff, Delta, CurLoop); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::LE; + Result.DV[Level].PeelFirst = true; + ++WeakZeroSIVsuccesses; + } + return false; // dependences caused by first iteration + } + const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); + if (!ConstCoeff) + return false; + const SCEV *AbsCoeff = + SE->isKnownNegative(ConstCoeff) ? + SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; + const SCEV *NewDelta = + SE->isKnownNegative(ConstCoeff) ? 
SE->getNegativeSCEV(Delta) : Delta; + + // check that Delta/SrcCoeff < iteration count + // really check NewDelta < count*AbsCoeff + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); + if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { + // dependences caused by last iteration + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::GE; + Result.DV[Level].PeelLast = true; + ++WeakZeroSIVsuccesses; + } + return false; + } + } + + // check that Delta/SrcCoeff >= 0 + // really check that NewDelta >= 0 + if (SE->isKnownNegative(NewDelta)) { + // No dependence, newDelta < 0 + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + + // if SrcCoeff doesn't divide Delta, then no dependence + if (isa<SCEVConstant>(Delta) && + !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + return false; +} + + +// weakZeroDstSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.2 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the +// Weak-Zero SIV test. +// +// Given +// +// c1 + a*i = c2 +// +// we get +// +// i = (c2 - c1)/a +// +// If i is not an integer, there's no dependence. +// If i < 0 or > UB, there's no dependence. +// If i = 0, the direction is <= and peeling the +// 1st iteration will break the dependence. +// If i = UB, the direction is >= and peeling the +// last iteration will break the dependence. +// Otherwise, the direction is *. +// +// Can prove independence. 
Failing that, we can sometimes refine +// the directions. Can sometimes show that first or last +// iteration carries all the dependences (so worth peeling). +// +// (see also weakZeroSrcSIVtest) +// +// Return true if dependence disproved. +bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + // For the WeakSIV test, it's possible the loop isn't common to the + // Src and Dst loops. If it isn't, then there's no need to record a direction. + DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++WeakZeroSIVapplications; + assert(0 < Level && Level <= SrcLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), + Delta, CurLoop); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::LE; + Result.DV[Level].PeelFirst = true; + ++WeakZeroSIVsuccesses; + } + return false; // dependences caused by first iteration + } + const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); + if (!ConstCoeff) + return false; + const SCEV *AbsCoeff = + SE->isKnownNegative(ConstCoeff) ? + SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; + const SCEV *NewDelta = + SE->isKnownNegative(ConstCoeff) ? 
SE->getNegativeSCEV(Delta) : Delta; + + // check that Delta/SrcCoeff < iteration count + // really check NewDelta < count*AbsCoeff + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); + if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { + // dependences caused by last iteration + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::GE; + Result.DV[Level].PeelLast = true; + ++WeakZeroSIVsuccesses; + } + return false; + } + } + + // check that Delta/SrcCoeff >= 0 + // really check that NewDelta >= 0 + if (SE->isKnownNegative(NewDelta)) { + // No dependence, newDelta < 0 + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + + // if SrcCoeff doesn't divide Delta, then no dependence + if (isa<SCEVConstant>(Delta) && + !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + return false; +} + + +// exactRDIVtest - Tests the RDIV subscript pair for dependence. +// Things of the form [c1 + a*i] and [c2 + b*j], +// where i and j are induction variable, c1 and c2 are loop invariant, +// and a and b are constants. +// Returns true if any possible dependence is disproved. +// Marks the result as inconsistent. +// Works in some cases that symbolicRDIVtest doesn't, and vice versa. 
+bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *SrcLoop, + const Loop *DstLoop, + FullDependence &Result) const { + DEBUG(dbgs() << "\tExact RDIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); + DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++ExactRDIVapplications; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); + const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); + const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); + if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) + return false; + + // find gcd + APInt G, X, Y; + APInt AM = ConstSrcCoeff->getValue()->getValue(); + APInt BM = ConstDstCoeff->getValue()->getValue(); + unsigned Bits = AM.getBitWidth(); + if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + // gcd doesn't divide Delta, no dependence + ++ExactRDIVindependence; + return true; + } + + DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); + + // since SCEV construction seems to normalize, LM = 0 + APInt SrcUM(Bits, 1, true); + bool SrcUMvalid = false; + // SrcUM is perhaps unavailable, let's check + if (const SCEVConstant *UpperBound = + collectConstantUpperBound(SrcLoop, Delta->getType())) { + SrcUM = UpperBound->getValue()->getValue(); + DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); + SrcUMvalid = true; + } + + APInt DstUM(Bits, 1, true); + bool DstUMvalid = false; + // UM is perhaps unavailable, let's check + if (const SCEVConstant *UpperBound = + collectConstantUpperBound(DstLoop, Delta->getType())) { + DstUM = UpperBound->getValue()->getValue(); + DEBUG(dbgs() << "\t 
DstUM = " << DstUM << "\n"); + DstUMvalid = true; + } + + APInt TU(APInt::getSignedMaxValue(Bits)); + APInt TL(APInt::getSignedMinValue(Bits)); + + // test(BM/G, LM-X) and test(-BM/G, X-UM) + APInt TMUL = BM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (SrcUMvalid) { + TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (SrcUMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + + // test(AM/G, LM-Y) and test(-AM/G, Y-UM) + TMUL = AM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (DstUMvalid) { + TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (DstUMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + if (TL.sgt(TU)) + ++ExactRDIVindependence; + return TL.sgt(TU); +} + + +// symbolicRDIVtest - +// In Section 4.5 of the Practical Dependence Testing paper,the authors +// introduce a special case of Banerjee's Inequalities (also called the +// Extreme-Value Test) that can handle some of the SIV and RDIV cases, +// particularly cases with symbolics. Since it's only able to disprove +// dependence (not compute distances or directions), we'll use it as a +// fall back for the other tests. +// +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] +// where i and j are induction variables and c1 and c2 are loop invariants, +// we can use the symbolic tests to disprove some dependences, serving as a +// backup for the RDIV test. 
Note that i and j can be the same variable, +// letting this test serve as a backup for the various SIV tests. +// +// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some +// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) +// loop bounds for the i and j loops, respectively. So, ... +// +// c1 + a1*i = c2 + a2*j +// a1*i - a2*j = c2 - c1 +// +// To test for a dependence, we compute c2 - c1 and make sure it's in the +// range of the maximum and minimum possible values of a1*i - a2*j. +// Considering the signs of a1 and a2, we have 4 possible cases: +// +// 1) If a1 >= 0 and a2 >= 0, then +// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 +// -a2*N2 <= c2 - c1 <= a1*N1 +// +// 2) If a1 >= 0 and a2 <= 0, then +// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 +// 0 <= c2 - c1 <= a1*N1 - a2*N2 +// +// 3) If a1 <= 0 and a2 >= 0, then +// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 +// a1*N1 - a2*N2 <= c2 - c1 <= 0 +// +// 4) If a1 <= 0 and a2 <= 0, then +// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 +// a1*N1 <= c2 - c1 <= -a2*N2 +// +// return true if dependence disproved +bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, + const SCEV *A2, + const SCEV *C1, + const SCEV *C2, + const Loop *Loop1, + const Loop *Loop2) const { + ++SymbolicRDIVapplications; + DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); + DEBUG(dbgs() << "\t A1 = " << *A1); + DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); + DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); + DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); + DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); + const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); + const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); + DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); + DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); + const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); + const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); + DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); + DEBUG(dbgs() << "\t C1 - C2 = " << 
*C1_C2 << "\n"); + if (SE->isKnownNonNegative(A1)) { + if (SE->isKnownNonNegative(A2)) { + // A1 >= 0 && A2 >= 0 + if (N1) { + // make sure that c2 - c1 <= a1*N1 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { + ++SymbolicRDIVindependence; + return true; + } + } + if (N2) { + // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + else if (SE->isKnownNonPositive(A2)) { + // a1 >= 0 && a2 <= 0 + if (N1 && N2) { + // make sure that c2 - c1 <= a1*N1 - a2*N2 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); + DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { + ++SymbolicRDIVindependence; + return true; + } + } + // make sure that 0 <= c2 - c1 + if (SE->isKnownNegative(C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + else if (SE->isKnownNonPositive(A1)) { + if (SE->isKnownNonNegative(A2)) { + // a1 <= 0 && a2 >= 0 + if (N1 && N2) { + // make sure that a1*N1 - a2*N2 <= c2 - c1 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); + DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + // make sure that c2 - c1 <= 0 + if (SE->isKnownPositive(C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + else if (SE->isKnownNonPositive(A2)) { + // a1 <= 0 && a2 <= 0 + if (N1) { + // make sure that a1*N1 <= c2 - c1 + const SCEV *A1N1 = SE->getMulExpr(A1, 
N1); + DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + if (N2) { + // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + } + return false; +} + + +// testSIV - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] +// where i is an induction variable, c1 and c2 are loop invariant, and a1 and +// a2 are constant, we attack it with an SIV test. While they can all be +// solved with the Exact SIV test, it's worthwhile to use simpler tests when +// they apply; they're cheaper and sometimes more precise. +// +// Return true if dependence disproved. +bool DependenceAnalysis::testSIV(const SCEV *Src, + const SCEV *Dst, + unsigned &Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); + const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); + if (SrcAddRec && DstAddRec) { + const SCEV *SrcConst = SrcAddRec->getStart(); + const SCEV *DstConst = DstAddRec->getStart(); + const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); + const Loop *CurLoop = SrcAddRec->getLoop(); + assert(CurLoop == DstAddRec->getLoop() && + "both loops in SIV should be same"); + Level = mapSrcLoop(CurLoop); + bool disproven; + if (SrcCoeff == DstCoeff) + disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint); + else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) + disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, 
Result, NewConstraint, SplitIter); + else + disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint); + return disproven || + gcdMIVtest(Src, Dst, Result) || + symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); + } + if (SrcAddRec) { + const SCEV *SrcConst = SrcAddRec->getStart(); + const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + const SCEV *DstConst = Dst; + const Loop *CurLoop = SrcAddRec->getLoop(); + Level = mapSrcLoop(CurLoop); + return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint) || + gcdMIVtest(Src, Dst, Result); + } + if (DstAddRec) { + const SCEV *DstConst = DstAddRec->getStart(); + const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); + const SCEV *SrcConst = Src; + const Loop *CurLoop = DstAddRec->getLoop(); + Level = mapDstLoop(CurLoop); + return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, + CurLoop, Level, Result, NewConstraint) || + gcdMIVtest(Src, Dst, Result); + } + llvm_unreachable("SIV test expected at least one AddRec"); + return false; +} + + +// testRDIV - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] +// where i and j are induction variables, c1 and c2 are loop invariant, +// and a1 and a2 are constant, we can solve it exactly with an easy adaptation +// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. +// It doesn't make sense to talk about distance or direction in this case, +// so there's no point in making special versions of the Strong SIV test or +// the Weak-crossing SIV test. +// +// With minor algebra, this test can also be used for things like +// [c1 + a1*i + a2*j][c2]. +// +// Return true if dependence disproved. 
+bool DependenceAnalysis::testRDIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + // we have 3 possible situations here: + // 1) [a*i + b] and [c*j + d] + // 2) [a*i + c*j + b] and [d] + // 3) [b] and [a*i + c*j + d] + // We need to find what we've got and get organized + + const SCEV *SrcConst, *DstConst; + const SCEV *SrcCoeff, *DstCoeff; + const Loop *SrcLoop, *DstLoop; + + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); + const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); + if (SrcAddRec && DstAddRec) { + SrcConst = SrcAddRec->getStart(); + SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + SrcLoop = SrcAddRec->getLoop(); + DstConst = DstAddRec->getStart(); + DstCoeff = DstAddRec->getStepRecurrence(*SE); + DstLoop = DstAddRec->getLoop(); + } + else if (SrcAddRec) { + if (const SCEVAddRecExpr *tmpAddRec = + dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { + SrcConst = tmpAddRec->getStart(); + SrcCoeff = tmpAddRec->getStepRecurrence(*SE); + SrcLoop = tmpAddRec->getLoop(); + DstConst = Dst; + DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); + DstLoop = SrcAddRec->getLoop(); + } + else + llvm_unreachable("RDIV reached by surprising SCEVs"); + } + else if (DstAddRec) { + if (const SCEVAddRecExpr *tmpAddRec = + dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { + DstConst = tmpAddRec->getStart(); + DstCoeff = tmpAddRec->getStepRecurrence(*SE); + DstLoop = tmpAddRec->getLoop(); + SrcConst = Src; + SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); + SrcLoop = DstAddRec->getLoop(); + } + else + llvm_unreachable("RDIV reached by surprising SCEVs"); + } + else + llvm_unreachable("RDIV expected at least one AddRec"); + return exactRDIVtest(SrcCoeff, DstCoeff, + SrcConst, DstConst, + SrcLoop, DstLoop, + Result) || + gcdMIVtest(Src, Dst, Result) || + symbolicRDIVtest(SrcCoeff, DstCoeff, + 
SrcConst, DstConst, + SrcLoop, DstLoop); +} + + +// Tests the single-subscript MIV pair (Src and Dst) for dependence. +// Return true if dependence disproved. +// Can sometimes refine direction vectors. +bool DependenceAnalysis::testMIV(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + Result.Consistent = false; + return gcdMIVtest(Src, Dst, Result) || + banerjeeMIVtest(Src, Dst, Loops, Result); +} + + +// Given a product, e.g., 10*X*Y, returns the first constant operand, +// in this case 10. If there is no constant part, returns NULL. +static +const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { + for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { + if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) + return Constant; + } + return NULL; +} + + +//===----------------------------------------------------------------------===// +// gcdMIVtest - +// Tests an MIV subscript pair for dependence. +// Returns true if any possible dependence is disproved. +// Marks the result as inconsistent. +// Can sometimes disprove the equal direction for 1 or more loops, +// as discussed in Michael Wolfe's book, +// High Performance Compilers for Parallel Computing, page 235. +// +// We spend some effort (code!) to handle cases like +// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, +// but M and N are just loop-invariant variables. +// This should help us handle linearized subscripts; +// also makes this test a useful backup to the various SIV tests. +// +// It occurs to me that the presence of loop-invariant variables +// changes the nature of the test from "greatest common divisor" +// to "a common divisor". 
+bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + DEBUG(dbgs() << "starting gcd\n"); + ++GCDapplications; + unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); + APInt RunningGCD = APInt::getNullValue(BitWidth); + + // Examine Src coefficients. + // Compute running GCD and record source constant. + // Because we're looking for the constant at the end of the chain, + // we can't quit the loop just because the GCD == 1. + const SCEV *Coefficients = Src; + while (const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(Coefficients)) { + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + if (!Constant) + return false; + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + Coefficients = AddRec->getStart(); + } + const SCEV *SrcConst = Coefficients; + + // Examine Dst coefficients. + // Compute running GCD and record destination constant. + // Because we're looking for the constant at the end of the chain, + // we can't quit the loop just because the GCD == 1. + Coefficients = Dst; + while (const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(Coefficients)) { + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. 
+ Constant = getConstantPart(Product); + if (!Constant) + return false; + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + Coefficients = AddRec->getStart(); + } + const SCEV *DstConst = Coefficients; + + APInt ExtraGCD = APInt::getNullValue(BitWidth); + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << " Delta = " << *Delta << "\n"); + const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); + if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { + // If Delta is a sum of products, we may be able to make further progress. + for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { + const SCEV *Operand = Sum->getOperand(Op); + if (isa<SCEVConstant>(Operand)) { + assert(!Constant && "Surprised to find multiple constants"); + Constant = cast<SCEVConstant>(Operand); + } + else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { + // Search for constant operand to participate in GCD; + // If none found; return false. + const SCEVConstant *ConstOp = getConstantPart(Product); + if (!ConstOp) + return false; + APInt ConstOpValue = ConstOp->getValue()->getValue(); + ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, + ConstOpValue.abs()); + } + else + return false; + } + } + if (!Constant) + return false; + APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); + DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); + if (ConstDelta == 0) + return false; + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); + DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); + APInt Remainder = ConstDelta.srem(RunningGCD); + if (Remainder != 0) { + ++GCDindependence; + return true; + } + + // Try to disprove equal directions. + // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], + // the code above can't disprove the dependence because the GCD = 1. 
+ // So we consider what happen if i = i' and what happens if j = j'. + // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], + // which is infeasible, so we can disallow the = direction for the i level. + // Setting j = j' doesn't help matters, so we end up with a direction vector + // of [<>, *] + // + // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], + // we need to remember that the constant part is 5 and the RunningGCD should + // be initialized to ExtraGCD = 30. + DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); + + bool Improved = false; + Coefficients = Src; + while (const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(Coefficients)) { + Coefficients = AddRec->getStart(); + const Loop *CurLoop = AddRec->getLoop(); + RunningGCD = ExtraGCD; + const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); + const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); + const SCEV *Inner = Src; + while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { + AddRec = cast<SCEVAddRecExpr>(Inner); + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + if (CurLoop == AddRec->getLoop()) + ; // SrcCoeff == Coeff + else { + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. 
+ Constant = getConstantPart(Product); + else + Constant = cast<SCEVConstant>(Coeff); + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + } + Inner = AddRec->getStart(); + } + Inner = Dst; + while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { + AddRec = cast<SCEVAddRecExpr>(Inner); + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + if (CurLoop == AddRec->getLoop()) + DstCoeff = Coeff; + else { + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + else + Constant = cast<SCEVConstant>(Coeff); + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + } + Inner = AddRec->getStart(); + } + Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + else if (isa<SCEVConstant>(Delta)) + Constant = cast<SCEVConstant>(Delta); + else { + // The difference of the two coefficients might not be a product + // or constant, in which case we give up on this direction. 
+ continue; + } + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); + if (RunningGCD != 0) { + Remainder = ConstDelta.srem(RunningGCD); + DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); + if (Remainder != 0) { + unsigned Level = mapSrcLoop(CurLoop); + Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); + Improved = true; + } + } + } + if (Improved) + ++GCDsuccesses; + DEBUG(dbgs() << "all done\n"); + return false; +} + + +//===----------------------------------------------------------------------===// +// banerjeeMIVtest - +// Use Banerjee's Inequalities to test an MIV subscript pair. +// (Wolfe, in the race-car book, calls this the Extreme Value Test.) +// Generally follows the discussion in Section 2.5.2 of +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// +// The inequalities given on page 25 are simplified in that loops are +// normalized so that the lower bound is always 0 and the stride is always 1. +// For example, Wolfe gives +// +// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// +// where A_k is the coefficient of the kth index in the source subscript, +// B_k is the coefficient of the kth index in the destination subscript, +// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth +// index, and N_k is the stride of the kth index. Since all loops are normalized +// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the +// equation to +// +// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 +// = (A^-_k - B_k)^- (U_k - 1) - B_k +// +// Similar simplifications are possible for the other equations. +// +// When we can't determine the number of iterations for a loop, +// we use NULL as an indicator for the worst case, infinity. 
+// When computing the upper bound, NULL denotes +inf; +// for the lower bound, NULL denotes -inf. +// +// Return true if dependence disproved. +bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const { + DEBUG(dbgs() << "starting Banerjee\n"); + ++BanerjeeApplications; + DEBUG(dbgs() << " Src = " << *Src << '\n'); + const SCEV *A0; + CoefficientInfo *A = collectCoeffInfo(Src, true, A0); + DEBUG(dbgs() << " Dst = " << *Dst << '\n'); + const SCEV *B0; + CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); + BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; + const SCEV *Delta = SE->getMinusSCEV(B0, A0); + DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); + + // Compute bounds for all the * directions. + DEBUG(dbgs() << "\tBounds[*]\n"); + for (unsigned K = 1; K <= MaxLevels; ++K) { + Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; + Bound[K].Direction = Dependence::DVEntry::ALL; + Bound[K].DirSet = Dependence::DVEntry::NONE; + findBoundsALL(A, B, Bound, K); +#ifndef NDEBUG + DEBUG(dbgs() << "\t " << K << '\t'); + if (Bound[K].Lower[Dependence::DVEntry::ALL]) + DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[K].Upper[Dependence::DVEntry::ALL]) + DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); +#endif + } + + // Test the *, *, *, ... case. + bool Disproved = false; + if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { + // Explore the direction vector hierarchy. 
+ unsigned DepthExpanded = 0; + unsigned NewDeps = exploreDirections(1, A, B, Bound, + Loops, DepthExpanded, Delta); + if (NewDeps > 0) { + bool Improved = false; + for (unsigned K = 1; K <= CommonLevels; ++K) { + if (Loops[K]) { + unsigned Old = Result.DV[K - 1].Direction; + Result.DV[K - 1].Direction = Old & Bound[K].DirSet; + Improved |= Old != Result.DV[K - 1].Direction; + if (!Result.DV[K - 1].Direction) { + Improved = false; + Disproved = true; + break; + } + } + } + if (Improved) + ++BanerjeeSuccesses; + } + else { + ++BanerjeeIndependence; + Disproved = true; + } + } + else { + ++BanerjeeIndependence; + Disproved = true; + } + delete [] Bound; + delete [] A; + delete [] B; + return Disproved; +} + + +// Hierarchically expands the direction vector +// search space, combining the directions of discovered dependences +// in the DirSet field of Bound. Returns the number of distinct +// dependences discovered. If the dependence is disproved, +// it will return 0. +unsigned DependenceAnalysis::exploreDirections(unsigned Level, + CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + const SmallBitVector &Loops, + unsigned &DepthExpanded, + const SCEV *Delta) const { + if (Level > CommonLevels) { + // record result + DEBUG(dbgs() << "\t["); + for (unsigned K = 1; K <= CommonLevels; ++K) { + if (Loops[K]) { + Bound[K].DirSet |= Bound[K].Direction; +#ifndef NDEBUG + switch (Bound[K].Direction) { + case Dependence::DVEntry::LT: + DEBUG(dbgs() << " <"); + break; + case Dependence::DVEntry::EQ: + DEBUG(dbgs() << " ="); + break; + case Dependence::DVEntry::GT: + DEBUG(dbgs() << " >"); + break; + case Dependence::DVEntry::ALL: + DEBUG(dbgs() << " *"); + break; + default: + llvm_unreachable("unexpected Bound[K].Direction"); + } +#endif + } + } + DEBUG(dbgs() << " ]\n"); + return 1; + } + if (Loops[Level]) { + if (Level > DepthExpanded) { + DepthExpanded = Level; + // compute bounds for <, =, > at current level + findBoundsLT(A, B, Bound, Level); + findBoundsGT(A, 
B, Bound, Level); + findBoundsEQ(A, B, Bound, Level); +#ifndef NDEBUG + DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); + DEBUG(dbgs() << "\t <\t"); + if (Bound[Level].Lower[Dependence::DVEntry::LT]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::LT]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); + DEBUG(dbgs() << "\t =\t"); + if (Bound[Level].Lower[Dependence::DVEntry::EQ]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::EQ]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); + DEBUG(dbgs() << "\t >\t"); + if (Bound[Level].Lower[Dependence::DVEntry::GT]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::GT]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); +#endif + } + + unsigned NewDeps = 0; + + // test bounds for <, *, *, ... + if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + // Test bounds for =, *, *, ... + if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + // test bounds for >, *, *, ... 
+ if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + Bound[Level].Direction = Dependence::DVEntry::ALL; + return NewDeps; + } + else + return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); +} + + +// Returns true iff the current bounds are plausible. +bool DependenceAnalysis::testBounds(unsigned char DirKind, + unsigned Level, + BoundInfo *Bound, + const SCEV *Delta) const { + Bound[Level].Direction = DirKind; + if (const SCEV *LowerBound = getLowerBound(Bound)) + if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) + return false; + if (const SCEV *UpperBound = getUpperBound(Bound)) + if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) + return false; + return true; +} + + +// Computes the upper and lower bounds for level K +// using the * direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k +// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^*_k = (A^-_k - B^+_k)U_k +// UB^*_k = (A^+_k - B^-_k)U_k +// +// We must be careful to handle the case where the upper bound is unknown. +// Note that the lower bound is always <= 0 +// and the upper bound is always >= 0. +void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity. 
+ if (Bound[K].Iterations) { + Bound[K].Lower[Dependence::DVEntry::ALL] = + SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), + Bound[K].Iterations); + Bound[K].Upper[Dependence::DVEntry::ALL] = + SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), + Bound[K].Iterations); + } + else { + // If the difference is 0, we won't need to know the number of iterations. + if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) + Bound[K].Lower[Dependence::DVEntry::ALL] = + SE->getConstant(A[K].Coeff->getType(), 0); + if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) + Bound[K].Upper[Dependence::DVEntry::ALL] = + SE->getConstant(A[K].Coeff->getType(), 0); + } +} + + +// Computes the upper and lower bounds for level K +// using the = direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k +// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^=_k = (A_k - B_k)^- U_k +// UB^=_k = (A_k - B_k)^+ U_k +// +// We must be careful to handle the case where the upper bound is unknown. +// Note that the lower bound is always <= 0 +// and the upper bound is always >= 0. +void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity. 
+ if (Bound[K].Iterations) { + const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); + const SCEV *NegativePart = getNegativePart(Delta); + Bound[K].Lower[Dependence::DVEntry::EQ] = + SE->getMulExpr(NegativePart, Bound[K].Iterations); + const SCEV *PositivePart = getPositivePart(Delta); + Bound[K].Upper[Dependence::DVEntry::EQ] = + SE->getMulExpr(PositivePart, Bound[K].Iterations); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. + const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); + const SCEV *NegativePart = getNegativePart(Delta); + if (NegativePart->isZero()) + Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero + const SCEV *PositivePart = getPositivePart(Delta); + if (PositivePart->isZero()) + Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero + } +} + + +// Computes the upper and lower bounds for level K +// using the < direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k +// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k +// +// We must be careful to handle the case where the upper bound is unknown. +void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity. 
+ if (Bound[K].Iterations) { + const SCEV *Iter_1 = + SE->getMinusSCEV(Bound[K].Iterations, + SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); + Bound[K].Lower[Dependence::DVEntry::LT] = + SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); + Bound[K].Upper[Dependence::DVEntry::LT] = + SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. + const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); + if (NegPart->isZero()) + Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); + if (PosPart->isZero()) + Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); + } +} + + +// Computes the upper and lower bounds for level K +// using the > direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k +// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k +// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k +// +// We must be careful to handle the case where the upper bound is unknown. +void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity. 
+ if (Bound[K].Iterations) { + const SCEV *Iter_1 = + SE->getMinusSCEV(Bound[K].Iterations, + SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); + Bound[K].Lower[Dependence::DVEntry::GT] = + SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); + Bound[K].Upper[Dependence::DVEntry::GT] = + SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. + const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); + if (NegPart->isZero()) + Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; + const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); + if (PosPart->isZero()) + Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; + } +} + + +// X^+ = max(X, 0) +const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { + return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); +} + + +// X^- = min(X, 0) +const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { + return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); +} + + +// Walks through the subscript, +// collecting each coefficient, the associated loop bounds, +// and recording its positive and negative parts for later use. 
+DependenceAnalysis::CoefficientInfo * +DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, + bool SrcFlag, + const SCEV *&Constant) const { + const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); + CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; + for (unsigned K = 1; K <= MaxLevels; ++K) { + CI[K].Coeff = Zero; + CI[K].PosPart = Zero; + CI[K].NegPart = Zero; + CI[K].Iterations = NULL; + } + while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { + const Loop *L = AddRec->getLoop(); + unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); + CI[K].Coeff = AddRec->getStepRecurrence(*SE); + CI[K].PosPart = getPositivePart(CI[K].Coeff); + CI[K].NegPart = getNegativePart(CI[K].Coeff); + CI[K].Iterations = collectUpperBound(L, Subscript->getType()); + Subscript = AddRec->getStart(); + } + Constant = Subscript; +#ifndef NDEBUG + DEBUG(dbgs() << "\tCoefficient Info\n"); + for (unsigned K = 1; K <= MaxLevels; ++K) { + DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); + DEBUG(dbgs() << "\tPos Part = "); + DEBUG(dbgs() << *CI[K].PosPart); + DEBUG(dbgs() << "\tNeg Part = "); + DEBUG(dbgs() << *CI[K].NegPart); + DEBUG(dbgs() << "\tUpper Bound = "); + if (CI[K].Iterations) + DEBUG(dbgs() << *CI[K].Iterations); + else + DEBUG(dbgs() << "+inf"); + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); +#endif + return CI; +} + + +// Looks through all the bounds info and +// computes the lower bound given the current direction settings +// at each level. If the lower bound for any level is -inf, +// the result is -inf. 
+const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { + const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; + for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { + if (Bound[K].Lower[Bound[K].Direction]) + Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); + else + Sum = NULL; + } + return Sum; +} + + +// Looks through all the bounds info and +// computes the upper bound given the current direction settings +// at each level. If the upper bound at any level is +inf, +// the result is +inf. +const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { + const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; + for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { + if (Bound[K].Upper[Bound[K].Direction]) + Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); + else + Sum = NULL; + } + return Sum; +} + + +//===----------------------------------------------------------------------===// +// Constraint manipulation for Delta test. + +// Given a linear SCEV, +// return the coefficient (the step) +// corresponding to the specified loop. +// If there isn't one, return 0. +// For example, given a*i + b*j + c*k, zeroing the coefficient +// corresponding to the j loop would yield b. +const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); + if (!AddRec) + return SE->getConstant(Expr->getType(), 0); + if (AddRec->getLoop() == TargetLoop) + return AddRec->getStepRecurrence(*SE); + return findCoefficient(AddRec->getStart(), TargetLoop); +} + + +// Given a linear SCEV, +// return the SCEV given by zeroing out the coefficient +// corresponding to the specified loop. +// For example, given a*i + b*j + c*k, zeroing the coefficient +// corresponding to the j loop would yield a*i + c*k. 
+const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); + if (!AddRec) + return Expr; // ignore + if (AddRec->getLoop() == TargetLoop) + return AddRec->getStart(); + return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), + AddRec->getStepRecurrence(*SE), + AddRec->getLoop(), + AddRec->getNoWrapFlags()); +} + + +// Given a linear SCEV Expr, +// return the SCEV given by adding some Value to the +// coefficient corresponding to the specified TargetLoop. +// For example, given a*i + b*j + c*k, adding 1 to the coefficient +// corresponding to the j loop would yield a*i + (b+1)*j + c*k. +const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, + const Loop *TargetLoop, + const SCEV *Value) const { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); + if (!AddRec) // create a new addRec + return SE->getAddRecExpr(Expr, + Value, + TargetLoop, + SCEV::FlagAnyWrap); // Worst case, with no info. + if (AddRec->getLoop() == TargetLoop) { + const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); + if (Sum->isZero()) + return AddRec->getStart(); + return SE->getAddRecExpr(AddRec->getStart(), + Sum, + AddRec->getLoop(), + AddRec->getNoWrapFlags()); + } + if (SE->isLoopInvariant(AddRec, TargetLoop)) + return SE->getAddRecExpr(AddRec, + Value, + TargetLoop, + SCEV::FlagAnyWrap); + return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), + TargetLoop, Value), + AddRec->getStepRecurrence(*SE), + AddRec->getLoop(), + AddRec->getNoWrapFlags()); +} + + +// Review the constraints, looking for opportunities +// to simplify a subscript pair (Src and Dst). +// Return true if some simplification occurs. +// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. 
+// Corresponds to Figure 5 from the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +bool DependenceAnalysis::propagate(const SCEV *&Src, + const SCEV *&Dst, + SmallBitVector &Loops, + SmallVectorImpl<Constraint> &Constraints, + bool &Consistent) { + bool Result = false; + for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { + DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); + DEBUG(Constraints[LI].dump(dbgs())); + if (Constraints[LI].isDistance()) + Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); + else if (Constraints[LI].isLine()) + Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); + else if (Constraints[LI].isPoint()) + Result |= propagatePoint(Src, Dst, Constraints[LI]); + } + return Result; +} + + +// Attempt to propagate a distance +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. +// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. +bool DependenceAnalysis::propagateDistance(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + if (A_K->isZero()) + return false; + const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); + Src = SE->getMinusSCEV(Src, DA_K); + Src = zeroCoefficient(Src, CurLoop); + DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); + DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + return true; +} + + +// Attempt to propagate a line +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. 
+// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. +bool DependenceAnalysis::propagateLine(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const SCEV *A = CurConstraint.getA(); + const SCEV *B = CurConstraint.getB(); + const SCEV *C = CurConstraint.getC(); + DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); + DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); + if (A->isZero()) { + const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); + const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); + if (!Bconst || !Cconst) return false; + APInt Beta = Bconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivB = Charlie.sdiv(Beta); + assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); + const SCEV *AP_K = findCoefficient(Dst, CurLoop); + // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); + Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); + Dst = zeroCoefficient(Dst, CurLoop); + if (!findCoefficient(Src, CurLoop)->isZero()) + Consistent = false; + } + else if (B->isZero()) { + const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); + const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); + if (!Aconst || !Cconst) return false; + APInt Alpha = Aconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivA = Charlie.sdiv(Alpha); + assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); + Src = zeroCoefficient(Src, CurLoop); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); + const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); + if (!Aconst || !Cconst) return false; + APInt Alpha = Aconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivA = Charlie.sdiv(Alpha); + assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); + Src = zeroCoefficient(Src, CurLoop); + Dst = addToCoefficient(Dst, CurLoop, A_K); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + else { + // paper is incorrect here, or perhaps just misleading + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getMulExpr(Src, A); + Dst = SE->getMulExpr(Dst, A); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); + Src = zeroCoefficient(Src, CurLoop); + Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); + DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); + return true; +} + + +// Attempt to propagate a point +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. 
+bool DependenceAnalysis::propagatePoint(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const SCEV *A_K = findCoefficient(Src, CurLoop); + const SCEV *AP_K = findCoefficient(Dst, CurLoop); + const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); + const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); + DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); + Src = zeroCoefficient(Src, CurLoop); + DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + Dst = zeroCoefficient(Dst, CurLoop); + DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + return true; +} + + +// Update direction vector entry based on the current constraint. +void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, + const Constraint &CurConstraint + ) const { + DEBUG(dbgs() << "\tUpdate direction, constraint ="); + DEBUG(CurConstraint.dump(dbgs())); + if (CurConstraint.isAny()) + ; // use defaults + else if (CurConstraint.isDistance()) { + // this one is consistent, the others aren't + Level.Scalar = false; + Level.Distance = CurConstraint.getD(); + unsigned NewDirection = Dependence::DVEntry::NONE; + if (!SE->isKnownNonZero(Level.Distance)) // if may be zero + NewDirection = Dependence::DVEntry::EQ; + if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive + NewDirection |= Dependence::DVEntry::LT; + if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative + NewDirection |= Dependence::DVEntry::GT; + Level.Direction &= NewDirection; + } + else if (CurConstraint.isLine()) { + Level.Scalar = false; + Level.Distance = NULL; + // direction should be accurate + } + else if (CurConstraint.isPoint()) { + Level.Scalar = false; + Level.Distance = NULL; + unsigned NewDirection = Dependence::DVEntry::NONE; + if (!isKnownPredicate(CmpInst::ICMP_NE, + CurConstraint.getY(), + 
CurConstraint.getX())) + // if X may be = Y + NewDirection |= Dependence::DVEntry::EQ; + if (!isKnownPredicate(CmpInst::ICMP_SLE, + CurConstraint.getY(), + CurConstraint.getX())) + // if Y may be > X + NewDirection |= Dependence::DVEntry::LT; + if (!isKnownPredicate(CmpInst::ICMP_SGE, + CurConstraint.getY(), + CurConstraint.getX())) + // if Y may be < X + NewDirection |= Dependence::DVEntry::GT; + Level.Direction &= NewDirection; + } + else + llvm_unreachable("constraint has unexpected kind"); +} + +/// Check if we can delinearize the subscripts. If the SCEVs representing the +/// source and destination array references are recurrences on a nested loop, +/// this function flattens the nested recurrences into seperate recurrences +/// for each loop level. +bool +DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, + SmallVectorImpl<Subscript> &Pair) const { + const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); + const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); + if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) + return false; + + SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes; + SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes); + DstAR->delinearize(*SE, DstSubscripts, DstSizes); + + int size = SrcSubscripts.size(); + int dstSize = DstSubscripts.size(); + if (size != dstSize || size < 2) + return false; + +#ifndef NDEBUG + DEBUG(errs() << "\nSrcSubscripts: "); + for (int i = 0; i < size; i++) + DEBUG(errs() << *SrcSubscripts[i]); + DEBUG(errs() << "\nDstSubscripts: "); + for (int i = 0; i < size; i++) + DEBUG(errs() << *DstSubscripts[i]); +#endif + + // The delinearization transforms a single-subscript MIV dependence test into + // a multi-subscript SIV dependence test that is easier to compute. So we + // resize Pair to contain as many pairs of subscripts as the delinearization + // has found, and then initialize the pairs following the delinearization. 
+ Pair.resize(size); + for (int i = 0; i < size; ++i) { + Pair[i].Src = SrcSubscripts[i]; + Pair[i].Dst = DstSubscripts[i]; + + // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the + // delinearization has found, and add these constraints to the dependence + // check to avoid memory accesses overflow from one dimension into another. + // This is related to the problem of determining the existence of data + // dependences in array accesses using a different number of subscripts: in + // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. + } + + return true; +} + +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +// For debugging purposes, dump a small bit vector to dbgs(). +static void dumpSmallBitVector(SmallBitVector &BV) { + dbgs() << "{"; + for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { + dbgs() << VI; + if (BV.find_next(VI) >= 0) + dbgs() << ' '; + } + dbgs() << "}\n"; +} +#endif + + +// depends - +// Returns NULL if there is no dependence. +// Otherwise, return a Dependence with as many details as possible. +// Corresponds to Section 3.1 in the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +// +// Care is required to keep the routine below, getSplitIteration(), +// up to date with respect to this routine. +Dependence *DependenceAnalysis::depends(Instruction *Src, + Instruction *Dst, + bool PossiblyLoopIndependent) { + if (Src == Dst) + PossiblyLoopIndependent = false; + + if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || + (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) + // if both instructions don't reference memory, there's no dependence + return NULL; + + if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { + // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 
+ DEBUG(dbgs() << "can only handle simple loads and stores\n"); + return new Dependence(Src, Dst); + } + + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); + + switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { + case AliasAnalysis::MayAlias: + case AliasAnalysis::PartialAlias: + // cannot analyse objects if we don't understand their aliasing. + DEBUG(dbgs() << "can't analyze may or partial alias\n"); + return new Dependence(Src, Dst); + case AliasAnalysis::NoAlias: + // If the objects noalias, they are distinct, accesses are independent. + DEBUG(dbgs() << "no alias\n"); + return NULL; + case AliasAnalysis::MustAlias: + break; // The underlying objects alias; test accesses for dependence. + } + + // establish loop nesting levels + establishNestingLevels(Src, Dst); + DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); + DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); + + FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); + ++TotalArrayPairs; + + // See if there are GEPs we can use. + bool UsefulGEP = false; + GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); + GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); + if (SrcGEP && DstGEP && + SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { + const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); + const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); + DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); + DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); + + UsefulGEP = + isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + } + unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; + SmallVector<Subscript, 4> Pair(Pairs); + if (UsefulGEP) { + DEBUG(dbgs() << " using GEPs\n"); + unsigned P = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(); + SrcIdx != SrcEnd; + ++SrcIdx, ++DstIdx, ++P) { + Pair[P].Src = SE->getSCEV(*SrcIdx); + Pair[P].Dst = SE->getSCEV(*DstIdx); + } + } + else { + DEBUG(dbgs() << " ignoring GEPs\n"); + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); + DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; + } + + if (Delinearize && Pairs == 1 && CommonLevels > 1 && + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } + + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Pair[P].GroupLoops.resize(MaxLevels + 1); + Pair[P].Group.resize(Pairs); + removeMatchingExtensions(&Pair[P]); + Pair[P].Classification = + classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), + Pair[P].Dst, LI->getLoopFor(Dst->getParent()), + Pair[P].Loops); + Pair[P].GroupLoops = Pair[P].Loops; + Pair[P].Group.set(P); + DEBUG(dbgs() << " subscript " << P << "\n"); + DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); + DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); + DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); + DEBUG(dbgs() << "\tloops = "); + DEBUG(dumpSmallBitVector(Pair[P].Loops)); + } + + SmallBitVector Separable(Pairs); + SmallBitVector Coupled(Pairs); + + // Partition subscripts into separable and minimally-coupled groups + // Algorithm in paper is algorithmically better; + // this may be faster in practice. Check someday. + // + // Here's an example of how it works. Consider this code: + // + // for (i = ...) { + // for (j = ...) { + // for (k = ...) 
{ + // for (l = ...) { + // for (m = ...) { + // A[i][j][k][m] = ...; + // ... = A[0][j][l][i + j]; + // } + // } + // } + // } + // } + // + // There are 4 subscripts here: + // 0 [i] and [0] + // 1 [j] and [j] + // 2 [k] and [l] + // 3 [m] and [i + j] + // + // We've already classified each subscript pair as ZIV, SIV, etc., + // and collected all the loops mentioned by pair P in Pair[P].Loops. + // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops + // and set Pair[P].Group = {P}. + // + // Src Dst Classification Loops GroupLoops Group + // 0 [i] [0] SIV {1} {1} {0} + // 1 [j] [j] SIV {2} {2} {1} + // 2 [k] [l] RDIV {3,4} {3,4} {2} + // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} + // + // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. + // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. + // + // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. + // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. + // Next, 0 and 3. The intersection of their GroupLoops = {1}, not empty, + // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added + // to either Separable or Coupled). + // + // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. + // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, + // so Pair[3].Group = {0, 1, 3} and Done = false. + // + // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. + // Since Done remains true, we add 2 to the set of Separable pairs. + // + // Finally, we consider 3. There's nothing to compare it with, + // so Done remains true and we add it to the Coupled set. + // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. + // + // In the end, we've got 1 separable subscript and 1 coupled group. 
+ for (unsigned SI = 0; SI < Pairs; ++SI) { + if (Pair[SI].Classification == Subscript::NonLinear) { + // ignore these, but collect loops for later + ++NonlinearSubscriptPairs; + collectCommonLoops(Pair[SI].Src, + LI->getLoopFor(Src->getParent()), + Pair[SI].Loops); + collectCommonLoops(Pair[SI].Dst, + LI->getLoopFor(Dst->getParent()), + Pair[SI].Loops); + Result.Consistent = false; + } + else if (Pair[SI].Classification == Subscript::ZIV) { + // always separable + Separable.set(SI); + } + else { + // SIV, RDIV, or MIV, so check for coupled group + bool Done = true; + for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { + SmallBitVector Intersection = Pair[SI].GroupLoops; + Intersection &= Pair[SJ].GroupLoops; + if (Intersection.any()) { + // accumulate set of all the loops in group + Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; + // accumulate set of all subscripts in group + Pair[SJ].Group |= Pair[SI].Group; + Done = false; + } + } + if (Done) { + if (Pair[SI].Group.count() == 1) { + Separable.set(SI); + ++SeparableSubscriptPairs; + } + else { + Coupled.set(SI); + ++CoupledSubscriptPairs; + } + } + } + } + + DEBUG(dbgs() << " Separable = "); + DEBUG(dumpSmallBitVector(Separable)); + DEBUG(dbgs() << " Coupled = "); + DEBUG(dumpSmallBitVector(Coupled)); + + Constraint NewConstraint; + NewConstraint.setAny(SE); + + // test separable subscripts + for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + DEBUG(dbgs() << "testing subscript " << SI); + switch (Pair[SI].Classification) { + case Subscript::ZIV: + DEBUG(dbgs() << ", ZIV\n"); + if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) + return NULL; + break; + case Subscript::SIV: { + DEBUG(dbgs() << ", SIV\n"); + unsigned Level; + const SCEV *SplitIter = NULL; + if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, + Result, NewConstraint, SplitIter)) + return NULL; + break; + } + case Subscript::RDIV: + DEBUG(dbgs() << ", RDIV\n"); + if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) + return NULL; + 
break; + case Subscript::MIV: + DEBUG(dbgs() << ", MIV\n"); + if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) + return NULL; + break; + default: + llvm_unreachable("subscript has unexpected classification"); + } + } + + if (Coupled.count()) { + // test coupled subscript groups + DEBUG(dbgs() << "starting on coupled subscripts\n"); + DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); + SmallVector<Constraint, 4> Constraints(MaxLevels + 1); + for (unsigned II = 0; II <= MaxLevels; ++II) + Constraints[II].setAny(SE); + for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + DEBUG(dbgs() << "testing subscript group " << SI << " { "); + SmallBitVector Group(Pair[SI].Group); + SmallBitVector Sivs(Pairs); + SmallBitVector Mivs(Pairs); + SmallBitVector ConstrainedLevels(MaxLevels + 1); + for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + DEBUG(dbgs() << SJ << " "); + if (Pair[SJ].Classification == Subscript::SIV) + Sivs.set(SJ); + else + Mivs.set(SJ); + } + DEBUG(dbgs() << "}\n"); + while (Sivs.any()) { + bool Changed = false; + for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); + // SJ is an SIV subscript that's part of the current coupled group + unsigned Level; + const SCEV *SplitIter = NULL; + DEBUG(dbgs() << "SIV\n"); + if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, + Result, NewConstraint, SplitIter)) + return NULL; + ConstrainedLevels.set(Level); + if (intersectConstraints(&Constraints[Level], &NewConstraint)) { + if (Constraints[Level].isEmpty()) { + ++DeltaIndependence; + return NULL; + } + Changed = true; + } + Sivs.reset(SJ); + } + if (Changed) { + // propagate, possibly creating new SIVs and ZIVs + DEBUG(dbgs() << " propagating\n"); + DEBUG(dbgs() << "\tMivs = "); + DEBUG(dumpSmallBitVector(Mivs)); + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + // SJ is an MIV subscript that's part of the 
current coupled group + DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); + if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, + Constraints, Result.Consistent)) { + DEBUG(dbgs() << "\t Changed\n"); + ++DeltaPropagations; + Pair[SJ].Classification = + classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), + Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), + Pair[SJ].Loops); + switch (Pair[SJ].Classification) { + case Subscript::ZIV: + DEBUG(dbgs() << "ZIV\n"); + if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) + return NULL; + Mivs.reset(SJ); + break; + case Subscript::SIV: + Sivs.set(SJ); + Mivs.reset(SJ); + break; + case Subscript::RDIV: + case Subscript::MIV: + break; + default: + llvm_unreachable("bad subscript classification"); + } + } + } + } + } + + // test & propagate remaining RDIVs + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + if (Pair[SJ].Classification == Subscript::RDIV) { + DEBUG(dbgs() << "RDIV test\n"); + if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) + return NULL; + // I don't yet understand how to propagate RDIV results + Mivs.reset(SJ); + } + } + + // test remaining MIVs + // This code is temporary. + // Better to somehow test all remaining subscripts simultaneously. + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + if (Pair[SJ].Classification == Subscript::MIV) { + DEBUG(dbgs() << "MIV test\n"); + if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) + return NULL; + } + else + llvm_unreachable("expected only MIV subscripts at this point"); + } + + // update Result.DV from constraint vector + DEBUG(dbgs() << " updating\n"); + for (int SJ = ConstrainedLevels.find_first(); + SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { + updateDirection(Result.DV[SJ - 1], Constraints[SJ]); + if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) + return NULL; + } + } + } + + // Make sure the Scalar flags are set correctly. 
+ SmallBitVector CompleteLoops(MaxLevels + 1); + for (unsigned SI = 0; SI < Pairs; ++SI) + CompleteLoops |= Pair[SI].Loops; + for (unsigned II = 1; II <= CommonLevels; ++II) + if (CompleteLoops[II]) + Result.DV[II - 1].Scalar = false; + + if (PossiblyLoopIndependent) { + // Make sure the LoopIndependent flag is set correctly. + // All directions must include equal, otherwise no + // loop-independent dependence is possible. + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { + Result.LoopIndependent = false; + break; + } + } + } + else { + // On the other hand, if all directions are equal and there's no + // loop-independent dependence possible, then no dependence exists. + bool AllEqual = true; + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (Result.getDirection(II) != Dependence::DVEntry::EQ) { + AllEqual = false; + break; + } + } + if (AllEqual) + return NULL; + } + + FullDependence *Final = new FullDependence(Result); + Result.DV = NULL; + return Final; +} + + + +//===----------------------------------------------------------------------===// +// getSplitIteration - +// Rather than spend rarely-used space recording the splitting iteration +// during the Weak-Crossing SIV test, we re-compute it on demand. +// The re-computation is basically a repeat of the entire dependence test, +// though simplified since we know that the dependence exists. +// It's tedious, since we must go through all propagations, etc. +// +// Care is required to keep this code up to date with respect to the routine +// above, depends(). +// +// Generally, the dependence analyzer will be used to build +// a dependence graph for a function (basically a map from instructions +// to dependences). Looking for cycles in the graph shows us loops +// that cannot be trivially vectorized/parallelized. 
+// +// We can try to improve the situation by examining all the dependences +// that make up the cycle, looking for ones we can break. +// Sometimes, peeling the first or last iteration of a loop will break +// dependences, and we've got flags for those possibilities. +// Sometimes, splitting a loop at some other iteration will do the trick, +// and we've got a flag for that case. Rather than waste the space to +// record the exact iteration (since we rarely know), we provide +// a method that calculates the iteration. It's a drag that it must work +// from scratch, but wonderful in that it's possible. +// +// Here's an example: +// +// for (i = 0; i < 10; i++) +// A[i] = ... +// ... = A[11 - i] +// +// There's a loop-carried flow dependence from the store to the load, +// found by the weak-crossing SIV test. The dependence will have a flag, +// indicating that the dependence can be broken by splitting the loop. +// Calling getSplitIteration will return 5. +// Splitting the loop breaks the dependence, like so: +// +// for (i = 0; i <= 5; i++) +// A[i] = ... +// ... = A[11 - i] +// for (i = 6; i < 10; i++) +// A[i] = ... +// ... = A[11 - i] +// +// breaks the dependence and allows us to vectorize/parallelize +// both loops. 
//
// Dep        - dependence to re-analyze; asserted to be non-null and to be
//              splitable at SplitLevel.
// SplitLevel - loop level whose splitting iteration is requested.
//
// Returns the SplitIter produced by testSIV for the first SIV subscript whose
// Level equals SplitLevel. Falling off the end without finding one is treated
// as unreachable.
const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
                                                  unsigned SplitLevel) {
  assert(Dep && "expected a pointer to a Dependence");
  assert(Dep->isSplitable(SplitLevel) &&
         "Dep should be splitable at SplitLevel");
  Instruction *Src = Dep->getSrc();
  Instruction *Dst = Dep->getDst();
  // The asserts below restate, as preconditions, the checks this routine does
  // not repeat: both instructions touch memory, both are loads or stores, and
  // their underlying objects must-alias.
  assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
  assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
  assert(isLoadOrStore(Src));
  assert(isLoadOrStore(Dst));
  Value *SrcPtr = getPointerOperand(Src);
  Value *DstPtr = getPointerOperand(Dst);
  assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
         AliasAnalysis::MustAlias);

  // establish loop nesting levels
  establishNestingLevels(Src, Dst);

  // Scratch result object; it is only passed to the tests below and is never
  // returned from this routine.
  FullDependence Result(Src, Dst, false, CommonLevels);

  // See if there are GEPs we can use.
  // A GEP pair is "useful" only when both pointers are GEPs with the same
  // pointer-operand type and both base pointers are loop invariant.
  bool UsefulGEP = false;
  GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
  GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
  if (SrcGEP && DstGEP &&
      SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
    const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
    const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
    UsefulGEP =
      isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
      isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
  }
  // One subscript pair per source GEP index, or a single pair built from the
  // whole pointer expressions when the GEPs are not usable.
  unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
  SmallVector<Subscript, 4> Pair(Pairs);
  if (UsefulGEP) {
    unsigned P = 0;
    // NOTE(review): DstIdx advances in lock-step with SrcIdx and is never
    // compared against DstGEP->idx_end(); this presumably relies on both GEPs
    // having the same number of indices -- confirm.
    for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
           SrcEnd = SrcGEP->idx_end(),
           DstIdx = DstGEP->idx_begin();
         SrcIdx != SrcEnd;
         ++SrcIdx, ++DstIdx, ++P) {
      Pair[P].Src = SE->getSCEV(*SrcIdx);
      Pair[P].Dst = SE->getSCEV(*DstIdx);
    }
  }
  else {
    const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
    const SCEV *DstSCEV = SE->getSCEV(DstPtr);
    Pair[0].Src = SrcSCEV;
    Pair[0].Dst = DstSCEV;
  }

  // With a single pair and more than one common level, try to break the pair
  // into several subscripts; on success Pair has been rewritten, so Pairs is
  // refreshed from its new size.
  if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
    DEBUG(dbgs() << " delinerized GEP\n");
    Pairs = Pair.size();
  }

  // Classify every subscript pair and seed its group bookkeeping:
  // GroupLoops starts as the pair's own Loops, Group starts as {P}.
  for (unsigned P = 0; P < Pairs; ++P) {
    Pair[P].Loops.resize(MaxLevels + 1);
    Pair[P].GroupLoops.resize(MaxLevels + 1);
    Pair[P].Group.resize(Pairs);
    removeMatchingExtensions(&Pair[P]);
    Pair[P].Classification =
      classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
                   Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
                   Pair[P].Loops);
    Pair[P].GroupLoops = Pair[P].Loops;
    Pair[P].Group.set(P);
  }

  SmallBitVector Separable(Pairs);
  SmallBitVector Coupled(Pairs);

  // partition subscripts into separable and minimally-coupled groups
  for (unsigned SI = 0; SI < Pairs; ++SI) {
    if (Pair[SI].Classification == Subscript::NonLinear) {
      // ignore these, but collect loops for later
      collectCommonLoops(Pair[SI].Src,
                         LI->getLoopFor(Src->getParent()),
                         Pair[SI].Loops);
      collectCommonLoops(Pair[SI].Dst,
                         LI->getLoopFor(Dst->getParent()),
                         Pair[SI].Loops);
      Result.Consistent = false;
    }
    else if (Pair[SI].Classification == Subscript::ZIV)
      Separable.set(SI);
    else {
      // SIV, RDIV, or MIV, so check for coupled group
      // Done stays true only if SI shares no loop with any later subscript;
      // otherwise SI's loops and members are merged forward into SJ.
      bool Done = true;
      for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
        SmallBitVector Intersection = Pair[SI].GroupLoops;
        Intersection &= Pair[SJ].GroupLoops;
        if (Intersection.any()) {
          // accumulate set of all the loops in group
          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
          // accumulate set of all subscripts in group
          Pair[SJ].Group |= Pair[SI].Group;
          Done = false;
        }
      }
      if (Done) {
        // A group of exactly one subscript is separable; a larger group is
        // recorded as coupled under its last member, SI.
        if (Pair[SI].Group.count() == 1)
          Separable.set(SI);
        else
          Coupled.set(SI);
      }
    }
  }

  Constraint NewConstraint;
  NewConstraint.setAny(SE);

  // test separable subscripts
  for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
    switch (Pair[SI].Classification) {
    case Subscript::SIV: {
      unsigned Level;
      const SCEV *SplitIter = NULL;
      // The test's boolean result is deliberately discarded; only the Level
      // and SplitIter out-parameters are used here.
      (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
                     Result, NewConstraint, SplitIter);
      if (Level == SplitLevel) {
        assert(SplitIter != NULL);
        return SplitIter;
      }
      break;
    }
    case Subscript::ZIV:
    case Subscript::RDIV:
    case Subscript::MIV:
      // These classifications are skipped: only testSIV yields a SplitIter.
      break;
    default:
      llvm_unreachable("subscript has unexpected classification");
    }
  }

  if (Coupled.count()) {
    // test coupled subscript groups
    // One constraint per level, indices 0..MaxLevels, each starting as "any".
    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
    for (unsigned II = 0; II <= MaxLevels; ++II)
      Constraints[II].setAny(SE);
    for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
      SmallBitVector Group(Pair[SI].Group);
      SmallBitVector Sivs(Pairs);
      SmallBitVector Mivs(Pairs);
      SmallBitVector ConstrainedLevels(MaxLevels + 1);
      // Split the group's members into SIV subscripts and everything else.
      for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
        if (Pair[SJ].Classification == Subscript::SIV)
          Sivs.set(SJ);
        else
          Mivs.set(SJ);
      }
      // Iterate until no SIV subscripts remain; propagation below may move
      // subscripts from Mivs back into Sivs.
      while (Sivs.any()) {
        bool Changed = false;
        for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
          // SJ is an SIV subscript that's part of the current coupled group
          unsigned Level;
          const SCEV *SplitIter = NULL;
          (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
                         Result, NewConstraint, SplitIter);
          // Unlike the separable case, a null SplitIter at the requested
          // level is tolerated here and the search simply continues.
          if (Level == SplitLevel && SplitIter)
            return SplitIter;
          ConstrainedLevels.set(Level);
          if (intersectConstraints(&Constraints[Level], &NewConstraint))
            Changed = true;
          Sivs.reset(SJ);
        }
        if (Changed) {
          // propagate, possibly creating new SIVs and ZIVs
          for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
            // SJ is an MIV subscript that's part of the current coupled group
            if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
                          Pair[SJ].Loops, Constraints, Result.Consistent)) {
              // The pair was rewritten by propagate; reclassify it and move
              // it between the Sivs/Mivs sets accordingly.
              Pair[SJ].Classification =
                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
                             Pair[SJ].Loops);
              switch (Pair[SJ].Classification) {
              case Subscript::ZIV:
                Mivs.reset(SJ);
                break;
              case Subscript::SIV:
                Sivs.set(SJ);
                Mivs.reset(SJ);
                break;
              case Subscript::RDIV:
              case Subscript::MIV:
                break;
              default:
                llvm_unreachable("bad subscript classification");
              }
            }
          }
        }
      }
    }
  }
  // The isSplitable precondition is expected to guarantee an early return
  // above; the trailing return follows the unreachable marker.
  llvm_unreachable("somehow reached end of routine");
  return NULL;
}
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp new file mode 100644 index 000000000000..cde431459d50 --- /dev/null +++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp @@ -0,0 +1,232 @@
//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit
// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the
// program, with a graph of the dominance/postdominance tree of that
// function.
//
// There are also passes available to directly call dotty ('-view-dom' or
// '-view-postdom'). By appending '-only' like '-dot-dom-only' only the
// names of the bbs are printed, but the content is hidden.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/DomPrinter.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
#include "llvm/Analysis/PostDominators.h"

using namespace llvm;

namespace llvm {
// Node-level DOT traits shared by the dominator and post-dominator tree
// specializations below: labels a tree node with its basic block.
template<>
struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {

  DOTGraphTraits (bool isSimple=false)
    : DefaultDOTGraphTraits(isSimple) {}

  // Returns the label for Node. Graph is not used by this implementation.
  std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {

    BasicBlock *BB = Node->getBlock();

    // A node without a block gets a fixed label instead of a block label.
    if (!BB)
      return "Post dominance root node";


    // Delegate to the function-CFG traits: simple or complete label,
    // depending on isSimple().
    if (isSimple())
      return DOTGraphTraits<const Function*>
        ::getSimpleNodeLabel(BB, BB->getParent());
    else
      return DOTGraphTraits<const Function*>
        ::getCompleteNodeLabel(BB, BB->getParent());
  }
};

// DOT traits for a whole dominator tree: supplies the graph title and
// forwards node labelling to the node-level traits above.
template<>
struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {

  DOTGraphTraits (bool isSimple=false)
    : DOTGraphTraits<DomTreeNode*>(isSimple) {}

  static std::string getGraphName(DominatorTree *DT) {
    return "Dominator tree";
  }

  std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
    return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
  }
};

// DOT traits for a whole post-dominator tree; same shape as the dominator
// tree traits, with a different graph title.
template<>
struct DOTGraphTraits<PostDominatorTree*>
  : public DOTGraphTraits<DomTreeNode*> {

  DOTGraphTraits (bool isSimple=false)
    : DOTGraphTraits<DomTreeNode*>(isSimple) {}

  static std::string getGraphName(PostDominatorTree *DT) {
    return "Post dominator tree";
  }

  std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) {
    return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
  }
};
}

// Viewer passes. Each one instantiates DOTGraphTraitsViewer for a tree type
// and registers itself with the global PassRegistry from its constructor.
// The boolean template argument is true for the "-only" variants --
// presumably the "simple" (block-names-only) mode; confirm against
// DOTGraphTraitsPass.h.
namespace {
struct DomViewer
  : public DOTGraphTraitsViewer<DominatorTree, false> {
  static char ID;
  DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
    initializeDomViewerPass(*PassRegistry::getPassRegistry());
  }
};

struct DomOnlyViewer
  : public DOTGraphTraitsViewer<DominatorTree, true> {
  static char ID;
  DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
    initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
  }
};

struct PostDomViewer
  : public DOTGraphTraitsViewer<PostDominatorTree, false> {
  static char ID;
  PostDomViewer() :
    DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
      initializePostDomViewerPass(*PassRegistry::getPassRegistry());
    }
};

struct PostDomOnlyViewer
  : public DOTGraphTraitsViewer<PostDominatorTree, true> {
  static char ID;
  PostDomOnlyViewer() :
    DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
      initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
    }
};
} // end anonymous namespace

// Pass IDs and command-line registration for the four viewer passes.
char DomViewer::ID = 0;
INITIALIZE_PASS(DomViewer, "view-dom",
                "View dominance tree of function", false, false)

char DomOnlyViewer::ID = 0;
INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
                "View dominance tree of function (with no function bodies)",
                false, false)

char PostDomViewer::ID = 0;
INITIALIZE_PASS(PostDomViewer, "view-postdom",
                "View postdominance tree of function", false, false)

char PostDomOnlyViewer::ID = 0;
INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
                "View postdominance tree of function "
                "(with no function bodies)",
                false, false)

// Printer passes. Same structure as the viewers above, but built on
// DOTGraphTraitsPrinter and registered under the "dot-*" names.
namespace {
struct DomPrinter
  : public DOTGraphTraitsPrinter<DominatorTree, false> {
  static char ID;
  DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
    initializeDomPrinterPass(*PassRegistry::getPassRegistry());
  }
};

struct DomOnlyPrinter
  : public DOTGraphTraitsPrinter<DominatorTree, true> {
  static char ID;
  DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
    initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
  }
};

struct PostDomPrinter
  : public DOTGraphTraitsPrinter<PostDominatorTree, false> {
  static char ID;
  PostDomPrinter() :
    DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
      initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
    }
};

struct PostDomOnlyPrinter
  : public DOTGraphTraitsPrinter<PostDominatorTree, true> {
  static char ID;
  PostDomOnlyPrinter() :
    DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
      initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
    }
};
} // end anonymous namespace



// Pass IDs and command-line registration for the four printer passes.
char DomPrinter::ID = 0;
INITIALIZE_PASS(DomPrinter, "dot-dom",
                "Print dominance tree of function to 'dot' file",
                false, false)

char DomOnlyPrinter::ID = 0;
INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
                "Print dominance tree of function to 'dot' file "
                "(with no function bodies)",
                false, false)

char PostDomPrinter::ID = 0;
INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
                "Print postdominance tree of function to 'dot' file",
                false, false)

char PostDomOnlyPrinter::ID = 0;
INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
                "Print postdominance tree of function to 'dot' file "
                "(with no function bodies)",
                false, false)

// Create methods available outside of this file, to use them in
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
// the link time optimization.

// Factory functions: each returns a newly allocated instance of the
// corresponding pass struct. The raw pointer is handed to the caller;
// who eventually deletes it is not visible from this file.

// Creates the DomPrinter pass.
FunctionPass *llvm::createDomPrinterPass() {
  return new DomPrinter();
}

// Creates the DomOnlyPrinter pass.
FunctionPass *llvm::createDomOnlyPrinterPass() {
  return new DomOnlyPrinter();
}

// Creates the DomViewer pass.
FunctionPass *llvm::createDomViewerPass() {
  return new DomViewer();
}

// Creates the DomOnlyViewer pass.
FunctionPass *llvm::createDomOnlyViewerPass() {
  return new DomOnlyViewer();
}

// Creates the PostDomPrinter pass.
FunctionPass *llvm::createPostDomPrinterPass() {
  return new PostDomPrinter();
}

// Creates the PostDomOnlyPrinter pass.
FunctionPass *llvm::createPostDomOnlyPrinterPass() {
  return new PostDomOnlyPrinter();
}

// Creates the PostDomViewer pass.
FunctionPass *llvm::createPostDomViewerPass() {
  return new PostDomViewer();
}

// Creates the PostDomOnlyViewer pass.
FunctionPass *llvm::createPostDomOnlyViewerPass() {
  return new PostDomOnlyViewer();
}
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp new file mode 100644 index 000000000000..7e4a89f1bd57 --- /dev/null +++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp @@ -0,0 +1,141 @@
//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

char DominanceFrontier::ID = 0;
INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
                "Dominance Frontier Construction", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
                "Dominance Frontier Construction", true, true)

namespace {
  // One entry of the explicit work stack used by calculate(): a block and
  // its dominator-tree node, together with the block/node it was pushed from
  // (both NULL for the starting entry).
  class DFCalculateWorkObject {
  public:
    DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
                          const DomTreeNode *N,
                          const DomTreeNode *PN)
    : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
    BasicBlock *currentBB;
    BasicBlock *parentBB;
    const DomTreeNode *Node;
    const DomTreeNode *parentNode;
  };
}

void DominanceFrontier::anchor() { }

// calculate - Compute the dominance frontier sets for the dominator subtree
// rooted at Node, storing them in Frontiers, using an explicit work stack
// instead of recursion.
//
// DT   - dominator tree used for immediate-dominator and dominance queries.
// Node - dominator-tree node to start from.
//
// Returns a reference to the frontier set stored in Frontiers for Node's
// block.
const DominanceFrontier::DomSetType &
DominanceFrontier::calculate(const DominatorTree &DT,
                             const DomTreeNode *Node) {
  BasicBlock *BB = Node->getBlock();
  DomSetType *Result = NULL;

  std::vector<DFCalculateWorkObject> workList;
  SmallPtrSet<BasicBlock *, 32> visited;

  workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
  do {
    // Peek at the top of the stack. Its fields are copied into locals right
    // away because the push_back calls further down may reallocate workList.
    DFCalculateWorkObject *currentW = &workList.back();
    assert (currentW && "Missing work object.");

    BasicBlock *currentBB = currentW->currentBB;
    BasicBlock *parentBB = currentW->parentBB;
    const DomTreeNode *currentNode = currentW->Node;
    const DomTreeNode *parentNode = currentW->parentNode;
    assert (currentBB && "Invalid work object. Missing current Basic Block");
    assert (currentNode && "Invalid work object. Missing current Node");
    DomSetType &S = Frontiers[currentBB];

    // Visit each block only once.
    if (visited.count(currentBB) == 0) {
      visited.insert(currentBB);

      // Loop over CFG successors to calculate DFlocal[currentNode]
      for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
           SI != SE; ++SI) {
        // Does Node immediately dominate this successor?
        if (DT[*SI]->getIDom() != currentNode)
          S.insert(*SI);
      }
    }

    // At this point, S is DFlocal. Now we union in DFup's of our children...
    // Loop through and visit the nodes that Node immediately dominates (Node's
    // children in the IDomTree)
    bool visitChild = false;
    for (DomTreeNode::const_iterator NI = currentNode->begin(),
           NE = currentNode->end(); NI != NE; ++NI) {
      DomTreeNode *IDominee = *NI;
      BasicBlock *childBB = IDominee->getBlock();
      if (visited.count(childBB) == 0) {
        workList.push_back(DFCalculateWorkObject(childBB, currentBB,
                                                 IDominee, currentNode));
        visitChild = true;
      }
    }

    // If no unvisited child was pushed (all children are already visited, or
    // there are none), this block is finished: pop it from the workList.
    if (!visitChild) {

      // The starting entry has no parent: its set is the final result.
      if (!parentBB) {
        Result = &S;
        break;
      }

      // Merge into the parent's set every frontier block of this node that
      // the parent node does not properly dominate.
      DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
      DomSetType &parentSet = Frontiers[parentBB];
      for (; CDFI != CDFE; ++CDFI) {
        if (!DT.properlyDominates(parentNode, DT[*CDFI]))
          parentSet.insert(*CDFI);
      }
      workList.pop_back();
    }

  } while (!workList.empty());

  return *Result;
}

// print - Write every recorded frontier set to OS, one line per block.
// Null block pointers are printed as "<<exit node>>". The Module argument
// is unused.
void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
  for (const_iterator I = begin(), E = end(); I != E; ++I) {
    OS << " DomFrontier for BB ";
    if (I->first)
      WriteAsOperand(OS, I->first, false);
    else
      OS << " <<exit node>>";
    OS << " is:\t";

    const std::set<BasicBlock*> &BBs = I->second;

    // Note: the inner I and E deliberately shadow the outer iterators.
    for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
         I != E; ++I) {
      OS << ' ';
      if (*I)
        WriteAsOperand(OS, *I, false);
      else
        OS << "<<exit node>>";
    }
    OS << "\n";
  }
}

// dump - Print the frontier sets to the debug stream. Only compiled in
// builds without NDEBUG or with LLVM_ENABLE_DUMP defined.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void DominanceFrontierBase::dump() const {
  print(dbgs());
}
#endif

diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp new file mode 100644 index 000000000000..f042964c21d9 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp @@ -0,0 +1,264 @@
//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Construct an empty call graph pass: all three special node pointers start
// out null and the pass registers itself with the global PassRegistry.
CallGraph::CallGraph()
    : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {
  initializeCallGraphPass(*PassRegistry::getPassRegistry());
}

// addToCallGraph - Create (or fetch) the node for F and add its edges:
// edges from ExternalCallingNode when F may be called from outside, an edge
// to CallsExternalNode when F's body is not available, and one edge per call
// site found in F's body.
void CallGraph::addToCallGraph(Function *F) {
  CallGraphNode *Node = getOrInsertFunction(F);

  // If this function has external linkage, anything could call it.
  if (!F->hasLocalLinkage()) {
    ExternalCallingNode->addCalledFunction(CallSite(), Node);

    // Found the entry point?
    if (F->getName() == "main") {
      if (Root) // Found multiple external mains? Don't pick one.
        Root = ExternalCallingNode;
      else
        Root = Node; // Found a main, keep track of it!
    }
  }

  // If this function has its address taken, anything could call it.
  // (A function that is both non-local and address-taken receives two edges
  // from ExternalCallingNode.)
  if (F->hasAddressTaken())
    ExternalCallingNode->addCalledFunction(CallSite(), Node);

  // If this function is not defined in this translation unit, it could call
  // anything.
  if (F->isDeclaration() && !F->isIntrinsic())
    Node->addCalledFunction(CallSite(), CallsExternalNode);

  // Look for calls by this function.
  for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
         ++II) {
      // CS converts to false when the instruction is not a call site.
      CallSite CS(cast<Value>(II));
      if (CS) {
        const Function *Callee = CS.getCalledFunction();
        if (!Callee)
          // Indirect calls of intrinsics are not allowed so no need to check.
          Node->addCalledFunction(CS, CallsExternalNode);
        else if (!Callee->isIntrinsic())
          Node->addCalledFunction(CS, getOrInsertFunction(Callee));
      }
    }
}

// This analysis declares that it preserves all other analyses.
void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
}

// runOnModule - Build the call graph for M. Always returns false.
bool CallGraph::runOnModule(Module &M) {
  Mod = &M;

  // The node for the null function stands in for unknown external callers;
  // it lives in FunctionMap, whereas CallsExternalNode is allocated
  // separately.
  ExternalCallingNode = getOrInsertFunction(0);
  assert(!CallsExternalNode);
  CallsExternalNode = new CallGraphNode(0);
  Root = 0;

  // Add every function to the call graph.
  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
    addToCallGraph(I);

  // If we didn't find a main function, use the external call graph node
  if (Root == 0)
    Root = ExternalCallingNode;

  return false;
}

INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true)

char CallGraph::ID = 0;

// releaseMemory - Delete CallsExternalNode and every node stored in
// FunctionMap, then clear the map.
void CallGraph::releaseMemory() {
  /// CallsExternalNode is not in the function map, delete it explicitly.
  if (CallsExternalNode) {
    CallsExternalNode->allReferencesDropped();
    delete CallsExternalNode;
    CallsExternalNode = 0;
  }

  if (FunctionMap.empty())
    return;

// Reset all node's use counts to zero before deleting them to prevent an
// assertion from firing.
#ifndef NDEBUG
  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
       I != E; ++I)
    I->second->allReferencesDropped();
#endif

  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
       I != E; ++I)
    delete I->second;
  FunctionMap.clear();
}

// print - Write the name of the root function (or the root node's address
// when it has no function) followed by every node in the graph. The Module
// argument is unused. Root is dereferenced unconditionally.
void CallGraph::print(raw_ostream &OS, const Module*) const {
  OS << "CallGraph Root is: ";
  if (Function *F = Root->getFunction())
    OS << F->getName() << "\n";
  else {
    OS << "<<null function: 0x" << Root << ">>\n";
  }

  for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
    I->second->print(OS);
}
// dump - Print the graph to the debug stream. Only compiled in builds
// without NDEBUG or with LLVM_ENABLE_DUMP defined.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void CallGraph::dump() const {
  print(dbgs(), 0);
}
#endif

//===----------------------------------------------------------------------===//
// Implementations of public modification methods
//

// removeFunctionFromModule - Unlink the function from this module, returning
// it. Because this removes the function from the module, the call graph node
// is destroyed. This is only valid if the function does not call any other
// functions (i.e., there are no edges in its CGN). The easiest way to do this
// is to dropAllReferences before calling this.
//
Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
  assert(CGN->empty() && "Cannot remove function from call "
         "graph if it references other functions!");
  Function *F = CGN->getFunction(); // Get the function for the call graph node
  delete CGN;                       // Delete the call graph node for this func
  FunctionMap.erase(F);             // Remove the call graph node from the map

  Mod->getFunctionList().remove(F);
  return F;
}

/// spliceFunction - Replace the function represented by this node by another.
/// This does not rescan the body of the function, so it is suitable when
/// splicing the body of the old function to the new while also updating all
/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+  assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+  assert(!FunctionMap.count(To) &&
+         "Pointing CallGraphNode at a function that already exists");
+
+  // Retarget the existing node at To, then move its map entry from the old
+  // key to the new one.
+  FunctionMapTy::iterator OldEntry = FunctionMap.find(From);
+  CallGraphNode *Node = OldEntry->second;
+  Node->F = const_cast<Function*>(To);
+  FunctionMap[To] = Node;
+  FunctionMap.erase(OldEntry);
+}
+
+// getOrInsertFunction - Return the CallGraphNode registered for F, creating
+// and registering a fresh node first when the map has none yet. F may be
+// null; otherwise it is asserted to belong to the current module.
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+  CallGraphNode *&Slot = FunctionMap[F];
+  if (!Slot) {
+    assert((!F || F->getParent() == Mod) && "Function not in current module!");
+    Slot = new CallGraphNode(const_cast<Function*>(F));
+  }
+  return Slot;
+}
+
+// print - Write a description of this node to OS: the function it stands for
+// (if any), the node pointer and its reference count, then one line per
+// outgoing call edge.
+void CallGraphNode::print(raw_ostream &OS) const {
+  Function *Self = getFunction();
+  if (Self)
+    OS << "Call graph node for function: '" << Self->getName() << "'";
+  else
+    OS << "Call graph node <<null function>>";
+
+  OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
+
+  for (const_iterator Edge = begin(), EdgeEnd = end(); Edge != EdgeEnd;
+       ++Edge) {
+    OS << " CS<" << Edge->first << "> calls ";
+
+    // An edge whose target node has no function is reported as external.
+    Function *Target = Edge->second->getFunction();
+    if (!Target) {
+      OS << "external node\n";
+      continue;
+    }
+    OS << "function '" << Target->getName() <<"'\n";
+  }
+  OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// dump - Print this node to the debug stream.
+void CallGraphNode::dump() const {
+  print(dbgs());
+}
+#endif
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+  // Walk forward to the record for CS; the assertion fires if the search
+  // reaches the end of the list without finding it.
+  CalledFunctionsVector::iterator Edge = CalledFunctions.begin();
+  for (;;) {
+    assert(Edge != CalledFunctions.end() && "Cannot find callsite to remove!");
+    if (Edge->first == CS.getInstruction())
+      break;
+    ++Edge;
+  }
+
+  // Release the callee, then overwrite the slot with the last record and
+  // shrink the vector by one.
+  Edge->second->DropRef();
+  *Edge = CalledFunctions.back();
+  CalledFunctions.pop_back();
+}
+
+// removeAnyCallEdgeTo - Drop every edge from this node whose target is
+// Callee, releasing one reference on Callee for each edge removed.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+  unsigned Idx = 0;
+  unsigned Count = CalledFunctions.size();
+  while (Idx != Count) {
+    if (CalledFunctions[Idx].second != Callee) {
+      ++Idx;
+      continue;
+    }
+
+    // The last record is moved into this slot, so the same index has to be
+    // examined again on the next iteration.
+    Callee->DropRef();
+    CalledFunctions[Idx] = CalledFunctions.back();
+    CalledFunctions.pop_back();
+    --Count;
+  }
+}
+
+/// removeOneAbstractEdgeTo - Remove the first edge to Callee that carries a
+/// null call site.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+  CalledFunctionsVector::iterator Edge = CalledFunctions.begin();
+  while (true) {
+    assert(Edge != CalledFunctions.end() && "Cannot find callee to remove!");
+    CallRecord &Record = *Edge;
+    if (Record.second == Callee && Record.first == 0)
+      break;
+    ++Edge;
+  }
+
+  Callee->DropRef();
+  *Edge = CalledFunctions.back();
+  CalledFunctions.pop_back();
+}
+
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+                                    CallSite NewCS, CallGraphNode *NewNode){
+  // Find the record whose call site is CS; the assertion fires if the search
+  // reaches the end of the list without finding it.
+  CalledFunctionsVector::iterator Edge = CalledFunctions.begin();
+  for (;;) {
+    assert(Edge != CalledFunctions.end() && "Cannot find callsite to remove!");
+    if (Edge->first == CS.getInstruction())
+      break;
+    ++Edge;
+  }
+
+  // Release the old callee, rewrite the record in place, and take a
+  // reference on the new callee.
+  Edge->second->DropRef();
+  Edge->first = NewCS.getInstruction();
+  Edge->second = NewNode;
+  NewNode->AddRef();
+}
+
+// Ensure that users of CallGraph.h also link with this file
+DEFINING_FILE_FOR(CallGraph)
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 000000000000..182beca3643e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,614 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph. Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call-graph in SCC order: that is, they process function bottom-up, except for
+// recursive functions, which they process all at once.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "cgscc-passmgr" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LegacyPassManagers.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<unsigned> +MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4)); + +STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC"); + +//===----------------------------------------------------------------------===// +// CGPassManager +// +/// CGPassManager manages FPPassManagers and CallGraphSCCPasses. + +namespace { + +class CGPassManager : public ModulePass, public PMDataManager { +public: + static char ID; + explicit CGPassManager() + : ModulePass(ID), PMDataManager() { } + + /// run - Execute all of the passes scheduled for execution. Keep track of + /// whether any of the passes modifies the module, and if so, return true. + bool runOnModule(Module &M); + + using ModulePass::doInitialization; + using ModulePass::doFinalization; + + bool doInitialization(CallGraph &CG); + bool doFinalization(CallGraph &CG); + + /// Pass Manager itself does not invalidate any analysis info. + void getAnalysisUsage(AnalysisUsage &Info) const { + // CGPassManager walks SCC and it needs CallGraph. 
+ Info.addRequired<CallGraph>(); + Info.setPreservesAll(); + } + + virtual const char *getPassName() const { + return "CallGraph Pass Manager"; + } + + virtual PMDataManager *getAsPMDataManager() { return this; } + virtual Pass *getAsPass() { return this; } + + // Print passes managed by this manager + void dumpPassStructure(unsigned Offset) { + errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } + } + + Pass *getContainedPass(unsigned N) { + assert(N < PassVector.size() && "Pass number out of range!"); + return static_cast<Pass *>(PassVector[N]); + } + + virtual PassManagerType getPassManagerType() const { + return PMT_CallGraphPassManager; + } + +private: + bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall); + + bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall); + bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG, + bool IsCheckingMode); +}; + +} // end anonymous namespace. + +char CGPassManager::ID = 0; + + +bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall) { + bool Changed = false; + PMDataManager *PM = P->getAsPMDataManager(); + + if (PM == 0) { + CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; + if (!CallGraphUpToDate) { + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); + CallGraphUpToDate = true; + } + + { + TimeRegion PassTimer(getPassTimer(CGSP)); + Changed = CGSP->runOnSCC(CurSCC); + } + + // After the CGSCCPass is done, when assertions are enabled, use + // RefreshCallGraph to verify that the callgraph was correctly updated. 
+#ifndef NDEBUG + if (Changed) + RefreshCallGraph(CurSCC, CG, true); +#endif + + return Changed; + } + + + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + FPPassManager *FPP = (FPPassManager*)P; + + // Run pass P on all functions in the current SCC. + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (Function *F = (*I)->getFunction()) { + dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); + TimeRegion PassTimer(getPassTimer(FPP)); + Changed |= FPP->runOnFunction(*F); + } + } + + // The function pass(es) modified the IR, they may have clobbered the + // callgraph. + if (Changed && CallGraphUpToDate) { + DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: " + << P->getPassName() << '\n'); + CallGraphUpToDate = false; + } + return Changed; +} + + +/// RefreshCallGraph - Scan the functions in the specified CFG and resync the +/// callgraph with the call sites found in it. This is used after +/// FunctionPasses have potentially munged the callgraph, and can be used after +/// CallGraphSCC passes to verify that they correctly updated the callgraph. +/// +/// This function returns true if it devirtualized an existing function call, +/// meaning it turned an indirect call into a direct call. This happens when +/// a function pass like GVN optimizes away stuff feeding the indirect call. +/// This never happens in checking mode. +/// +bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, + CallGraph &CG, bool CheckingMode) { + DenseMap<Value*, CallGraphNode*> CallSites; + + DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() + << " nodes:\n"; + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + ); + + bool MadeChange = false; + bool DevirtualizedCall = false; + + // Scan all functions in the SCC. 
+ unsigned FunctionNo = 0; + for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end(); + SCCIdx != E; ++SCCIdx, ++FunctionNo) { + CallGraphNode *CGN = *SCCIdx; + Function *F = CGN->getFunction(); + if (F == 0 || F->isDeclaration()) continue; + + // Walk the function body looking for call sites. Sync up the call sites in + // CGN with those actually in the function. + + // Keep track of the number of direct and indirect calls that were + // invalidated and removed. + unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; + + // Get the set of call sites currently in the function. + for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { + // If this call site is null, then the function pass deleted the call + // entirely and the WeakVH nulled it out. + if (I->first == 0 || + // If we've already seen this call site, then the FunctionPass RAUW'd + // one call with another, which resulted in two "uses" in the edge + // list of the same call. + CallSites.count(I->first) || + + // If the call edge is not from a call or invoke, then the function + // pass RAUW'd a call with another value. This can happen when + // constant folding happens of well known functions etc. + !CallSite(I->first)) { + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If this was an indirect call site, count it. + if (I->second->getFunction() == 0) + ++NumIndirectRemoved; + else + ++NumDirectRemoved; + + // Just remove the edge from the set of callees, keep track of whether + // I points to the last element of the vector. + bool WasLast = I + 1 == E; + CGN->removeCallEdge(I); + + // If I pointed to the last element of the vector, we have to bail out: + // iterator checking rejects comparisons of the resultant pointer with + // end. 
+ if (WasLast) + break; + E = CGN->end(); + continue; + } + + assert(!CallSites.count(I->first) && + "Call site occurs in node multiple times"); + CallSites.insert(std::make_pair(I->first, I->second)); + ++I; + } + + // Loop over all of the instructions in the function, getting the callsites. + // Keep track of the number of direct/indirect calls added. + unsigned NumDirectAdded = 0, NumIndirectAdded = 0; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS(cast<Value>(I)); + if (!CS) continue; + Function *Callee = CS.getCalledFunction(); + if (Callee && Callee->isIntrinsic()) continue; + + // If this call site already existed in the callgraph, just verify it + // matches up to expectations and remove it from CallSites. + DenseMap<Value*, CallGraphNode*>::iterator ExistingIt = + CallSites.find(CS.getInstruction()); + if (ExistingIt != CallSites.end()) { + CallGraphNode *ExistingNode = ExistingIt->second; + + // Remove from CallSites since we have now seen it. + CallSites.erase(ExistingIt); + + // Verify that the callee is right. + if (ExistingNode->getFunction() == CS.getCalledFunction()) + continue; + + // If we are in checking mode, we are not allowed to actually mutate + // the callgraph. If this is a case where we can infer that the + // callgraph is less precise than it could be (e.g. an indirect call + // site could be turned direct), don't reject it in checking mode, and + // don't tweak it to be more precise. + if (CheckingMode && CS.getCalledFunction() && + ExistingNode->getFunction() == 0) + continue; + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If not, we either went from a direct call to indirect, indirect to + // direct, or direct to different direct. 
+ CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) { + CalleeNode = CG.getOrInsertFunction(Callee); + // Keep track of whether we turned an indirect call into a direct + // one. + if (ExistingNode->getFunction() == 0) { + DevirtualizedCall = true; + DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" + << Callee->getName() << "'\n"); + } + } else { + CalleeNode = CG.getCallsExternalNode(); + } + + // Update the edge target in CGN. + CGN->replaceCallEdge(CS, CS, CalleeNode); + MadeChange = true; + continue; + } + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If the call site didn't exist in the CGN yet, add it. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) { + CalleeNode = CG.getOrInsertFunction(Callee); + ++NumDirectAdded; + } else { + CalleeNode = CG.getCallsExternalNode(); + ++NumIndirectAdded; + } + + CGN->addCalledFunction(CS, CalleeNode); + MadeChange = true; + } + + // We scanned the old callgraph node, removing invalidated call sites and + // then added back newly found call sites. One thing that can happen is + // that an old indirect call site was deleted and replaced with a new direct + // call. In this case, we have devirtualized a call, and CGSCCPM would like + // to iteratively optimize the new code. Unfortunately, we don't really + // have a great way to detect when this happens. As an approximation, we + // just look at whether the number of indirect calls is reduced and the + // number of direct calls is increased. There are tons of ways to fool this + // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a + // direct call) but this is close enough. + if (NumIndirectRemoved > NumIndirectAdded && + NumDirectRemoved < NumDirectAdded) + DevirtualizedCall = true; + + // After scanning this function, if we still have entries in callsites, then + // they are dangling pointers. 
WeakVH should save us for this, so abort if + // this happens. + assert(CallSites.empty() && "Dangling pointers found in call sites map"); + + // Periodically do an explicit clear to remove tombstones when processing + // large scc's. + if ((FunctionNo & 15) == 15) + CallSites.clear(); + } + + DEBUG(if (MadeChange) { + dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + if (DevirtualizedCall) + dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; + + } else { + dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; + } + ); + (void)MadeChange; + + return DevirtualizedCall; +} + +/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the +/// specified SCC. This keeps track of whether a function pass devirtualizes +/// any calls and returns it in DevirtualizedCall. +bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall) { + bool Changed = false; + + // CallGraphUpToDate - Keep track of whether the callgraph is known to be + // up-to-date or not. The CGSSC pass manager runs two types of passes: + // CallGraphSCC Passes and other random function passes. Because other + // random function passes are not CallGraph aware, they may clobber the + // call graph by introducing new calls or deleting other ones. This flag + // is set to false when we run a function pass so that we know to clean up + // the callgraph when we need to run a CGSCCPass again. + bool CallGraphUpToDate = true; + + // Run all passes on current SCC. + for (unsigned PassNo = 0, e = getNumContainedPasses(); + PassNo != e; ++PassNo) { + Pass *P = getContainedPass(PassNo); + + // If we're in -debug-pass=Executions mode, construct the SCC node list, + // otherwise avoid constructing this string as it is expensive. 
+ if (isPassDebuggingExecutionsOrMore()) { + std::string Functions; + #ifndef NDEBUG + raw_string_ostream OS(Functions); + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (I != CurSCC.begin()) OS << ", "; + (*I)->print(OS); + } + OS.flush(); + #endif + dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); + } + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + + // Actually run this pass on the current SCC. + Changed |= RunPassOnSCC(P, CurSCC, CG, + CallGraphUpToDate, DevirtualizedCall); + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); + dumpPreservedSet(P); + + verifyPreservedAnalysis(P); + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, "", ON_CG_MSG); + } + + // If the callgraph was left out of date (because the last pass run was a + // functionpass), refresh it before we move on to the next SCC. + if (!CallGraphUpToDate) + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); + return Changed; +} + +/// run - Execute all of the passes scheduled for execution. Keep track of +/// whether any of the passes modifies the module, and if so, return true. +bool CGPassManager::runOnModule(Module &M) { + CallGraph &CG = getAnalysis<CallGraph>(); + bool Changed = doInitialization(CG); + + // Walk the callgraph in bottom-up SCC order. + scc_iterator<CallGraph*> CGI = scc_begin(&CG); + + CallGraphSCC CurSCC(&CGI); + while (!CGI.isAtEnd()) { + // Copy the current SCC and increment past it so that the pass can hack + // on the SCC if it wants to without invalidating our iterator. + std::vector<CallGraphNode*> &NodeVec = *CGI; + CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size()); + ++CGI; + + // At the top level, we run all the passes in this pass manager on the + // functions in this SCC. However, we support iterative compilation in the + // case where a function pass devirtualizes a call to a function. 
For + // example, it is very common for a function pass (often GVN or instcombine) + // to eliminate the addressing that feeds into a call. With that improved + // information, we would like the call to be an inline candidate, infer + // mod-ref information etc. + // + // Because of this, we allow iteration up to a specified iteration count. + // This only happens in the case of a devirtualized call, so we only burn + // compile time in the case that we're making progress. We also have a hard + // iteration count limit in case there is crazy code. + unsigned Iteration = 0; + bool DevirtualizedCall = false; + do { + DEBUG(if (Iteration) + dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #" + << Iteration << '\n'); + DevirtualizedCall = false; + Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); + } while (Iteration++ < MaxIterations && DevirtualizedCall); + + if (DevirtualizedCall) + DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration + << " times, due to -max-cg-scc-iterations\n"); + + if (Iteration > MaxSCCIterations) + MaxSCCIterations = Iteration; + + } + Changed |= doFinalization(CG); + return Changed; +} + + +/// Initialize CG +bool CGPassManager::doInitialization(CallGraph &CG) { + bool Changed = false; + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule()); + } else { + Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG); + } + } + return Changed; +} + +/// Finalize CG +bool CGPassManager::doFinalization(CallGraph &CG) { + bool Changed = false; + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid 
CGPassManager member"); + Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule()); + } else { + Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG); + } + } + return Changed; +} + +//===----------------------------------------------------------------------===// +// CallGraphSCC Implementation +//===----------------------------------------------------------------------===// + +/// ReplaceNode - This informs the SCC and the pass manager that the specified +/// Old node has been deleted, and New is to be used in its place. +void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { + assert(Old != New && "Should not replace node with self"); + for (unsigned i = 0; ; ++i) { + assert(i != Nodes.size() && "Node not in SCC"); + if (Nodes[i] != Old) continue; + Nodes[i] = New; + break; + } + + // Update the active scc_iterator so that it doesn't contain dangling + // pointers to the old CallGraphNode. + scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context; + CGI->ReplaceNode(Old, New); +} + + +//===----------------------------------------------------------------------===// +// CallGraphSCCPass Implementation +//===----------------------------------------------------------------------===// + +/// Assign pass manager to manage this pass. +void CallGraphSCCPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find CGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_CallGraphPassManager) + PMS.pop(); + + assert(!PMS.empty() && "Unable to handle Call Graph Pass"); + CGPassManager *CGP; + + if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager) + CGP = (CGPassManager*)PMS.top(); + else { + // Create new Call Graph SCC Pass Manager if it does not exist. 
+    assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
+    PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Call Graph Pass Manager
+    CGP = new CGPassManager();
+
+    // [2] Set up new manager's top level manager
+    PMTopLevelManager *TPM = PMD->getTopLevelManager();
+    TPM->addIndirectPassManager(CGP);
+
+    // [3] Assign manager to manage this new manager. This may create
+    // and push new managers into PMS
+    Pass *P = CGP;
+    TPM->schedulePass(P);
+
+    // [4] Push new manager into PMS
+    PMS.push(CGP);
+  }
+
+  CGP->add(this);
+}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<CallGraph>();
+  AU.addPreserved<CallGraph>();
+}
+
+
+//===----------------------------------------------------------------------===//
+// PrintCallGraphPass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// PrintCallGraphPass - CallGraphSCCPass that writes a banner followed by
+  /// every function of the visited SCC to a raw_ostream. It never modifies
+  /// the module.
+  ///
+  class PrintCallGraphPass : public CallGraphSCCPass {
+    std::string Banner; // Text written before the functions of each SCC.
+    raw_ostream &Out;   // raw_ostream to print on.
+
+  public:
+    static char ID;
+    PrintCallGraphPass(const std::string &B, raw_ostream &o)
+      : CallGraphSCCPass(ID), Banner(B), Out(o) {}
+
+    /// getAnalysisUsage - Printing changes nothing, so every analysis is
+    /// preserved.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    /// runOnSCC - Write the banner, then print each node's function to Out.
+    /// Always returns false because the SCC is left untouched.
+    bool runOnSCC(CallGraphSCC &SCC) {
+      Out << Banner;
+      for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+        // A call graph node may have no Function attached (getFunction()
+        // returns null for it); calling print() through that null pointer
+        // would crash, so emit a placeholder line instead.
+        if (Function *F = (*I)->getFunction())
+          F->print(Out);
+        else
+          Out << "\nPrinting <null> Function\n";
+      }
+      return false;
+    }
+  };
+
+} // end anonymous namespace.
+
+char PrintCallGraphPass::ID = 0;
+
+/// createPrinterPass - Return a newly allocated PrintCallGraphPass that
+/// prints to O using the given Banner.
+Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
+                                          const std::string &Banner) const {
+  return new PrintCallGraphPass(Banner, O);
+}
+
diff --git a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp b/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
new file mode 100644
index 000000000000..306ae7a4dbfb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
@@ -0,0 +1,87 @@
+//===- CallPrinter.cpp - DOT printer for call graph -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-callgraph', which emits a callgraph.<fnname>.dot
+// containing the call graph of a module.
+//
+// There is also a pass available to directly call dotty ('-view-callgraph').
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+/// DOTGraphTraits specialization describing how a CallGraph is labelled when
+/// it is rendered as a DOT graph.
+template<>
+struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
+  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+  /// getGraphName - Every call graph gets the same fixed title.
+  static std::string getGraphName(CallGraph *Graph) {
+    return "Call graph";
+  }
+
+  /// getNodeLabel - Label a node with the name of its function, or with
+  /// "external node" when the node has no function attached.
+  std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
+    if (Function *Func = Node->getFunction())
+      return Func->getName();
+
+    return "external node";
+  }
+};
+
+} // end llvm namespace
+
+namespace {
+
+/// CallGraphViewer - Pass registered below as '-view-callgraph'. The
+/// constructor calls initializeCallGraphViewerPass with the global
+/// PassRegistry.
+struct CallGraphViewer
+  : public DOTGraphTraitsModuleViewer<CallGraph, true> {
+  static char ID;
+
+  CallGraphViewer()
+    : DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) {
+    initializeCallGraphViewerPass(*PassRegistry::getPassRegistry());
+  }
+};
+
+/// CallGraphPrinter - Pass registered below as '-dot-callgraph'. The
+/// constructor calls initializeCallGraphPrinterPass with the global
+/// PassRegistry.
+struct CallGraphPrinter
+  : public DOTGraphTraitsModulePrinter<CallGraph, true> {
+  static char ID;
+
+  CallGraphPrinter()
+    : DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) {
+    initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
+  }
+};
+
+} // end anonymous namespace
+
+char CallGraphViewer::ID = 0;
+INITIALIZE_PASS(CallGraphViewer, "view-callgraph",
+                "View call graph",
+                false, false)
+
+char CallGraphPrinter::ID = 0;
+INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph",
+                "Print call graph to 'dot' file",
+                false, false)
+
+// Create methods available outside of this file, to use them in
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+ModulePass *llvm::createCallGraphViewerPass() {
+  return new CallGraphViewer();
+}
+
+ModulePass *llvm::createCallGraphPrinterPass() {
+  return new CallGraphPrinter();
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
new file mode 100644
index 000000000000..1c4f17d3819a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -0,0 +1,101 @@
+//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to seek out all of the types in use by the program. Note
+// that this analysis explicitly does not include types only used by the symbol
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char FindUsedTypes::ID = 0;
+INITIALIZE_PASS(FindUsedTypes, "print-used-types",
+                "Find Used Types", false, true)
+
+// IncorporateType - Record Ty in the set of used types and then do the same,
+// recursively, for each of its subtypes.
+//
+void FindUsedTypes::IncorporateType(Type *Ty) {
+  // insert() reports whether Ty was newly added. A type that is already in
+  // the set needs no further work.
+  bool Inserted = UsedTypes.insert(Ty);
+  if (!Inserted)
+    return;
+
+  // Ty is new: visit every type it refers to.
+  Type::subtype_iterator Sub = Ty->subtype_begin();
+  Type::subtype_iterator SubEnd = Ty->subtype_end();
+  for (; Sub != SubEnd; ++Sub)
+    IncorporateType(*Sub);
+}
+
+// IncorporateValue - Record the type of V and, when V is a constant other
+// than a global value, recurse into each of its operands.
+//
+void FindUsedTypes::IncorporateValue(const Value *V) {
+  IncorporateType(V->getType());
+
+  // Only constants that are not global values have their operands followed.
+  const Constant *C = dyn_cast<Constant>(V);
+  if (!C || isa<GlobalValue>(C))
+    return;
+
+  for (User::const_op_iterator Op = C->op_begin(), OpEnd = C->op_end();
+       Op != OpEnd; ++Op)
+    IncorporateValue(*Op);
+}
+
+
+// runOnModule - Rebuild the set of used types from the global variables and
+// functions of the given module. Always returns false.
+//
+bool FindUsedTypes::runOnModule(Module &m) {
+  // Start from an empty set in case the pass is run more than once.
+  UsedTypes.clear();
+
+  // Global variables: the variable's own type, plus whatever its initializer
+  // refers to when it has one.
+  for (Module::const_global_iterator GV = m.global_begin(),
+                                     GVEnd = m.global_end();
+       GV != GVEnd; ++GV) {
+    IncorporateType(GV->getType());
+    if (GV->hasInitializer())
+      IncorporateValue(GV->getInitializer());
+  }
+
+  // Functions: the function's own type, then the type of every instruction
+  // and of every instruction operand.
+  for (Module::iterator FI = m.begin(), FE = m.end(); FI != FE; ++FI) {
+    IncorporateType(FI->getType());
+
+    const Function &Fn = *FI;
+    for (const_inst_iterator It = inst_begin(Fn), ItEnd = inst_end(Fn);
+         It != ItEnd; ++It) {
+      const Instruction &Inst = *It;
+      IncorporateType(Inst.getType());
+
+      User::const_op_iterator Op = Inst.op_begin();
+      User::const_op_iterator OpEnd = Inst.op_end();
+      for (; Op != OpEnd; ++Op)
+        IncorporateValue(*Op);
+    }
+  }
+
+  return false;
+}
+
+// print - Write a heading followed by each recorded type, one per line, in
+// the iteration order of the UsedTypes set. The Module argument is accepted
+// but not used here.
+//
+void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
+  OS << "Types in use by this module:\n";
+
+  SetVector<Type *>::const_iterator TyIt = UsedTypes.begin();
+  SetVector<Type *>::const_iterator TyEnd = UsedTypes.end();
+  while (TyIt != TyEnd) {
+    OS << " " << **TyIt << '\n';
+    ++TyIt;
+  }
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
new file mode 100644
index 000000000000..7ec46442bf4c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -0,0 +1,607 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This simple pass provides alias and mod/ref information for global values +// that do not have their address taken, and keeps track of whether functions +// read or write memory (are "pure"). For this simple (but very common) case, +// we can provide pretty accurate and useful information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globalsmodref-aa" +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InstIterator.h" +#include <set> +using namespace llvm; + +STATISTIC(NumNonAddrTakenGlobalVars, + "Number of global vars without address taken"); +STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); +STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); +STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); +STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); + +namespace { + /// FunctionRecord - One instance of this structure is stored for every + /// function in the program. Later, the entries for these functions are + /// removed if the function is found to call an external function (in which + /// case we know nothing about it). + struct FunctionRecord { + /// GlobalInfo - Maintain mod/ref info for all of the globals without + /// addresses taken that are read or written (transitively) by this + /// function. 
+ std::map<const GlobalValue*, unsigned> GlobalInfo; + + /// MayReadAnyGlobal - May read global variables, but it is not known which. + bool MayReadAnyGlobal; + + unsigned getInfoForGlobal(const GlobalValue *GV) const { + unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; + std::map<const GlobalValue*, unsigned>::const_iterator I = + GlobalInfo.find(GV); + if (I != GlobalInfo.end()) + Effect |= I->second; + return Effect; + } + + /// FunctionEffect - Capture whether or not this function reads or writes to + /// ANY memory. If not, we can do a lot of aggressive analysis on it. + unsigned FunctionEffect; + + FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {} + }; + + /// GlobalsModRef - The actual analysis pass. + class GlobalsModRef : public ModulePass, public AliasAnalysis { + /// NonAddressTakenGlobals - The globals that do not have their addresses + /// taken. + std::set<const GlobalValue*> NonAddressTakenGlobals; + + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + std::set<const GlobalValue*> IndirectGlobals; + + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals; + + /// FunctionInfo - For each function, keep track of what globals are + /// modified or read. 
+ std::map<const Function*, FunctionRecord> FunctionInfo; + + public: + static char ID; + GlobalsModRef() : ModulePass(ID) { + initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) { + InitializeAliasAnalysis(this); // set up super class + AnalyzeGlobals(M); // find non-addr taken globals + AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<CallGraph>(); + AU.setPreservesAll(); // Does not transform code + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const Location &LocA, const Location &LocB); + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1, CS2); + } + + /// getModRefBehavior - Return the behavior of the specified function when + /// no call site is given: what FunctionInfo records for F (if anything) + /// is combined with the result of the chained AliasAnalysis query. + ModRefBehavior getModRefBehavior(const Function *F) { + ModRefBehavior Min = UnknownModRefBehavior; + + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + Min = DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + Min = OnlyReadsMemory; + } + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); + } + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. 
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + ModRefBehavior Min = UnknownModRefBehavior; + + if (const Function* F = CS.getCalledFunction()) + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + Min = DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + Min = OnlyReadsMemory; + } + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + } + + virtual void deleteValue(Value *V); + virtual void copyValue(Value *From, Value *To); + virtual void addEscapingUse(Use &U); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + /// getFunctionInfo - Return the function info for the function, or null if + /// we don't have anything useful to say about it. 
+ FunctionRecord *getFunctionInfo(const Function *F) { + std::map<const Function*, FunctionRecord>::iterator I = + FunctionInfo.find(F); + if (I != FunctionInfo.end()) + return &I->second; + return 0; + } + + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, + std::vector<Function*> &Writers, + GlobalValue *OkayStoreDest = 0); + bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); + }; +} + +char GlobalsModRef::ID = 0; +INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false) +INITIALIZE_PASS_DEPENDENCY(CallGraph) +INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false) + +Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } + +/// AnalyzeGlobals - Scan through the users of all of the internal +/// GlobalValue's in the program. If none of them have their "address taken" +/// (really, their address passed to something nontrivial), record this fact, +/// and record the functions that they are used directly in. +void GlobalsModRef::AnalyzeGlobals(Module &M) { + std::vector<Function*> Readers, Writers; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global. 
+ NonAddressTakenGlobals.insert(I); + ++NumNonAddrTakenFunctions; + } + Readers.clear(); Writers.clear(); + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global, and the mod/ref fns + NonAddressTakenGlobals.insert(I); + + for (unsigned i = 0, e = Readers.size(); i != e; ++i) + FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; + + if (!I->isConstant()) // No need to keep track of writers to constants + for (unsigned i = 0, e = Writers.size(); i != e; ++i) + FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; + ++NumNonAddrTakenGlobalVars; + + // If this global holds a pointer type, see if it is an indirect global. + if (I->getType()->getElementType()->isPointerTy() && + AnalyzeIndirectGlobalMemory(I)) + ++NumIndirectGlobalVars; + } + Readers.clear(); Writers.clear(); + } +} + +/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. Also, while we are at it, keep track of those functions that read and +/// write to the value. +/// +/// If OkayStoreDest is non-null, stores into this global are allowed. 
+bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, + std::vector<Function*> &Readers, + std::vector<Function*> &Writers, + GlobalValue *OkayStoreDest) { + if (!V->getType()->isPointerTy()) return true; + + for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + Readers.push_back(LI->getParent()->getParent()); + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (V == SI->getOperand(1)) { + Writers.push_back(SI->getParent()->getParent()); + } else if (SI->getOperand(1) != OkayStoreDest) { + return true; // Storing the pointer + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true; + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) + return true; + } else if (isFreeCall(U, TLI)) { + Writers.push_back(cast<Instruction>(U)->getParent()->getParent()); + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + if (CI->getArgOperand(i) == V) return true; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) + if (II->getArgOperand(i) == V) return true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->getOpcode() == Instruction::GetElementPtr || + CE->getOpcode() == Instruction::BitCast) { + if (AnalyzeUsesOfPointer(CE, Readers, Writers)) + return true; + } else { + return true; + } + } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { + if (!isa<ConstantPointerNull>(ICI->getOperand(1))) + return true; // Allow comparison against null. 
+ } else { + return true; + } + } + + return false; +} + +/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable +/// which holds a pointer type. See if the global always points to non-aliased +/// heap memory: that is, all initializers of the globals are allocations, and +/// those allocations have no use other than initialization of the global. +/// Further, all loads out of GV must directly use the memory, not store the +/// pointer somewhere. If this is true, we consider the memory pointed to by +/// GV to be owned by GV and can disambiguate other pointers from it. +bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { + // Keep track of values related to the allocation of the memory, e.g. the + // value produced by the malloc call and any casts. + std::vector<Value*> AllocRelatedValues; + + // Walk the user list of the global. If we find anything other than a direct + // load or store, bail out. + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + User *U = *I; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // The pointer loaded from the global can only be used in simple ways: + // we allow addressing of it and loading/storing to it. We do *not* allow + // storing the loaded pointer somewhere else or passing to a function. + std::vector<Function*> ReadersWriters; + if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) + return false; // Loaded pointer escapes. + // TODO: Could try some IP mod/ref of the loaded pointer. + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // Storing the global itself. + if (SI->getOperand(0) == GV) return false; + + // If storing the null pointer, ignore it. + if (isa<ConstantPointerNull>(SI->getOperand(0))) + continue; + + // Check the value being stored. + Value *Ptr = GetUnderlyingObject(SI->getOperand(0)); + + if (!isAllocLikeFn(Ptr, TLI)) + return false; // Too hard to analyze. + + // Analyze all uses of the allocation. 
If any of them are used in a + // non-simple way (e.g. stored to another global) bail out. + std::vector<Function*> ReadersWriters; + if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) + return false; // Allocation escapes. + + // Remember that this allocation is related to the indirect global. + AllocRelatedValues.push_back(Ptr); + } else { + // Something complex, bail out. + return false; + } + } + + // Okay, this is an indirect global. Remember all of the allocations for + // this global in AllocsForIndirectGlobals. + while (!AllocRelatedValues.empty()) { + AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; + AllocRelatedValues.pop_back(); + } + IndirectGlobals.insert(GV); + return true; +} + +/// AnalyzeCallGraph - At this point, we know the functions where globals are +/// immediately stored to and read from. Propagate this information up the call +/// graph to all callers and compute the mod/ref info for all memory for each +/// function. +void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { + // We do a bottom-up SCC traversal of the call graph. In other words, we + // visit all callees before callers (leaf-first). + for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E; + ++I) { + std::vector<CallGraphNode *> &SCC = *I; + assert(!SCC.empty() && "SCC with no functions?"); + + if (!SCC[0]->getFunction()) { + // Calls externally - can't say anything useful. Remove any existing + // function records (may have been created when scanning globals). + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; + + bool KnowNothing = false; + unsigned FunctionEffect = 0; + + // Collect the mod/ref properties due to called functions. We only compute + // one mod-ref set. 
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { + Function *F = SCC[i]->getFunction(); + if (!F) { + KnowNothing = true; + break; + } + + if (F->isDeclaration()) { + // Try to get mod/ref behaviour from function attributes. + if (F->doesNotAccessMemory()) { + // Can't do better than that! + } else if (F->onlyReadsMemory()) { + FunctionEffect |= Ref; + if (!F->isIntrinsic()) + // This function might call back into the module and read a global - + // consider every global as possibly being read by this function. + FR.MayReadAnyGlobal = true; + } else { + FunctionEffect |= ModRef; + // Can't say anything useful unless it's an intrinsic - they don't + // read or write global variables of the kind considered here. + KnowNothing = !F->isIntrinsic(); + } + continue; + } + + for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); + CI != E && !KnowNothing; ++CI) + if (Function *Callee = CI->second->getFunction()) { + if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { + // Propagate function effect up. + FunctionEffect |= CalleeFR->FunctionEffect; + + // Incorporate callee's effects on globals into our info. + for (std::map<const GlobalValue*, unsigned>::iterator GI = + CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); + GI != E; ++GI) + FR.GlobalInfo[GI->first] |= GI->second; + FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; + } else { + // Can't say anything about it. However, if it is inside our SCC, + // then nothing needs to be done. + CallGraphNode *CalleeNode = CG[Callee]; + if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) + KnowNothing = true; + } + } else { + KnowNothing = true; + } + } + + // If we can't say anything useful about this SCC, remove all SCC functions + // from the FunctionInfo map. + if (KnowNothing) { + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + // Scan the function bodies for explicit loads or stores. 
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i) + for (inst_iterator II = inst_begin(SCC[i]->getFunction()), + E = inst_end(SCC[i]->getFunction()); + II != E && FunctionEffect != ModRef; ++II) + if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) { + FunctionEffect |= Ref; + if (LI->isVolatile()) + // Volatile loads may have side-effects, so mark them as writing + // memory (for example, a flag inside the processor). + FunctionEffect |= Mod; + } else if (StoreInst *SI = dyn_cast<StoreInst>(&*II)) { + FunctionEffect |= Mod; + if (SI->isVolatile()) + // Treat volatile stores as reading memory somewhere. + FunctionEffect |= Ref; + } else if (isAllocationFn(&*II, TLI) || isFreeCall(&*II, TLI)) { + FunctionEffect |= ModRef; + } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) { + // The callgraph doesn't include intrinsic calls. + Function *Callee = Intrinsic->getCalledFunction(); + ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee); + FunctionEffect |= (Behaviour & ModRef); + } + + if ((FunctionEffect & Mod) == 0) + ++NumReadMemFunctions; + if (FunctionEffect == 0) + ++NumNoMemFunctions; + FR.FunctionEffect = FunctionEffect; + + // Finally, now that we know the full effect on this SCC, clone the + // information to each function in the SCC. + for (unsigned i = 1, e = SCC.size(); i != e; ++i) + FunctionInfo[SCC[i]->getFunction()] = FR; + } +} + + + +/// alias - If one of the pointers is to a global that we are tracking, and the +/// other is some random pointer, we know there cannot be an alias, because the +/// address of the global isn't taken. +AliasAnalysis::AliasResult +GlobalsModRef::alias(const Location &LocA, + const Location &LocB) { + // Get the base object these pointers point to. 
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr); + const Value *UV2 = GetUnderlyingObject(LocB.Ptr); + + // If either of the underlying values is a global, they may be non-addr-taken + // globals, which we can answer queries about. + const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1); + const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2); + if (GV1 || GV2) { + // If the global's address is taken, pretend we don't know it's a pointer to + // the global. + if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0; + + // If the two pointers are derived from two different non-addr-taken + // globals, or if one is and the other isn't, we know these can't alias. + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + // Otherwise if they are both derived from the same addr-taken global, we + // can't know the two accesses don't overlap. + } + + // These pointers may be based on the memory owned by an indirect global. If + // so, we may be able to handle this. First check to see if the base pointer + // is a direct load from an indirect global. + GV1 = GV2 = 0; + if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV1 = GV; + if (const LoadInst *LI = dyn_cast<LoadInst>(UV2)) + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV2 = GV; + + // These pointers may also be from an allocation for the indirect global. If + // so, also handle them. + if (AllocsForIndirectGlobals.count(UV1)) + GV1 = AllocsForIndirectGlobals[UV1]; + if (AllocsForIndirectGlobals.count(UV2)) + GV2 = AllocsForIndirectGlobals[UV2]; + + // Now that we know whether the two pointers are related to indirect globals, + // use this to disambiguate the pointers. 
If either pointer is based on an + // indirect global and if they are not both based on the same indirect global, + // they cannot alias. + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + return AliasAnalysis::alias(LocA, LocB); +} + +AliasAnalysis::ModRefResult +GlobalsModRef::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + unsigned Known = ModRef; + + // If we are asking for mod/ref info of a direct call with a pointer to a + // global we are tracking, return information if we have it. + if (const GlobalValue *GV = + dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr))) + if (GV->hasLocalLinkage()) + if (const Function *F = CS.getCalledFunction()) + if (NonAddressTakenGlobals.count(GV)) + if (const FunctionRecord *FR = getFunctionInfo(F)) + Known = FR->getInfoForGlobal(GV); + + if (Known == NoModRef) + return NoModRef; // No need to query other mod/ref analyses + return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); +} + + +//===----------------------------------------------------------------------===// +// Methods to update the analysis as a result of the client transformation. +// +void GlobalsModRef::deleteValue(Value *V) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (NonAddressTakenGlobals.erase(GV)) { + // This global might be an indirect global. If so, remove it and remove + // any AllocRelatedValues for it. + if (IndirectGlobals.erase(GV)) { + // Remove any entries in AllocsForIndirectGlobals for this global. + for (std::map<const Value*, const GlobalValue*>::iterator + I = AllocsForIndirectGlobals.begin(), + E = AllocsForIndirectGlobals.end(); I != E; ) { + if (I->second == GV) { + AllocsForIndirectGlobals.erase(I++); + } else { + ++I; + } + } + } + } + } + + // Otherwise, if this is an allocation related to an indirect global, remove + // it. 
+ AllocsForIndirectGlobals.erase(V); + + AliasAnalysis::deleteValue(V); +} + +void GlobalsModRef::copyValue(Value *From, Value *To) { + AliasAnalysis::copyValue(From, To); +} + +void GlobalsModRef::addEscapingUse(Use &U) { + // For the purposes of this analysis, it is conservatively correct to treat + // a newly escaping value equivalently to a deleted one. We could perhaps + // be more precise by processing the new use and attempting to update our + // saved analysis results to accommodate it. + deleteValue(U); + + AliasAnalysis::addEscapingUse(U); +} diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp new file mode 100644 index 000000000000..47357cf92127 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp @@ -0,0 +1,31 @@ +//===-- IPA.cpp -----------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the common initialization routines for the IPA library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeIPA - Initialize all passes linked into the IPA library. 
+void llvm::initializeIPA(PassRegistry &Registry) { + initializeCallGraphPass(Registry); + initializeCallGraphPrinterPass(Registry); + initializeCallGraphViewerPass(Registry); + initializeFindUsedTypesPass(Registry); + initializeGlobalsModRefPass(Registry); +} + +void LLVMInitializeIPA(LLVMPassRegistryRef R) { + initializeIPA(*unwrap(R)); +} diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp new file mode 100644 index 000000000000..3bc796e53f90 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp @@ -0,0 +1,1304 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline-cost" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); + +namespace { + +class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { + typedef 
InstVisitor<CallAnalyzer, bool> Base; + friend class InstVisitor<CallAnalyzer, bool>; + + // DataLayout if available, or null. + const DataLayout *const TD; + + /// The TargetTransformInfo available for this compilation. + const TargetTransformInfo &TTI; + + // The called function. + Function &F; + + int Threshold; + int Cost; + + bool IsCallerRecursive; + bool IsRecursiveCall; + bool ExposesReturnsTwice; + bool HasDynamicAlloca; + bool ContainsNoDuplicateCall; + bool HasReturn; + bool HasIndirectBr; + + /// Number of bytes allocated statically by the callee. + uint64_t AllocatedSize; + unsigned NumInstructions, NumVectorInstructions; + int FiftyPercentVectorBonus, TenPercentVectorBonus; + int VectorBonus; + + // While we walk the potentially-inlined instructions, we build up and + // maintain a mapping of simplified values specific to this callsite. The + // idea is to propagate any special information we have about arguments to + // this call through the inlinable section of the function, and account for + // likely simplifications post-inlining. The most important aspect we track + // is CFG altering simplifications -- when we prove a basic block dead, that + // can cause dramatic shifts in the cost of inlining a function. + DenseMap<Value *, Constant *> SimplifiedValues; + + // Keep track of the values which map back (through function arguments) to + // allocas on the caller stack which could be simplified through SROA. + DenseMap<Value *, Value *> SROAArgValues; + + // The mapping of caller Alloca values to their accumulated cost savings. If + // we have to disable SROA for one of the allocas, this tells us how much + // cost must be added. + DenseMap<Value *, int> SROAArgCosts; + + // Keep track of values which map to a pointer base and constant offset. + DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs; + + // Custom simplification helper routines. 
+ bool isAllocaDerivedArg(Value *V); + bool lookupSROAArgAndCost(Value *V, Value *&Arg, + DenseMap<Value *, int>::iterator &CostIt); + void disableSROA(DenseMap<Value *, int>::iterator CostIt); + void disableSROA(Value *V); + void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, + int InstructionCost); + bool handleSROACandidate(bool IsSROAValid, + DenseMap<Value *, int>::iterator CostIt, + int InstructionCost); + bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + bool simplifyCallSite(Function *F, CallSite CS); + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + + // Custom analysis routines. + bool analyzeBlock(BasicBlock *BB); + + // Disable several entry points to the visitor so we don't accidentally use + // them by declaring but not defining them here. + void visit(Module *); void visit(Module &); + void visit(Function *); void visit(Function &); + void visit(BasicBlock *); void visit(BasicBlock &); + + // Provide base case for our instruction visit. + bool visitInstruction(Instruction &I); + + // Our visit overrides. 
+ bool visitAlloca(AllocaInst &I); + bool visitPHI(PHINode &I); + bool visitGetElementPtr(GetElementPtrInst &I); + bool visitBitCast(BitCastInst &I); + bool visitPtrToInt(PtrToIntInst &I); + bool visitIntToPtr(IntToPtrInst &I); + bool visitCastInst(CastInst &I); + bool visitUnaryInstruction(UnaryInstruction &I); + bool visitCmpInst(CmpInst &I); + bool visitSub(BinaryOperator &I); + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitStore(StoreInst &I); + bool visitExtractValue(ExtractValueInst &I); + bool visitInsertValue(InsertValueInst &I); + bool visitCallSite(CallSite CS); + bool visitReturnInst(ReturnInst &RI); + bool visitBranchInst(BranchInst &BI); + bool visitSwitchInst(SwitchInst &SI); + bool visitIndirectBrInst(IndirectBrInst &IBI); + bool visitResumeInst(ResumeInst &RI); + bool visitUnreachableInst(UnreachableInst &I); + +public: + CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, + Function &Callee, int Threshold) + : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} + + bool analyzeCall(CallSite CS); + + int getThreshold() { return Threshold; } + int getCost() { return Cost; } + + // Keep a bunch of stats about the cost savings found so we can print them + // out when debugging. 
+ unsigned NumConstantArgs; + unsigned NumConstantOffsetPtrArgs; + unsigned NumAllocaArgs; + unsigned NumConstantPtrCmps; + unsigned NumConstantPtrDiffs; + unsigned NumInstructionsSimplified; + unsigned SROACostSavings; + unsigned SROACostSavingsLost; + + void dump(); +}; + +} // namespace + +/// \brief Test whether the given value is an Alloca-derived function argument. +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { + return SROAArgValues.count(V); +} + +/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. +/// Returns false if V does not map to a SROA-candidate. +bool CallAnalyzer::lookupSROAArgAndCost( + Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) { + if (SROAArgValues.empty() || SROAArgCosts.empty()) + return false; + + DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V); + if (ArgIt == SROAArgValues.end()) + return false; + + Arg = ArgIt->second; + CostIt = SROAArgCosts.find(Arg); + return CostIt != SROAArgCosts.end(); +} + +/// \brief Disable SROA for the candidate marked by this cost iterator. +/// +/// This marks the candidate as no longer viable for SROA, and adds the cost +/// savings associated with it back into the inline cost measurement. +void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { + // If we're no longer able to perform SROA we need to undo its cost savings + // and prevent subsequent analysis. + Cost += CostIt->second; + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); +} + +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. +void CallAnalyzer::disableSROA(Value *V) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) + disableSROA(CostIt); +} + +/// \brief Accumulate the given cost for a particular SROA candidate. 
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, + int InstructionCost) { + CostIt->second += InstructionCost; + SROACostSavings += InstructionCost; +} + +/// \brief Helper for the common pattern of handling a SROA candidate. +/// Either accumulates the cost savings if the SROA remains valid, or disables +/// SROA for the candidate. +bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, + DenseMap<Value *, int>::iterator CostIt, + int InstructionCost) { + if (IsSROAValid) { + accumulateSROACost(CostIt, InstructionCost); + return true; + } + + disableSROA(CostIt); + return false; +} + +/// \brief Check whether a GEP's indices are all constant. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I)) + return false; + + return true; +} + +/// \brief Accumulate a constant GEP offset into an APInt if possible. +/// +/// Returns false if unable to compute the offset for any reason. Respects any +/// simplified values known during the analysis of this callsite. +bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { + if (!TD) + return false; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); + if (!OpC) + if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) + OpC = dyn_cast<ConstantInt>(SimpleOp); + if (!OpC) + return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. 
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; + } + return true; +} + +bool CallAnalyzer::visitAlloca(AllocaInst &I) { + // FIXME: Check whether inlining will turn a dynamic alloca into a static + // alloca, and handle that case. + + // Accumulate the allocated size. + if (I.isStaticAlloca()) { + Type *Ty = I.getAllocatedType(); + AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : + Ty->getPrimitiveSizeInBits()); + } + + // We will happily inline static alloca instructions. + if (I.isStaticAlloca()) + return Base::visitAlloca(I); + + // FIXME: This is overly conservative. Dynamic allocas are inefficient for + // a variety of reasons, and so we would like to not inline them into + // functions which don't currently have a dynamic alloca. This simply + // disables inlining altogether in the presence of a dynamic alloca. + HasDynamicAlloca = true; + return false; +} + +bool CallAnalyzer::visitPHI(PHINode &I) { + // FIXME: We should potentially be tracking values through phi nodes, + // especially when they collapse to a single value due to deleted CFG edges + // during inlining. + + // FIXME: We need to propagate SROA *disabling* through phi nodes, even + // though we don't want to propagate it's bonuses. The idea is to disable + // SROA if it *might* be used in an inappropriate manner. + + // Phi nodes are always zero-cost. + return true; +} + +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), + SROAArg, CostIt); + + // Try to fold GEPs of constant-offset call site argument pointers. 
This + // requires target data and inbounds GEPs. + if (TD && I.isInBounds()) { + // Check if we have a base + offset for the pointer. + Value *Ptr = I.getPointerOperand(); + std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); + if (BaseAndOffset.first) { + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) { + // Non-constant GEPs aren't folded, and disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; + } + + // Add the result as a new mapping to Base + Offset. + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also handle SROA candidates here, we already know that the GEP is + // all-constant indexed. + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + return true; + } + } + + if (isGEPOffsetConstant(I)) { + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + // Constant GEPs are modeled as free. + return true; + } + + // Variable GEPs will require math and will disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; +} + +bool CallAnalyzer::visitBitCast(BitCastInst &I) { + // Propagate constants through bitcasts. + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offsets through casts + std::pair<Value *, APInt> BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + // Casts don't change the offset, just wrap it up. + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also look for SROA candidates here. + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + // Bitcasts are always zero cost. 
+ return true; +} + +bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when converted to a plain integer provided the + // integer is large enough to represent the pointer. + unsigned IntegerSize = I.getType()->getScalarSizeInBits(); + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { + std::pair<Value *, APInt> BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // This is really weird. Technically, ptrtoint will disable SROA. However, + // unless that ptrtoint is *used* somewhere in the live basic blocks after + // inlining, it will be nuked, and SROA should proceed. All of the uses which + // would block SROA would also block SROA if applied directly to a pointer, + // and so we can just add the integer in here. The only places where SROA is + // preserved either cannot fire on an integer, or won't in-and-of themselves + // disable SROA (ext) w/o some later use that we would see and disable. + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { + // Propagate constants through ptrtoint. 
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when round-tripped through a pointer without + // modifications provided the integer is not too large. + Value *Op = I.getOperand(0); + unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); + if (TD && IntegerSize <= TD->getPointerSizeInBits()) { + std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // "Propagate" SROA here in the same manner as we do for ptrtoint above. + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitCastInst(CastInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. + disableSROA(I.getOperand(0)); + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { + Value *Operand = I.getOperand(0); + Constant *COp = dyn_cast<Constant>(Operand); + if (!COp) + COp = SimplifiedValues.lookup(Operand); + if (COp) + if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), + COp, TD)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on the argument to arbitrary unary operators. 
+ disableSROA(Operand); + + return false; +} + +bool CallAnalyzer::visitCmpInst(CmpInst &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + // First try to handle simplified comparisons. + if (!isa<Constant>(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa<Constant>(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + return true; + } + } + + if (I.getOpcode() == Instruction::FCmp) + return false; + + // Otherwise look for a comparison between constant offset pointers with + // a common base. + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the icmp to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrCmps; + return true; + } + } + } + + // If the comparison is an equality comparison with null, we can simplify it + // for any alloca-derived argument. + if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) + if (isAllocaDerivedArg(I.getOperand(0))) { + // We can actually predict the result of comparisons between an + // alloca-derived value and null. Note that this fires regardless of + // SROA firing. + bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; + SimplifiedValues[&I] = IsNotEqual ? 
ConstantInt::getTrue(I.getType()) + : ConstantInt::getFalse(I.getType()); + return true; + } + + // Finally check for SROA candidates in comparisons. + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (isa<ConstantPointerNull>(I.getOperand(1))) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitSub(BinaryOperator &I) { + // Try to handle a special case: we can fold computing the difference of two + // constant-related pointers. + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the subtract to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrDiffs; + return true; + } + } + } + + // Otherwise, fall back to the generic logic for simplifying and handling + // instructions. + return Base::visitSub(I); +} + +bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (!isa<Constant>(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa<Constant>(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); + if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on arguments to arbitrary, unsimplified binary operators. 
+ disableSROA(LHS); + disableSROA(RHS); + + return false; +} + +bool CallAnalyzer::visitLoad(LoadInst &I) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitStore(StoreInst &I) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { + // Constant folding for extract value is trivial. + Constant *C = dyn_cast<Constant>(I.getAggregateOperand()); + if (!C) + C = SimplifiedValues.lookup(I.getAggregateOperand()); + if (C) { + SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { + // Constant folding for insert value is trivial. + Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand()); + if (!AggC) + AggC = SimplifiedValues.lookup(I.getAggregateOperand()); + Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand()); + if (!InsertedC) + InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); + if (AggC && InsertedC) { + SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, + I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +/// \brief Try to simplify a call site. +/// +/// Takes a concrete function and callsite and tries to actually simplify it by +/// analyzing the arguments and call itself with instsimplify. 
Returns true if +/// it has simplified the callsite to some other entity (a constant), making it +/// free. +bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { + // FIXME: Using the instsimplify logic directly for this is inefficient + // because we have to continually rebuild the argument list even when no + // simplifications can be performed. Until that is fixed with remapping + // inside of instsimplify, directly constant fold calls here. + if (!canConstantFoldCallTo(F)) + return false; + + // Try to re-map the arguments to constants. + SmallVector<Constant *, 4> ConstantArgs; + ConstantArgs.reserve(CS.arg_size()); + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + Constant *C = dyn_cast<Constant>(*I); + if (!C) + C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); + if (!C) + return false; // This argument doesn't map to a constant. + + ConstantArgs.push_back(C); + } + if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + SimplifiedValues[CS.getInstruction()] = C; + return true; + } + + return false; +} + +bool CallAnalyzer::visitCallSite(CallSite CS) { + if (CS.hasFnAttr(Attribute::ReturnsTwice) && + !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice)) { + // This aborts the entire analysis. + ExposesReturnsTwice = true; + return false; + } + if (CS.isCall() && + cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) + ContainsNoDuplicateCall = true; + + if (Function *F = CS.getCalledFunction()) { + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, CS)) + return true; + + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + } + } + + if (F == CS.getInstruction()->getParent()->getParent()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursiveCall = true; + return false; + } + + if (TTI.isLoweredToCall(F)) { + // We account for the average 1 instruction per call argument setup + // here. + Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Everything other than inline ASM will also have a significant cost + // merely from making the call. + if (!isa<InlineAsm>(CS.getCalledValue())) + Cost += InlineConstants::CallPenalty; + } + + return Base::visitCallSite(CS); + } + + // Otherwise we're in a very special case -- an indirect function call. See + // if we can be particularly clever about this. + Value *Callee = CS.getCalledValue(); + + // First, pay the price of the argument setup. We account for the average + // 1 instruction per call argument setup here. + Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Next, check if this happens to be an indirect function call to a known + // function in this inline context. If not, we've done all we can. + Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); + if (!F) + return Base::visitCallSite(CS); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan + // out. Pretend to inline the function, with a custom threshold. 
+ CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); + if (CA.analyzeCall(CS)) { + // We were able to inline the indirect call! Subtract the cost from the + // bonus we want to apply, but don't go below zero. + Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + } + + return Base::visitCallSite(CS); +} + +bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { + // At least one return instruction will be free after inlining. + bool Free = !HasReturn; + HasReturn = true; + return Free; +} + +bool CallAnalyzer::visitBranchInst(BranchInst &BI) { + // We model unconditional branches as essentially free -- they really + // shouldn't exist at all, but handling them makes the behavior of the + // inliner more regular and predictable. Interestingly, conditional branches + // which will fold away are also free. + return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) || + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(BI.getCondition())); +} + +bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { + // We model unconditional switches as free, see the comments on handling + // branches. + return isa<ConstantInt>(SI.getCondition()) || + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(SI.getCondition())); +} + +bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this + // indirect jump would jump from the inlined copy of the function into the + // original function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions with + // indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. 
And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably don't + // want to inline this function. + HasIndirectBr = true; + return false; +} + +bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a resume instruction. + return false; +} + +bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { + // FIXME: It might be reasonably to discount the cost of instructions leading + // to unreachable as they have the lowest possible impact on both runtime and + // code size. + return true; // No actual code is needed for unreachable. +} + +bool CallAnalyzer::visitInstruction(Instruction &I) { + // Some instructions are free. All of the free intrinsics can also be + // handled by SROA, etc. + if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) + return true; + + // We found something we don't understand or can't handle. Mark any SROA-able + // values in the operand list as no longer viable. + for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) + disableSROA(*OI); + + return false; +} + + +/// \brief Analyze a basic block for its contribution to the inline cost. +/// +/// This method walks the analyzer over every instruction in the given basic +/// block and accounts for their cost during inlining at this callsite. It +/// aborts early if the threshold has been exceeded or an impossible to inline +/// construct has been detected. It returns false if inlining is no longer +/// viable, and true if inlining remains viable. 
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + ++NumInstructions; + if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) + ++NumVectorInstructions; + + // If the instruction simplified to a constant, there is no cost to this + // instruction. Visit the instructions using our InstVisitor to account for + // all of the per-instruction logic. The visit tree returns true if we + // consumed the instruction in any way, and false if the instruction's base + // cost should count against inlining. + if (Base::visit(I)) + ++NumInstructionsSimplified; + else + Cost += InlineConstants::InstrCost; + + // If the visit this instruction detected an uninlinable pattern, abort. + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + return false; + + if (NumVectorInstructions > NumInstructions/2) + VectorBonus = FiftyPercentVectorBonus; + else if (NumVectorInstructions > NumInstructions/10) + VectorBonus = TenPercentVectorBonus; + else + VectorBonus = 0; + + // Check if we've past the threshold so we don't spin in huge basic + // blocks that will never inline. + if (Cost > (Threshold + VectorBonus)) + return false; + } + + return true; +} + +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. 
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { + if (!TD || !V->getType()->isPointerTy()) + return 0; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + APInt Offset = APInt::getNullValue(IntPtrWidth); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet<Value *, 4> Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) + return 0; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); + + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); +} + +/// \brief Analyze a call site for potential inlining. +/// +/// Returns true if inlining this call is viable, and false if it is not +/// viable. It computes the cost and adjusts the threshold based on numerous +/// factors and heuristics. If this method returns false but the computed cost +/// is below the computed threshold, then inlining was forcibly disabled by +/// some artifact of the routine. +bool CallAnalyzer::analyzeCall(CallSite CS) { + ++NumCallsAnalyzed; + + // Track whether the post-inlining function would have more than one basic + // block. A single basic block is often intended for inlining. Balloon the + // threshold by 50% until we pass the single-BB phase. + bool SingleBB = true; + int SingleBBBonus = Threshold / 2; + Threshold += SingleBBBonus; + + // Perform some tweaks to the cost and threshold based on the direct + // callsite information. 
+ + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + FiftyPercentVectorBonus = Threshold; + TenPercentVectorBonus = Threshold / 2; + + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (TD && CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = TD->getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // DataLayout. + NumStores = std::min(NumStores, 8U); + + Cost -= 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost -= InlineConstants::InstrCost; + } + } + + // If there is only one call of the function, and it has internal linkage, + // the cost of inlining it drops dramatically. + bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + &F == CS.getCalledFunction(); + if (OnlyOneCallAndLocalLinkage) + Cost += InlineConstants::LastCallToStaticBonus; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. 
As such, + // there is little point in inlining this unless there is literally zero + // cost. + Instruction *Instr = CS.getInstruction(); + if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { + if (isa<UnreachableInst>(II->getNormalDest()->begin())) + Threshold = 1; + } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) + Threshold = 1; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (F.getCallingConv() == CallingConv::Cold) + Cost += InlineConstants::ColdccPenalty; + + // Check if we're done. This can happen due to bonuses and penalties. + if (Cost > Threshold) + return false; + + if (F.empty()) + return true; + + Function *Caller = CS.getInstruction()->getParent()->getParent(); + // Check if the caller function is recursive itself. + for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); + U != E; ++U) { + CallSite Site(cast<Value>(*U)); + if (!Site) + continue; + Instruction *I = Site.getInstruction(); + if (I->getParent()->getParent() == Caller) { + IsCallerRecursive = true; + break; + } + } + + // Populate our simplified values by mapping from function arguments to call + // arguments with known important simplifications. + CallSite::arg_iterator CAI = CS.arg_begin(); + for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); + FAI != FAE; ++FAI, ++CAI) { + assert(CAI != CS.arg_end()); + if (Constant *C = dyn_cast<Constant>(CAI)) + SimplifiedValues[FAI] = C; + + Value *PtrArg = *CAI; + if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { + ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); + + // We can SROA any pointer arguments derived from alloca instructions. 
+ if (isa<AllocaInst>(PtrArg)) { + SROAArgValues[FAI] = PtrArg; + SROAArgCosts[PtrArg] = 0; + } + } + } + NumConstantArgs = SimplifiedValues.size(); + NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); + NumAllocaArgs = SROAArgValues.size(); + + // The worklist of live basic blocks in the callee *after* inlining. We avoid + // adding basic blocks of the callee which can be proven to be dead for this + // particular call site in order to get more accurate cost estimates. This + // requires a somewhat heavyweight iteration pattern: we need to walk the + // basic blocks in a breadth-first order as we insert live successors. To + // accomplish this, prioritizing for small iterations because we exit after + // crossing our threshold, we use a small-size optimized SetVector. + typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, + SmallPtrSet<BasicBlock *, 16> > BBSetVector; + BBSetVector BBWorklist; + BBWorklist.insert(&F.getEntryBlock()); + // Note that we *must not* cache the size, this loop grows the worklist. + for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { + // Bail out the moment we cross the threshold. This means we'll under-count + // the cost, but only when undercounting doesn't matter. + if (Cost > (Threshold + VectorBonus)) + break; + + BasicBlock *BB = BBWorklist[Idx]; + if (BB->empty()) + continue; + + // Analyze the cost of this block. If we blow through the threshold, this + // returns false, and we can bail on out. + if (!analyzeBlock(BB)) { + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. 
+ if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + return false; + + break; + } + + TerminatorInst *TI = BB->getTerminator(); + + // Add in the live successors by first checking whether we have terminator + // that may be simplified based on the values simplified by this call. + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isConditional()) { + Value *Cond = BI->getCondition(); + if (ConstantInt *SimpleCond + = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { + BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); + continue; + } + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Value *Cond = SI->getCondition(); + if (ConstantInt *SimpleCond + = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { + BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); + continue; + } + } + + // If we're unable to select a particular successor, just count all of + // them. + for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; + ++TIdx) + BBWorklist.insert(TI->getSuccessor(TIdx)); + + // If we had any successors at this point, than post-inlining is likely to + // have them as well. Note that we assume any basic blocks which existed + // due to branches or switches which folded above will also fold after + // inlining. + if (SingleBB && TI->getNumSuccessors() > 1) { + // Take off the bonus we applied to the threshold. + Threshold -= SingleBBBonus; + SingleBB = false; + } + } + + // If this is a noduplicate call, we can still inline as long as + // inlining this would cause the removal of the caller (so the instruction + // is not actually duplicated, just moved). + if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) + return false; + + Threshold += VectorBonus; + + return Cost < Threshold; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// \brief Dump stats about this call's analysis. 
+void CallAnalyzer::dump() { +#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" + DEBUG_PRINT_STAT(NumConstantArgs); + DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); + DEBUG_PRINT_STAT(NumAllocaArgs); + DEBUG_PRINT_STAT(NumConstantPtrCmps); + DEBUG_PRINT_STAT(NumConstantPtrDiffs); + DEBUG_PRINT_STAT(NumInstructionsSimplified); + DEBUG_PRINT_STAT(SROACostSavings); + DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(ContainsNoDuplicateCall); +#undef DEBUG_PRINT_STAT +} +#endif + +INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) + +char InlineCostAnalysis::ID = 0; + +InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} + +InlineCostAnalysis::~InlineCostAnalysis() {} + +void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetTransformInfo>(); + CallGraphSCCPass::getAnalysisUsage(AU); +} + +bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { + TD = getAnalysisIfAvailable<DataLayout>(); + TTI = &getAnalysis<TargetTransformInfo>(); + return false; +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold); +} + +/// \brief Test that two functions either have or have not the given attribute +/// at the same time. +static bool attributeMatches(Function *F1, Function *F2, + Attribute::AttrKind Attr) { + return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr); +} + +/// \brief Test that there are no attribute conflicts between Caller and Callee +/// that prevent inlining. 
+static bool functionsHaveCompatibleAttributes(Function *Caller, + Function *Callee) { + return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && + attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && + attributeMatches(Caller, Callee, Attribute::SanitizeThread); +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, + int Threshold) { + // Cannot inline indirect calls. + if (!Callee) + return llvm::InlineCost::getNever(); + + // Calls to functions with always-inline attributes should be inlined + // whenever possible. + if (Callee->hasFnAttribute(Attribute::AlwaysInline)) { + if (isInlineViable(*Callee)) + return llvm::InlineCost::getAlways(); + return llvm::InlineCost::getNever(); + } + + // Never inline functions with conflicting attributes (unless callee has + // always-inline attribute). + if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee)) + return llvm::InlineCost::getNever(); + + // Don't inline this call if the caller has the optnone attribute. + if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone)) + return llvm::InlineCost::getNever(); + + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline or call sites + // marked noinline. + if (Callee->mayBeOverridden() || + Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline()) + return llvm::InlineCost::getNever(); + + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() + << "...\n"); + + CallAnalyzer CA(TD, *TTI, *Callee, Threshold); + bool ShouldInline = CA.analyzeCall(CS); + + DEBUG(CA.dump()); + + // Check if there was a reason to force inlining or no inlining. 
+ if (!ShouldInline && CA.getCost() < CA.getThreshold()) + return InlineCost::getNever(); + if (ShouldInline && CA.getCost() >= CA.getThreshold()) + return InlineCost::getAlways(); + + return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); +} + +bool InlineCostAnalysis::isInlineViable(Function &F) { + bool ReturnsTwice = + F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice); + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + // Disallow inlining of functions which contain an indirect branch. + if (isa<IndirectBrInst>(BI->getTerminator())) + return false; + + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; + ++II) { + CallSite CS(II); + if (!CS) + continue; + + // Disallow recursive calls. + if (&F == CS.getCalledFunction()) + return false; + + // Disallow calls which expose returns-twice to a function not previously + // attributed as such. + if (!ReturnsTwice && CS.isCall() && + cast<CallInst>(CS.getInstruction())->canReturnTwice()) + return false; + } + } + + return true; +} diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp new file mode 100644 index 000000000000..b33e2cb9999e --- /dev/null +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -0,0 +1,335 @@ +//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bookkeeping for "interesting" users of expressions +// computed from induction variables. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "iv-users" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +char IVUsers::ID = 0; +INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", + "Induction Variable Users", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_END(IVUsers, "iv-users", + "Induction Variable Users", false, true) + +Pass *llvm::createIVUsersPass() { + return new IVUsers(); +} + +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, + ScalarEvolution *SE, LoopInfo *LI) { + // An addrec is interesting if it's affine or if it has an interesting start. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Keep things simple. Don't touch loop-variant strides unless they're + // only used outside the loop and we can simplify them. + if (AR->getLoop() == L) + return AR->isAffine() || + (!L->contains(I) && + SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR); + // Otherwise recurse to see if the start value is interesting, and that + // the step value is not interesting, since we don't yet know how to + // do effective SCEV expansions for addrecs with interesting steps. 
+ return isInteresting(AR->getStart(), I, L, SE, LI) && + !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI); + } + + // An add is interesting if exactly one of its operands is interesting. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + bool AnyInterestingYet = false; + for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); + OI != OE; ++OI) + if (isInteresting(*OI, I, L, SE, LI)) { + if (AnyInterestingYet) + return false; + AnyInterestingYet = true; + } + return AnyInterestingYet; + } + + // Nothing else is interesting here. + return false; +} + +/// Return true if all loop headers that dominate this block are in simplified +/// form. +static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, + const LoopInfo *LI, + SmallPtrSet<Loop*,16> &SimpleLoopNests) { + Loop *NearestLoop = 0; + for (DomTreeNode *Rung = DT->getNode(BB); + Rung; Rung = Rung->getIDom()) { + BasicBlock *DomBB = Rung->getBlock(); + Loop *DomLoop = LI->getLoopFor(DomBB); + if (DomLoop && DomLoop->getHeader() == DomBB) { + // If the domtree walk reaches a loop with no preheader, return false. + if (!DomLoop->isLoopSimplifyForm()) + return false; + // If we have already checked this loop nest, stop checking. + if (SimpleLoopNests.count(DomLoop)) + break; + // If we have not already checked this loop nest, remember the loop + // header nearest to BB. The nearest loop may not contain BB. + if (!NearestLoop) + NearestLoop = DomLoop; + } + } + if (NearestLoop) + SimpleLoopNests.insert(NearestLoop); + return true; +} + +/// AddUsersImpl - Inspect the specified instruction. If it is a +/// reducible SCEV, recursively add its users to the IVUsesByStride set and +/// return true. Otherwise, return false. +bool IVUsers::AddUsersImpl(Instruction *I, + SmallPtrSet<Loop*,16> &SimpleLoopNests) { + // Add this IV user to the Processed set before returning false to ensure that + // all IV users are members of the set. See IVUsers::isIVUserOrOperand. 
+ if (!Processed.insert(I)) + return true; // Instruction already handled. + + if (!SE->isSCEVable(I->getType())) + return false; // Void and FP expressions cannot be reduced. + + // IVUsers is used by LSR which assumes that all SCEV expressions are safe to + // pass to SCEVExpander. Expressions are not safe to expand if they represent + // operations that are not safe to speculate, namely integer division. + if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, TD)) + return false; + + // LSR is not APInt clean, do not touch integers bigger than 64-bits. + // Also avoid creating IVs of non-native types. For example, we don't want a + // 64-bit IV in 32-bit code just because the loop has one 64-bit cast. + uint64_t Width = SE->getTypeSizeInBits(I->getType()); + if (Width > 64 || (TD && !TD->isLegalInteger(Width))) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *ISE = SE->getSCEV(I); + + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. + if (!isInteresting(ISE, I, L, SE, LI)) + return false; + + SmallPtrSet<Instruction *, 4> UniqueUsers; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (!UniqueUsers.insert(User)) + continue; + + // Do not infinitely recurse on PHI nodes. + if (isa<PHINode>(User) && Processed.count(User)) + continue; + + // Only consider IVUsers that are dominated by simplified loop + // headers. Otherwise, SCEVExpander will crash. + BasicBlock *UseBB = User->getParent(); + // A phi's use is live out of its predecessor block. + if (PHINode *PHI = dyn_cast<PHINode>(User)) { + unsigned OperandNo = UI.getOperandNo(); + unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); + UseBB = PHI->getIncomingBlock(ValNo); + } + if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests)) + return false; + + // Descend recursively, but not into PHI nodes outside the current loop. 
+ // It's important to see the entire expression outside the loop to get + // choices that depend on addressing mode use right, although we won't + // consider references outside the loop in all cases. + // If User is already in Processed, we don't want to recurse into it again, + // but do want to record a second reference in the same instruction. + bool AddUserToIVUsers = false; + if (LI->getLoopFor(User->getParent()) != L) { + if (isa<PHINode>(User) || Processed.count(User) || + !AddUsersImpl(User, SimpleLoopNests)) { + DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } + } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) { + DEBUG(dbgs() << "FOUND USER: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } + + if (AddUserToIVUsers) { + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Autodetect the post-inc loop set, populating NewUse.PostIncLoops. + // The regular return value here is discarded; instead of recording + // it, we just recompute it when we need it. + ISE = TransformForPostIncUse(NormalizeAutodetect, + ISE, User, I, + NewUse.PostIncLoops, + *SE, *DT); + DEBUG(if (SE->getSCEV(I) != ISE) + dbgs() << " NORMALIZED TO: " << *ISE << '\n'); + } + } + return true; +} + +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + // SCEVExpander can only handle users that are dominated by simplified loop + // entries. Keep track of all loops that are only dominated by other simple + // loops so we don't traverse the domtree for each user. 
+ SmallPtrSet<Loop*,16> SimpleLoopNests; + + return AddUsersImpl(I, SimpleLoopNests); +} + +IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { + IVUses.push_back(new IVStrideUse(this, User, Operand)); + return IVUses.back(); +} + +IVUsers::IVUsers() + : LoopPass(ID) { + initializeIVUsersPass(*PassRegistry::getPassRegistry()); +} + +void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<ScalarEvolution>(); + AU.setPreservesAll(); +} + +bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { + + L = l; + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTree>(); + SE = &getAnalysis<ScalarEvolution>(); + TD = getAnalysisIfAvailable<DataLayout>(); + + // Find all uses of induction variables in this loop, and categorize + // them by stride. Start by finding all of the PHI nodes in the header for + // this loop. If they are induction variables, inspect their uses. + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) + (void)AddUsersIfInteresting(I); + + return false; +} + +void IVUsers::print(raw_ostream &OS, const Module *M) const { + OS << "IV Users for loop "; + WriteAsOperand(OS, L->getHeader(), false); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << " with backedge-taken count " + << *SE->getBackedgeTakenCount(L); + } + OS << ":\n"; + + for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(), + E = IVUses.end(); UI != E; ++UI) { + OS << " "; + WriteAsOperand(OS, UI->getOperandValToReplace(), false); + OS << " = " << *getReplacementExpr(*UI); + for (PostIncLoopSet::const_iterator + I = UI->PostIncLoops.begin(), + E = UI->PostIncLoops.end(); I != E; ++I) { + OS << " (post-inc with loop "; + WriteAsOperand(OS, (*I)->getHeader(), false); + OS << ")"; + } + OS << " in "; + UI->getUser()->print(OS); + OS << '\n'; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void IVUsers::dump() const { + print(dbgs()); +} 
+#endif + +void IVUsers::releaseMemory() { + Processed.clear(); + IVUses.clear(); +} + +/// getReplacementExpr - Return a SCEV expression which computes the +/// value of the OperandValToReplace. +const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const { + return SE->getSCEV(IU.getOperandValToReplace()); +} + +/// getExpr - Return the expression for the use. +const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const { + return + TransformForPostIncUse(Normalize, getReplacementExpr(IU), + IU.getUser(), IU.getOperandValToReplace(), + const_cast<PostIncLoopSet &>(IU.getPostIncLoops()), + *SE, *DT); +} + +static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + if (AR->getLoop() == L) + return AR; + return findAddRecForLoop(AR->getStart(), L); + } + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + return AR; + return 0; + } + + return 0; +} + +const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const { + if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L)) + return AR->getStepRecurrence(*SE); + return 0; +} + +void IVStrideUse::transformToPostInc(const Loop *L) { + PostIncLoops.insert(L); +} + +void IVStrideUse::deleted() { + // Remove this user from the list. + Parent->Processed.erase(this->getUser()); + Parent->IVUses.erase(this); + // this now dangles! 
+} diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp new file mode 100644 index 000000000000..75a49eb90a88 --- /dev/null +++ b/contrib/llvm/lib/Analysis/InstCount.cpp @@ -0,0 +1,87 @@ +//===-- InstCount.cpp - Collects the count of all instructions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass collects the count of all instructions and reports them +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instcount" +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Function.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(TotalInsts , "Number of instructions (of all types)"); +STATISTIC(TotalBlocks, "Number of basic blocks"); +STATISTIC(TotalFuncs , "Number of non-external functions"); +STATISTIC(TotalMemInst, "Number of memory instructions"); + +#define HANDLE_INST(N, OPCODE, CLASS) \ + STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts"); + +#include "llvm/IR/Instruction.def" + + +namespace { + class InstCount : public FunctionPass, public InstVisitor<InstCount> { + friend class InstVisitor<InstCount>; + + void visitFunction (Function &F) { ++TotalFuncs; } + void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; } + +#define HANDLE_INST(N, OPCODE, CLASS) \ + void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; } + +#include "llvm/IR/Instruction.def" + + void visitInstruction(Instruction &I) { + errs() << "Instruction Count does not know about " << I; + llvm_unreachable(0); + } + public: + static char ID; // Pass 
identification, replacement for typeid + InstCount() : FunctionPass(ID) { + initializeInstCountPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + virtual void print(raw_ostream &O, const Module *M) const {} + + }; +} + +char InstCount::ID = 0; +INITIALIZE_PASS(InstCount, "instcount", + "Counts the various types of Instructions", false, true) + +FunctionPass *llvm::createInstCountPass() { return new InstCount(); } + +// InstCount::run - This is the main Analysis entry point for a +// function. +// +bool InstCount::runOnFunction(Function &F) { + unsigned StartMemInsts = + NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + + NumInvokeInst + NumAllocaInst; + visit(F); + unsigned EndMemInsts = + NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + + NumInvokeInst + NumAllocaInst; + TotalMemInst += EndMemInsts-StartMemInsts; + return false; +} diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp new file mode 100644 index 000000000000..b867af1dc3de --- /dev/null +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -0,0 +1,3239 @@ +//===- InstructionSimplify.cpp - Fold instruction operands ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements routines for folding instructions into simpler forms +// that do not require creating new instructions. This does constant folding +// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either +// returning a constant ("and i32 %x, 0" -> "0") or an already existing value +// ("and i32 %x, %x" -> "%x"). 
All operands are assumed to have already been +// simplified: This is usually true and assuming it simplifies the logic (if +// they have not been simplified then results are correct but maybe suboptimal). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instsimplify" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +enum { RecursionLimit = 3 }; + +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor , "Number of factorizations"); +STATISTIC(NumReassoc, "Number of reassociations"); + +struct Query { + const DataLayout *TD; + const TargetLibraryInfo *TLI; + const DominatorTree *DT; + + Query(const DataLayout *td, const TargetLibraryInfo *tli, + const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {} +}; + +static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, + unsigned); +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, + unsigned); +static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned); + +/// getFalse - For a boolean type, or a vector of boolean type, return false, or +/// a vector with every element false, as appropriate for the type. 
+static Constant *getFalse(Type *Ty) { + assert(Ty->getScalarType()->isIntegerTy(1) && + "Expected i1 type or a vector of i1!"); + return Constant::getNullValue(Ty); +} + +/// getTrue - For a boolean type, or a vector of boolean type, return true, or +/// a vector with every element true, as appropriate for the type. +static Constant *getTrue(Type *Ty) { + assert(Ty->getScalarType()->isIntegerTy(1) && + "Expected i1 type or a vector of i1!"); + return Constant::getAllOnesValue(Ty); +} + +/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? +static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, + Value *RHS) { + CmpInst *Cmp = dyn_cast<CmpInst>(V); + if (!Cmp) + return false; + CmpInst::Predicate CPred = Cmp->getPredicate(); + Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1); + if (CPred == Pred && CLHS == LHS && CRHS == RHS) + return true; + return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS && + CRHS == LHS; +} + +/// ValueDominatesPHI - Does the given value dominate the specified phi node? +static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + // Arguments and constants dominate all instructions. + return true; + + // If we are processing instructions (and/or basic blocks) that have not been + // fully added to a function, the parent nodes may still be null. Simply + // return the conservative answer in these cases. + if (!I->getParent() || !P->getParent() || !I->getParent()->getParent()) + return false; + + // If we have a DominatorTree then do a precise test. + if (DT) { + if (!DT->isReachableFromEntry(P->getParent())) + return true; + if (!DT->isReachableFromEntry(I->getParent())) + return false; + return DT->dominates(I, P); + } + + // Otherwise, if the instruction is in the entry block, and is not an invoke, + // then it obviously dominates all phi nodes. 
+ if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() && + !isa<InvokeInst>(I)) + return true; + + return false; +} + +/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning +/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is +/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS. +/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". +/// Returns the simplified value, or null if no simplification was performed. +static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, + unsigned OpcToExpand, const Query &Q, + unsigned MaxRecurse) { + Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + // Check whether the expression has the form "(A op' B) op C". + if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS)) + if (Op0->getOpcode() == OpcodeToExpand) { + // It does! Try turning it into "(A op C) op' (B op C)". + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; + // Do "A op C" and "B op C" both simplify? + if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { + // They do! Return "L op' R" if it simplifies or is already available. + // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. + if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand) + && L == B && R == A)) { + ++NumExpand; + return LHS; + } + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { + ++NumExpand; + return V; + } + } + } + + // Check whether the expression has the form "A op (B op' C)". + if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS)) + if (Op1->getOpcode() == OpcodeToExpand) { + // It does! Try turning it into "(A op B) op' (A op C)". 
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); + // Do "A op B" and "A op C" both simplify? + if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) { + // They do! Return "L op' R" if it simplifies or is already available. + // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. + if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand) + && L == C && R == B)) { + ++NumExpand; + return RHS; + } + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { + ++NumExpand; + return V; + } + } + } + + return 0; +} + +/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term +/// using the operation OpCodeToExtract. For example, when Opcode is Add and +/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)". +/// Returns the simplified value, or null if no simplification was performed. +static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, + unsigned OpcToExtract, const Query &Q, + unsigned MaxRecurse) { + Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + + if (!Op0 || Op0->getOpcode() != OpcodeToExtract || + !Op1 || Op1->getOpcode() != OpcodeToExtract) + return 0; + + // The expression has the form "(A op' B) op (C op' D)". + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); + Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); + + // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)". + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? 
+ if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) { + Value *DD = A == C ? D : C; + // Form "A op' (B op DD)" if it simplifies completely. + // Does "B op DD" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) { + // It does! Return "A op' V" if it simplifies or is already available. + // If V equals B then "A op' V" is just the LHS. If V equals DD then + // "A op' V" is just the RHS. + if (V == B || V == DD) { + ++NumFactor; + return V == B ? LHS : RHS; + } + // Otherwise return "A op' V" if it simplifies. + if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) { + ++NumFactor; + return W; + } + } + } + + // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)". + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) { + Value *CC = B == D ? C : D; + // Form "(A op CC) op' B" if it simplifies completely.. + // Does "A op CC" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) { + // It does! Return "V op' B" if it simplifies or is already available. + // If V equals A then "V op' B" is just the LHS. If V equals CC then + // "V op' B" is just the RHS. + if (V == A || V == CC) { + ++NumFactor; + return V == A ? LHS : RHS; + } + // Otherwise return "V op' B" if it simplifies. + if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) { + ++NumFactor; + return W; + } + } + } + + return 0; +} + +/// SimplifyAssociativeBinOp - Generic simplifications for associative binary +/// operations. Returns the simpler value, or null if none was found. 
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; + assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); + + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + + // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = RHS; + + // Does "B op C" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { + // It does! Return "A op V" if it simplifies or is already available. + // If V equals B then "A op V" is just the LHS. + if (V == B) return LHS; + // Otherwise return "A op V" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = LHS; + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "A op B" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) { + // It does! Return "V op C" if it simplifies or is already available. + // If V equals B then "V op C" is just the RHS. + if (V == B) return RHS; + // Otherwise return "V op C" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // The remaining transforms require commutativity as well as associativity. + if (!Instruction::isCommutative(Opcode)) + return 0; + + // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely. 
+ if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = RHS; + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { + // It does! Return "V op B" if it simplifies or is already available. + // If V equals A then "V op B" is just the LHS. + if (V == A) return LHS; + // Otherwise return "V op B" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = LHS; + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { + // It does! Return "B op V" if it simplifies or is already available. + // If V equals C then "B op V" is just the RHS. + if (V == C) return RHS; + // Otherwise return "B op V" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + return 0; +} + +/// ThreadBinOpOverSelect - In the case of a binary operation with a select +/// instruction as an operand, try to simplify the binop by seeing whether +/// evaluating it on both branches of the select results in the same value. +/// Returns the common value if so, otherwise returns null. +static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + SelectInst *SI; + if (isa<SelectInst>(LHS)) { + SI = cast<SelectInst>(LHS); + } else { + assert(isa<SelectInst>(RHS) && "No select instruction operand!"); + SI = cast<SelectInst>(RHS); + } + + // Evaluate the BinOp on the true and false branches of the select. 
+ Value *TV; + Value *FV; + if (SI == LHS) { + TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse); + FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse); + } else { + TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse); + FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse); + } + + // If they simplified to the same value, then return the common value. + // If they both failed to simplify then return null. + if (TV == FV) + return TV; + + // If one branch simplified to undef, return the other one. + if (TV && isa<UndefValue>(TV)) + return FV; + if (FV && isa<UndefValue>(FV)) + return TV; + + // If applying the operation did not change the true and false select values, + // then the result of the binop is the select itself. + if (TV == SI->getTrueValue() && FV == SI->getFalseValue()) + return SI; + + // If one branch simplified and the other did not, and the simplified + // value is equal to the unsimplified one, return the simplified value. + // For example, select (cond, X, X & Z) & Z -> X & Z. + if ((FV && !TV) || (TV && !FV)) { + // Check that the simplified value has the form "X op Y" where "op" is the + // same as the original operation. + Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV); + if (Simplified && Simplified->getOpcode() == Opcode) { + // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS". + // We already know that "op" is the same as for the simplified value. See + // if the operands match too. If so, return the simplified value. + Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue(); + Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS; + Value *UnsimplifiedRHS = SI == LHS ? 
RHS : UnsimplifiedBranch; + if (Simplified->getOperand(0) == UnsimplifiedLHS && + Simplified->getOperand(1) == UnsimplifiedRHS) + return Simplified; + if (Simplified->isCommutative() && + Simplified->getOperand(1) == UnsimplifiedLHS && + Simplified->getOperand(0) == UnsimplifiedRHS) + return Simplified; + } + } + + return 0; +} + +/// ThreadCmpOverSelect - In the case of a comparison with a select instruction, +/// try to simplify the comparison by seeing whether both branches of the select +/// result in the same value. Returns the common value if so, otherwise returns +/// null. +static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, const Query &Q, + unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + // Make sure the select is on the LHS. + if (!isa<SelectInst>(LHS)) { + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!"); + SelectInst *SI = cast<SelectInst>(LHS); + Value *Cond = SI->getCondition(); + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); + + // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. + // Does "cmp TV, RHS" simplify? + Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse); + if (TCmp == Cond) { + // It not only simplified, it simplified to the select condition. Replace + // it with 'true'. + TCmp = getTrue(Cond->getType()); + } else if (!TCmp) { + // It didn't simplify. However if "cmp TV, RHS" is equal to the select + // condition then we can replace it with 'true'. Otherwise give up. + if (!isSameCompare(Cond, Pred, TV, RHS)) + return 0; + TCmp = getTrue(Cond->getType()); + } + + // Does "cmp FV, RHS" simplify? + Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse); + if (FCmp == Cond) { + // It not only simplified, it simplified to the select condition. Replace + // it with 'false'. 
+ FCmp = getFalse(Cond->getType()); + } else if (!FCmp) { + // It didn't simplify. However if "cmp FV, RHS" is equal to the select + // condition then we can replace it with 'false'. Otherwise give up. + if (!isSameCompare(Cond, Pred, FV, RHS)) + return 0; + FCmp = getFalse(Cond->getType()); + } + + // If both sides simplified to the same value, then use it as the result of + // the original comparison. + if (TCmp == FCmp) + return TCmp; + + // The remaining cases only make sense if the select condition has the same + // type as the result of the comparison, so bail out if this is not so. + if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) + return 0; + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". + if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = + SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), + Q, MaxRecurse)) + return V; + + return 0; +} + +/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that +/// is a PHI instruction, try to simplify the binop by seeing whether evaluating +/// it on the incoming phi values yields the same result for every value. If so +/// returns the common value, otherwise returns null. 
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + PHINode *PI; + if (isa<PHINode>(LHS)) { + PI = cast<PHINode>(LHS); + // Bail out if RHS and the phi may be mutually interdependent due to a loop. + if (!ValueDominatesPHI(RHS, PI, Q.DT)) + return 0; + } else { + assert(isa<PHINode>(RHS) && "No PHI instruction operand!"); + PI = cast<PHINode>(RHS); + // Bail out if LHS and the phi may be mutually interdependent due to a loop. + if (!ValueDominatesPHI(LHS, PI, Q.DT)) + return 0; + } + + // Evaluate the BinOp on the incoming phi values. + Value *CommonValue = 0; + for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PI->getIncomingValue(i); + // If the incoming value is the phi node itself, it can safely be skipped. + if (Incoming == PI) continue; + Value *V = PI == LHS ? + SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) : + SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse); + // If the operation failed to simplify, or simplified to a different value + // to previously, then give up. + if (!V || (CommonValue && V != CommonValue)) + return 0; + CommonValue = V; + } + + return CommonValue; +} + +/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try +/// try to simplify the comparison by seeing whether comparing with all of the +/// incoming phi values yields the same result every time. If so returns the +/// common result, otherwise returns null. +static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + // Make sure the phi is on the LHS. 
+ if (!isa<PHINode>(LHS)) { + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!"); + PHINode *PI = cast<PHINode>(LHS); + + // Bail out if RHS and the phi may be mutually interdependent due to a loop. + if (!ValueDominatesPHI(RHS, PI, Q.DT)) + return 0; + + // Evaluate the BinOp on the incoming phi values. + Value *CommonValue = 0; + for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PI->getIncomingValue(i); + // If the incoming value is the phi node itself, it can safely be skipped. + if (Incoming == PI) continue; + Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); + // If the operation failed to simplify, or simplified to a different value + // to previously, then give up. + if (!V || (CommonValue && V != CommonValue)) + return 0; + CommonValue = V; + } + + return CommonValue; +} + +/// SimplifyAddInst - Given operands for an Add, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const Query &Q, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops, + Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X + undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // X + 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X + (Y - X) -> Y + // (Y - X) + X -> Y + // Eg: X + -X -> 0 + Value *Y = 0; + if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) || + match(Op0, m_Sub(m_Value(Y), m_Specific(Op1)))) + return Y; + + // X + ~X -> -1 since ~X = -X-1 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getAllOnesValue(Op0->getType()); + + /// i1 add -> xor. 
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) + return V; + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, + MaxRecurse)) + return V; + + // Mul distributes over Add. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul, + Q, MaxRecurse)) + return V; + + // Threading Add over selects and phi nodes is pointless, so don't bother. + // Threading over the select in "A + select(cond, B, C)" means evaluating + // "A+B" and "A+C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); +} + +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. +/// +/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't +/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. +/// folding. 
+static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, + Value *&V, + bool AllowNonInbounds = false) { + assert(V->getType()->getScalarType()->isPointerTy()); + + // Without DataLayout, just be conservative for now. Theoretically, more could + // be done in this case. + if (!TD) + return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); + + Type *IntPtrTy = TD->getIntPtrType(V->getType())->getScalarType(); + APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet<Value *, 4> Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + if ((!AllowNonInbounds && !GEP->isInBounds()) || + !GEP->accumulateConstantOffset(*TD, Offset)) + break; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->getScalarType()->isPointerTy() && + "Unexpected operand type!"); + } while (Visited.insert(V)); + + Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); + if (V->getType()->isVectorTy()) + return ConstantVector::getSplat(V->getType()->getVectorNumElements(), + OffsetIntPtr); + return OffsetIntPtr; +} + +/// \brief Compute the constant difference between two pointer values. +/// If the difference is not a constant, returns zero. +static Constant *computePointerDifference(const DataLayout *TD, + Value *LHS, Value *RHS) { + Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + + // If LHS and RHS are not related via constant offsets to the same base + // value, there is nothing we can do here. 
+ if (LHS != RHS) + return 0; + + // Otherwise, the difference of LHS - RHS can be computed as: + // LHS - RHS + // = (LHSOffset + Base) - (RHSOffset + Base) + // = LHSOffset - RHSOffset + return ConstantExpr::getSub(LHSOffset, RHSOffset); +} + +/// SimplifySubInst - Given operands for a Sub, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const Query &Q, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // X - undef -> undef + // undef - X -> undef + if (match(Op0, m_Undef()) || match(Op1, m_Undef())) + return UndefValue::get(Op0->getType()); + + // X - 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X - X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // (X*2) - X -> X + // (X<<1) - X -> X + Value *X = 0; + if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) || + match(Op0, m_Shl(m_Specific(Op1), m_One()))) + return Op1; + + // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. + // For example, (X + Y) - Y -> X; (Y + X) - Y -> X + Value *Y = 0, *Z = Op1; + if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z + // See if "V === Y - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) + // It does! Now see if "X + V" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + // See if "V === X - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) + // It does! Now see if "Y + V" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) { + // It does, we successfully reassociated! 
+ ++NumReassoc; + return W; + } + } + + // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies. + // For example, X - (X + 1) -> -1 + X = Op0; + if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z) + // See if "V === X - Y" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) + // It does! Now see if "V - Z" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + // See if "V === X - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) + // It does! Now see if "V - Y" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + } + + // Z - (X - Y) -> (Z - X) + Y if everything simplifies. + // For example, X - (X - Y) -> Y. + Z = Op0; + if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y) + // See if "V === Z - X" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1)) + // It does! Now see if "V + Y" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + + // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies. + if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) && + match(Op1, m_Trunc(m_Value(Y)))) + if (X->getType() == Y->getType()) + // See if "V === X - Y" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) + // It does! Now see if "trunc V" simplifies. + if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1)) + // It does, return the simplified "trunc V". + return W; + + // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). 
+ if (match(Op0, m_PtrToInt(m_Value(X))) && + match(Op1, m_PtrToInt(m_Value(Y)))) + if (Constant *Result = computePointerDifference(Q.TD, X, Y)) + return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); + + // Mul distributes over Sub. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul, + Q, MaxRecurse)) + return V; + + // i1 sub -> xor. + if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) + return V; + + // Threading Sub over selects and phi nodes is pointless, so don't bother. + // Threading over the select in "A - select(cond, B, C)" means evaluating + // "A-B" and "A-C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); +} + +/// Given operands for an FAdd, see if we can fold the result. If not, this +/// returns null. +static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. 
+ std::swap(Op0, Op1); + } + + // fadd X, -0 ==> X + if (match(Op1, m_NegZero())) + return Op0; + + // fadd X, 0 ==> X, when we know X is not -0 + if (match(Op1, m_Zero()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0))) + return Op0; + + // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 + // where nnan and ninf have to occur at least once somewhere in this + // expression + Value *SubOp = 0; + if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) + SubOp = Op1; + else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) + SubOp = Op0; + if (SubOp) { + Instruction *FSub = cast<Instruction>(SubOp); + if ((FMF.noNaNs() || FSub->hasNoNaNs()) && + (FMF.noInfs() || FSub->hasNoInfs())) + return Constant::getNullValue(Op0->getType()); + } + + return 0; +} + +/// Given operands for an FSub, see if we can fold the result. If not, this +/// returns null. +static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + } + + // fsub X, 0 ==> X + if (match(Op1, m_Zero())) + return Op0; + + // fsub X, -0 ==> X, when we know X is not -0 + if (match(Op1, m_NegZero()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0))) + return Op0; + + // fsub 0, (fsub -0.0, X) ==> X + Value *X; + if (match(Op0, m_AnyZero())) { + if (match(Op1, m_FSub(m_NegZero(), m_Value(X)))) + return X; + if (FMF.noSignedZeros() && match(Op1, m_FSub(m_AnyZero(), m_Value(X)))) + return X; + } + + // fsub nnan ninf x, x ==> 0.0 + if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + return 0; +} + +/// Given the operands for an FMul, see if we can fold the result +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, + FastMathFlags FMF, + const Query &Q, + unsigned 
MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // fmul X, 1.0 ==> X + if (match(Op1, m_FPOne())) + return Op0; + + // fmul nnan nsz X, 0 ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) + return Op1; + + return 0; +} + +/// SimplifyMulInst - Given operands for a Mul, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X * undef -> 0 + if (match(Op1, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // X * 0 -> 0 + if (match(Op1, m_Zero())) + return Op1; + + // X * 1 -> X + if (match(Op1, m_One())) + return Op0; + + // (X / Y) * Y -> X if the division is exact. + Value *X = 0; + if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y + match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y) + return X; + + // i1 mul -> and. + if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) + return V; + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, + MaxRecurse)) + return V; + + // Mul distributes over Add. Try some generic simplifications based on this. 
+ if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, + Q, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, + MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + +Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + +Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, + FastMathFlags FMF, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const Query &Q, unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); + } + } + + bool isSigned = Opcode == Instruction::SDiv; + + // X / undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // undef / X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // 0 / X -> 0, we don't need to preserve faults! + if (match(Op0, m_Zero())) + return Op0; + + // X / 1 -> X + if (match(Op1, m_One())) + return Op0; + + if (Op0->getType()->isIntegerTy(1)) + // It can't be division by zero, hence it must be division by one. + return Op0; + + // X / X -> 1 + if (Op0 == Op1) + return ConstantInt::get(Op0->getType(), 1); + + // (X * Y) / Y -> X if the multiplication does not overflow. + Value *X = 0, *Y = 0; + if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { + if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 + OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0); + // If the Mul knows it does not overflow, then we are good to go. + if ((isSigned && Mul->hasNoSignedWrap()) || + (!isSigned && Mul->hasNoUnsignedWrap())) + return X; + // If X has the form X = A / Y then X * Y cannot overflow. + if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X)) + if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y) + return X; + } + + // (X rem Y) / Y -> 0 + if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) || + (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1))))) + return Constant::getNullValue(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. 
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifySDivInst - Given operands for an SDiv, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyUDivInst - Given operands for a UDiv, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned) { + // undef / X -> undef (the undef could be a snan). 
+ if (match(Op0, m_Undef())) + return Op0; + + // X / undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + return 0; +} + +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyRem - Given operands for an SRem or URem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const Query &Q, unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); + } + } + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // undef % X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // 0 % X -> 0, we don't need to preserve faults! + if (match(Op0, m_Zero())) + return Op0; + + // X % 0 -> undef, we don't need to preserve faults! + if (match(Op1, m_Zero())) + return UndefValue::get(Op0->getType()); + + // X % 1 -> 0 + if (match(Op1, m_One())) + return Constant::getNullValue(Op0->getType()); + + if (Op0->getType()->isIntegerTy(1)) + // It can't be remainder by zero, hence it must be remainder by one. + return Constant::getNullValue(Op0->getType()); + + // X % X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. 
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifySRemInst - Given operands for an SRem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyURemInst - Given operands for a URem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, + unsigned) { + // undef % X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + return 0; +} + +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, + const Query &Q, unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); + } + } + + // 0 shift by X -> 0 + if (match(Op0, m_Zero())) + return Op0; + + // X shift by 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X shift by undef -> undef because it may shift by the bitwidth. + if (match(Op1, m_Undef())) + return Op1; + + // Shifting by the bitwidth or more is undefined. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) + if (CI->getValue().getLimitedValue() >= + Op0->getType()->getScalarSizeInBits()) + return UndefValue::get(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifyShlInst - Given operands for an Shl, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) + return V; + + // undef << X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // (X >> A) << A -> X + Value *X; + if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) + return X; + return 0; +} + +Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); +} + +/// SimplifyLShrInst - Given operands for an LShr, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse)) + return V; + + // X >> X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // undef >>l X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // (X << A) >> A -> X + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && + cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap()) + return X; + + return 0; +} + +Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + RecursionLimit); +} + +/// SimplifyAShrInst - Given operands for an AShr, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse)) + return V; + + // X >> X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // all ones >>a X -> all ones + if (match(Op0, m_AllOnes())) + return Op0; + + // undef >>a X -> all ones + if (match(Op0, m_Undef())) + return Constant::getAllOnesValue(Op0->getType()); + + // (X << A) >> A -> X + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && + cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) + return X; + + return 0; +} + +Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + RecursionLimit); +} + +/// SimplifyAndInst - Given operands for an And, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X & undef -> 0 + if (match(Op1, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // X & X = X + if (Op0 == Op1) + return Op0; + + // X & 0 = 0 + if (match(Op1, m_Zero())) + return Op1; + + // X & -1 = X + if (match(Op1, m_AllOnes())) + return Op0; + + // A & ~A = ~A & A = 0 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getNullValue(Op0->getType()); + + // (A | ?) 
& A = A + Value *A = 0, *B = 0; + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + (A == Op1 || B == Op1)) + return Op1; + + // A & (A | ?) = A + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) + return Op0; + + // A & (-A) = A if A is a power of two or zero. + if (match(Op0, m_Neg(m_Specific(Op1))) || + match(Op1, m_Neg(m_Specific(Op0)))) { + if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true)) + return Op0; + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) + return Op1; + } + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, + MaxRecurse)) + return V; + + // And distributes over Or. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or, + Q, MaxRecurse)) + return V; + + // And distributes over Xor. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor, + Q, MaxRecurse)) + return V; + + // Or distributes over And. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or, + Q, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. 
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q, + MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyOrInst - Given operands for an Or, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X | undef -> -1 + if (match(Op1, m_Undef())) + return Constant::getAllOnesValue(Op0->getType()); + + // X | X = X + if (Op0 == Op1) + return Op0; + + // X | 0 = X + if (match(Op1, m_Zero())) + return Op0; + + // X | -1 = -1 + if (match(Op1, m_AllOnes())) + return Op1; + + // A | ~A = ~A | A = -1 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getAllOnesValue(Op0->getType()); + + // (A & ?) | A = A + Value *A = 0, *B = 0; + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + (A == Op1 || B == Op1)) + return Op1; + + // A | (A & ?) = A + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) + return Op0; + + // ~(A & ?) | A = -1 + if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) && + (A == Op1 || B == Op1)) + return Constant::getAllOnesValue(Op1->getType()); + + // A | ~(A & ?) = -1 + if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) && + (A == Op0 || B == Op0)) + return Constant::getAllOnesValue(Op0->getType()); + + // Try some generic simplifications for associative operations. 
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, + MaxRecurse)) + return V; + + // Or distributes over And. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q, + MaxRecurse)) + return V; + + // And distributes over Or. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And, + Q, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyXorInst - Given operands for a Xor, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. 
+ std::swap(Op0, Op1); + } + + // A ^ undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // A ^ 0 = A + if (match(Op1, m_Zero())) + return Op0; + + // A ^ A = 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // A ^ ~A = ~A ^ A = -1 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getAllOnesValue(Op0->getType()); + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, + MaxRecurse)) + return V; + + // And distributes over Xor. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And, + Q, MaxRecurse)) + return V; + + // Threading Xor over selects and phi nodes is pointless, so don't bother. + // Threading over the select in "A ^ select(cond, B, C)" means evaluating + // "A^B" and "A^C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); +} + +static Type *GetCompareTy(Value *Op) { + return CmpInst::makeCmpResultType(Op->getType()); +} + +/// ExtractEquivalentCondition - Rummage around inside V looking for something +/// equivalent to the comparison "LHS Pred RHS". Return such a value if found, +/// otherwise return null. Helper function for analyzing max/min idioms. 
+static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, + Value *LHS, Value *RHS) { + SelectInst *SI = dyn_cast<SelectInst>(V); + if (!SI) + return 0; + CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); + if (!Cmp) + return 0; + Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); + if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS) + return Cmp; + if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && + LHS == CmpRHS && RHS == CmpLHS) + return Cmp; + return 0; +} + +// A significant optimization not implemented here is assuming that alloca +// addresses are not equal to incoming argument values. They don't *alias*, +// as we say, but that doesn't mean they aren't equal, so we take a +// conservative approach. +// +// This is inspired in part by C++11 5.10p1: +// "Two pointers of the same type compare equal if and only if they are both +// null, both point to the same function, or both represent the same +// address." +// +// This is pretty permissive. +// +// It's also partly due to C11 6.5.9p6: +// "Two pointers compare equal if and only if both are null pointers, both are +// pointers to the same object (including a pointer to an object and a +// subobject at its beginning) or function, both are pointers to one past the +// last element of the same array object, or one is a pointer to one past the +// end of one array object and the other is a pointer to the start of a +// different array object that happens to immediately follow the first array +// object in the address space.) +// +// C11's version is more restrictive, however there's no reason why an argument +// couldn't be a one-past-the-end value for a stack object in the caller and be +// equal to the beginning of a stack object in the callee. +// +// If the C and C++ standards are ever made sufficiently restrictive in this +// area, it may be possible to update LLVM's semantics accordingly and reinstate +// this optimization. 
+static Constant *computePointerICmp(const DataLayout *TD, + const TargetLibraryInfo *TLI, + CmpInst::Predicate Pred, + Value *LHS, Value *RHS) { + // First, skip past any trivial no-ops. + LHS = LHS->stripPointerCasts(); + RHS = RHS->stripPointerCasts(); + + // A non-null pointer is not equal to a null pointer. + if (llvm::isKnownNonNull(LHS, TLI) && isa<ConstantPointerNull>(RHS) && + (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + + // We can only fold certain predicates on pointer comparisons. + switch (Pred) { + default: + return 0; + + // Equality comaprisons are easy to fold. + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_NE: + break; + + // We can only handle unsigned relational comparisons because 'inbounds' on + // a GEP only protects against unsigned wrapping. + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_ULE: + // However, we have to switch them to their signed variants to handle + // negative indices from the base pointer. + Pred = ICmpInst::getSignedPredicate(Pred); + break; + } + + // Strip off any constant offsets so that we can reason about them. + // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets + // here and compare base addresses like AliasAnalysis does, however there are + // numerous hazards. AliasAnalysis and its utilities rely on special rules + // governing loads and stores which don't apply to icmps. Also, AliasAnalysis + // doesn't need to guarantee pointer inequality when it says NoAlias. + Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + + // If LHS and RHS are related via constant offsets to the same base + // value, we can replace it with an icmp which just compares the offsets. 
+ if (LHS == RHS) + return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + + // Various optimizations for (in)equality comparisons. + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { + // Different non-empty allocations that exist at the same time have + // different addresses (if the program can tell). Global variables always + // exist, so they always exist during the lifetime of each other and all + // allocas. Two different allocas usually have different addresses... + // + // However, if there's an @llvm.stackrestore dynamically in between two + // allocas, they may have the same address. It's tempting to reduce the + // scope of the problem by only looking at *static* allocas here. That would + // cover the majority of allocas while significantly reducing the likelihood + // of having an @llvm.stackrestore pop up in the middle. However, it's not + // actually impossible for an @llvm.stackrestore to pop up in the middle of + // an entry block. Also, if we have a block that's not attached to a + // function, we can't tell if it's "static" under the current definition. + // Theoretically, this problem could be fixed by creating a new kind of + // instruction kind specifically for static allocas. Such a new instruction + // could be required to be at the top of the entry block, thus preventing it + // from being subject to a @llvm.stackrestore. Instcombine could even + // convert regular allocas into these special allocas. It'd be nifty. + // However, until then, this problem remains open. + // + // So, we'll assume that two non-empty allocas have different addresses + // for now. + // + // With all that, if the offsets are within the bounds of their allocations + // (and not one-past-the-end! so we can't use inbounds!), and their + // allocations aren't the same, the pointers are not equal. + // + // Note that it's not necessary to check for LHS being a global variable + // address, due to canonicalization and constant folding. 
+ if (isa<AllocaInst>(LHS) && + (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) { + ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset); + ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset); + uint64_t LHSSize, RHSSize; + if (LHSOffsetCI && RHSOffsetCI && + getObjectSize(LHS, LHSSize, TD, TLI) && + getObjectSize(RHS, RHSSize, TD, TLI)) { + const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); + const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); + if (!LHSOffsetValue.isNegative() && + !RHSOffsetValue.isNegative() && + LHSOffsetValue.ult(LHSSize) && + RHSOffsetValue.ult(RHSSize)) { + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } + + // Repeat the above check but this time without depending on DataLayout + // or being able to compute a precise size. + if (!cast<PointerType>(LHS->getType())->isEmptyTy() && + !cast<PointerType>(RHS->getType())->isEmptyTy() && + LHSOffset->isNullValue() && + RHSOffset->isNullValue()) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + + // Even if an non-inbounds GEP occurs along the path we can still optimize + // equality comparisons concerning the result. We avoid walking the whole + // chain again by starting where the last calls to + // stripAndComputeConstantOffsets left off and accumulate the offsets. + Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true); + Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true); + if (LHS == RHS) + return ConstantExpr::getICmp(Pred, + ConstantExpr::getAdd(LHSOffset, LHSNoBound), + ConstantExpr::getAdd(RHSOffset, RHSNoBound)); + } + + // Otherwise, fail. + return 0; +} + +/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + Type *ITy = GetCompareTy(LHS); // The return type. + Type *OpTy = LHS->getType(); // The operand type. + + // icmp X, X -> true/false + // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false + // because X could be 0. + if (LHS == RHS || isa<UndefValue>(RHS)) + return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); + + // Special case logic when the operands have i1 type. + if (OpTy->getScalarType()->isIntegerTy(1)) { + switch (Pred) { + default: break; + case ICmpInst::ICMP_EQ: + // X == 1 -> X + if (match(RHS, m_One())) + return LHS; + break; + case ICmpInst::ICMP_NE: + // X != 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_UGT: + // X >u 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_UGE: + // X >=u 1 -> X + if (match(RHS, m_One())) + return LHS; + break; + case ICmpInst::ICMP_SLT: + // X <s 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_SLE: + // X <=s -1 -> X + if (match(RHS, m_One())) + return LHS; + break; + } + } + + // If we are comparing with zero then try hard since this is a common case. 
+ if (match(RHS, m_Zero())) { + bool LHSKnownNonNegative, LHSKnownNegative; + switch (Pred) { + default: llvm_unreachable("Unknown ICmp predicate!"); + case ICmpInst::ICMP_ULT: + return getFalse(ITy); + case ICmpInst::ICMP_UGE: + return getTrue(ITy); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_ULE: + if (isKnownNonZero(LHS, Q.TD)) + return getFalse(ITy); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGT: + if (isKnownNonZero(LHS, Q.TD)) + return getTrue(ITy); + break; + case ICmpInst::ICMP_SLT: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); + if (LHSKnownNegative) + return getTrue(ITy); + if (LHSKnownNonNegative) + return getFalse(ITy); + break; + case ICmpInst::ICMP_SLE: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); + if (LHSKnownNegative) + return getTrue(ITy); + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) + return getFalse(ITy); + break; + case ICmpInst::ICMP_SGE: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); + if (LHSKnownNegative) + return getFalse(ITy); + if (LHSKnownNonNegative) + return getTrue(ITy); + break; + case ICmpInst::ICMP_SGT: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); + if (LHSKnownNegative) + return getFalse(ITy); + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) + return getTrue(ITy); + break; + } + } + + // See if we are doing a comparison with a constant integer. + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Rule out tautological comparisons (eg., ult 0 or uge 0). + ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue()); + if (RHS_CR.isEmptySet()) + return ConstantInt::getFalse(CI->getContext()); + if (RHS_CR.isFullSet()) + return ConstantInt::getTrue(CI->getContext()); + + // Many binary operators with constant RHS have easy to compute constant + // range. Use them to check whether the comparison is a tautology. 
+ uint32_t Width = CI->getBitWidth(); + APInt Lower = APInt(Width, 0); + APInt Upper = APInt(Width, 0); + ConstantInt *CI2; + if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) { + // 'urem x, CI2' produces [0, CI2). + Upper = CI2->getValue(); + } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) { + // 'srem x, CI2' produces (-|CI2|, |CI2|). + Upper = CI2->getValue().abs(); + Lower = (-Upper) + 1; + } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) { + // 'udiv CI2, x' produces [0, CI2]. + Upper = CI2->getValue() + 1; + } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { + // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (!CI2->isZero()) + Upper = NegOne.udiv(CI2->getValue()) + 1; + } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { + // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]. + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + APInt Val = CI2->getValue().abs(); + if (!Val.isMinValue()) { + Lower = IntMin.sdiv(Val); + Upper = IntMax.sdiv(Val) + 1; + } + } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) { + // 'lshr x, CI2' produces [0, UINT_MAX >> CI2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (CI2->getValue().ult(Width)) + Upper = NegOne.lshr(CI2->getValue()) + 1; + } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) { + // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2]. + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + if (CI2->getValue().ult(Width)) { + Lower = IntMin.ashr(CI2->getValue()); + Upper = IntMax.ashr(CI2->getValue()) + 1; + } + } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) { + // 'or x, CI2' produces [CI2, UINT_MAX]. + Lower = CI2->getValue(); + } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) { + // 'and x, CI2' produces [0, CI2]. 
+ Upper = CI2->getValue() + 1; + } + if (Lower != Upper) { + ConstantRange LHS_CR = ConstantRange(Lower, Upper); + if (RHS_CR.contains(LHS_CR)) + return ConstantInt::getTrue(RHS->getContext()); + if (RHS_CR.inverse().contains(LHS_CR)) + return ConstantInt::getFalse(RHS->getContext()); + } + } + + // Compare of cast, for example (zext X) != 0 -> X != 0 + if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) { + Instruction *LI = cast<CastInst>(LHS); + Value *SrcOp = LI->getOperand(0); + Type *SrcTy = SrcOp->getType(); + Type *DstTy = LI->getType(); + + // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input + // if the integer type is the same size as the pointer type. + if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) && + Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // Transfer the cast to the constant. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, + ConstantExpr::getIntToPtr(RHSC, SrcTy), + Q, MaxRecurse-1)) + return V; + } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { + if (RI->getOperand(0)->getType() == SrcTy) + // Compare without the cast. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), + Q, MaxRecurse-1)) + return V; + } + } + + if (isa<ZExtInst>(LHS)) { + // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the + // same type. + if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) { + if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) + // Compare X and Y. Note that signed predicates become unsigned. + if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, RI->getOperand(0), Q, + MaxRecurse-1)) + return V; + } + // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended + // too. If not, then try to deduce the result of the comparison. 
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DstTy. + Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); + Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy); + + // If the re-extended constant didn't change then this is effectively + // also a case of comparing two zero-extended values. + if (RExt == CI && MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, Trunc, Q, MaxRecurse-1)) + return V; + + // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit + // there. Use this to work out the result of the comparison. + if (RExt != CI) { + switch (Pred) { + default: llvm_unreachable("Unknown ICmp predicate!"); + // LHS <u RHS. + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return ConstantInt::getFalse(CI->getContext()); + + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return ConstantInt::getTrue(CI->getContext()); + + // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS + // is non-negative then LHS <s RHS. + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return CI->getValue().isNegative() ? + ConstantInt::getTrue(CI->getContext()) : + ConstantInt::getFalse(CI->getContext()); + + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return CI->getValue().isNegative() ? + ConstantInt::getFalse(CI->getContext()) : + ConstantInt::getTrue(CI->getContext()); + } + } + } + } + + if (isa<SExtInst>(LHS)) { + // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the + // same type. + if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) { + if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) + // Compare X and Y. Note that the predicate does not change. 
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), + Q, MaxRecurse-1)) + return V; + } + // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended + // too. If not, then try to deduce the result of the comparison. + else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DstTy. + Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); + Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy); + + // If the re-extended constant didn't change then this is effectively + // also a case of comparing two sign-extended values. + if (RExt == CI && MaxRecurse) + if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) + return V; + + // Otherwise the upper bits of LHS are all equal, while RHS has varying + // bits there. Use this to work out the result of the comparison. + if (RExt != CI) { + switch (Pred) { + default: llvm_unreachable("Unknown ICmp predicate!"); + case ICmpInst::ICMP_EQ: + return ConstantInt::getFalse(CI->getContext()); + case ICmpInst::ICMP_NE: + return ConstantInt::getTrue(CI->getContext()); + + // If RHS is non-negative then LHS <s RHS. If RHS is negative then + // LHS >s RHS. + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return CI->getValue().isNegative() ? + ConstantInt::getTrue(CI->getContext()) : + ConstantInt::getFalse(CI->getContext()); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return CI->getValue().isNegative() ? + ConstantInt::getFalse(CI->getContext()) : + ConstantInt::getTrue(CI->getContext()); + + // If LHS is non-negative then LHS <u RHS. If LHS is negative then + // LHS >u RHS. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // Comparison is true iff the LHS <s 0. 
+ if (MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, + Constant::getNullValue(SrcTy), + Q, MaxRecurse-1)) + return V; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + // Comparison is true iff the LHS >=s 0. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, + Constant::getNullValue(SrcTy), + Q, MaxRecurse-1)) + return V; + break; + } + } + } + } + } + + // Special logic for binary operators. + BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); + if (MaxRecurse && (LBO || RBO)) { + // Analyze the case when either LHS or RHS is an add instruction. + Value *A = 0, *B = 0, *C = 0, *D = 0; + // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). + bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; + if (LBO && LBO->getOpcode() == Instruction::Add) { + A = LBO->getOperand(0); B = LBO->getOperand(1); + NoLHSWrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap()); + } + if (RBO && RBO->getOpcode() == Instruction::Add) { + C = RBO->getOperand(0); D = RBO->getOperand(1); + NoRHSWrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap()); + } + + // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + if ((A == RHS || B == RHS) && NoLHSWrapProblem) + if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, + Constant::getNullValue(RHS->getType()), + Q, MaxRecurse-1)) + return V; + + // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + if ((C == LHS || D == LHS) && NoRHSWrapProblem) + if (Value *V = SimplifyICmpInst(Pred, + Constant::getNullValue(LHS->getType()), + C == LHS ? D : C, Q, MaxRecurse-1)) + return V; + + // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. 
+ if (A && C && (A == C || A == D || B == C || B == D) && + NoLHSWrapProblem && NoRHSWrapProblem) { + // Determine Y and Z in the form icmp (X+Y), (X+Z). + Value *Y, *Z; + if (A == C) { + // C + B == C + D -> B == D + Y = B; + Z = D; + } else if (A == D) { + // D + B == C + D -> B == C + Y = B; + Z = C; + } else if (B == C) { + // A + C == C + D -> A == D + Y = A; + Z = D; + } else { + assert(B == D); + // A + D == C + D -> A == C + Y = A; + Z = C; + } + if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1)) + return V; + } + } + + // icmp pred (urem X, Y), Y + if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { + bool KnownNonNegative, KnownNegative; + switch (Pred) { + default: + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); + if (!KnownNonNegative) + break; + // fall-through + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return getFalse(ITy); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); + if (!KnownNonNegative) + break; + // fall-through + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return getTrue(ITy); + } + } + + // icmp pred X, (urem Y, X) + if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { + bool KnownNonNegative, KnownNegative; + switch (Pred) { + default: + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); + if (!KnownNonNegative) + break; + // fall-through + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return getTrue(ITy); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); + if (!KnownNonNegative) + break; + // fall-through + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return getFalse(ITy); + } + } + + // x 
udiv y <=u x. + if (LBO && match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) { + // icmp pred (X /u Y), X + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + + if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && + LBO->getOperand(1) == RBO->getOperand(1)) { + switch (LBO->getOpcode()) { + default: break; + case Instruction::UDiv: + case Instruction::LShr: + if (ICmpInst::isSigned(Pred)) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!LBO->isExact() || !RBO->isExact()) + break; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), Q, MaxRecurse-1)) + return V; + break; + case Instruction::Shl: { + bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap(); + bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && ICmpInst::isSigned(Pred)) + break; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), Q, MaxRecurse-1)) + return V; + break; + } + } + } + + // Simplify comparisons involving max/min. + Value *A, *B; + CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE; + CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B". + + // Signed variants on "max(a,b)>=a -> true". + if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // smax(A, B) pred A. + EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". + // We analyze this as smax(A, B) pred A. + P = Pred; + } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred smax(A, B). + EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". + // We analyze this as smax(A, B) swapped-pred A. 
+ P = CmpInst::getSwappedPredicate(Pred); + } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && + (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // smin(A, B) pred A. + EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". + // We analyze this as smax(-A, -B) swapped-pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred smin(A, B). + EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". + // We analyze this as smax(-A, -B) pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = Pred; + } + if (P != CmpInst::BAD_ICMP_PREDICATE) { + // Cases correspond to "max(A, B) p A". + switch (P) { + default: + break; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_SLE: + // Equivalent to "A EqP B". This may be the same as the condition tested + // in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + return V; + // Otherwise, see if "A EqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) + return V; + break; + case CmpInst::ICMP_NE: + case CmpInst::ICMP_SGT: { + CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); + // Equivalent to "A InvEqP B". This may be the same as the condition + // tested in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + return V; + // Otherwise, see if "A InvEqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) + return V; + break; + } + case CmpInst::ICMP_SGE: + // Always true. 
+ return getTrue(ITy); + case CmpInst::ICMP_SLT: + // Always false. + return getFalse(ITy); + } + } + + // Unsigned variants on "max(a,b)>=a -> true". + P = CmpInst::BAD_ICMP_PREDICATE; + if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // umax(A, B) pred A. + EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". + // We analyze this as umax(A, B) pred A. + P = Pred; + } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred umax(A, B). + EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". + // We analyze this as umax(A, B) swapped-pred A. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && + (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // umin(A, B) pred A. + EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". + // We analyze this as umax(-A, -B) swapped-pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred umin(A, B). + EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". + // We analyze this as umax(-A, -B) pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = Pred; + } + if (P != CmpInst::BAD_ICMP_PREDICATE) { + // Cases correspond to "max(A, B) p A". + switch (P) { + default: + break; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_ULE: + // Equivalent to "A EqP B". This may be the same as the condition tested + // in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + return V; + // Otherwise, see if "A EqP B" simplifies. 
+ if (MaxRecurse) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) + return V; + break; + case CmpInst::ICMP_NE: + case CmpInst::ICMP_UGT: { + CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); + // Equivalent to "A InvEqP B". This may be the same as the condition + // tested in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + return V; + // Otherwise, see if "A InvEqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) + return V; + break; + } + case CmpInst::ICMP_UGE: + // Always true. + return getTrue(ITy); + case CmpInst::ICMP_ULT: + // Always false. + return getFalse(ITy); + } + } + + // Variants on "max(x,y) >= min(x,z)". + Value *C, *D; + if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && + match(RHS, m_SMin(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // max(x, ?) pred min(x, ?). + if (Pred == CmpInst::ICMP_SGE) + // Always true. + return getTrue(ITy); + if (Pred == CmpInst::ICMP_SLT) + // Always false. + return getFalse(ITy); + } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && + match(RHS, m_SMax(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // min(x, ?) pred max(x, ?). + if (Pred == CmpInst::ICMP_SLE) + // Always true. + return getTrue(ITy); + if (Pred == CmpInst::ICMP_SGT) + // Always false. + return getFalse(ITy); + } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && + match(RHS, m_UMin(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // max(x, ?) pred min(x, ?). + if (Pred == CmpInst::ICMP_UGE) + // Always true. + return getTrue(ITy); + if (Pred == CmpInst::ICMP_ULT) + // Always false. 
+ return getFalse(ITy); + } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && + match(RHS, m_UMax(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // min(x, ?) pred max(x, ?). + if (Pred == CmpInst::ICMP_ULE) + // Always true. + return getTrue(ITy); + if (Pred == CmpInst::ICMP_UGT) + // Always false. + return getFalse(ITy); + } + + // Simplify comparisons of related pointers using a powerful, recursive + // GEP-walk when we have target data available.. + if (LHS->getType()->isPointerTy()) + if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS)) + return C; + + if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) { + if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) { + if (GLHS->getPointerOperand() == GRHS->getPointerOperand() && + GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() && + (ICmpInst::isEquality(Pred) || + (GLHS->isInBounds() && GRHS->isInBounds() && + Pred == ICmpInst::getSignedPredicate(Pred)))) { + // The bases are equal and the indices are constant. Build a constant + // expression GEP with the same indices and a null base pointer to see + // what constant folding can make out of it. + Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); + SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); + Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS); + + SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); + Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS); + return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); + } + } + } + + // If the comparison is with the result of a select instruction, check whether + // comparing with either branch of the select always yields the same value. 
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) + return V; + + // If the comparison is with the result of a phi instruction, check whether + // doing the compare with each incoming phi value yields a common result. + if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); +} + +/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // Fold trivial predicates. + if (Pred == FCmpInst::FCMP_FALSE) + return ConstantInt::get(GetCompareTy(LHS), 0); + if (Pred == FCmpInst::FCMP_TRUE) + return ConstantInt::get(GetCompareTy(LHS), 1); + + if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef + return UndefValue::get(GetCompareTy(LHS)); + + // fcmp x,x -> true/false. Not all compares are foldable. 
+ if (LHS == RHS) { + if (CmpInst::isTrueWhenEqual(Pred)) + return ConstantInt::get(GetCompareTy(LHS), 1); + if (CmpInst::isFalseWhenEqual(Pred)) + return ConstantInt::get(GetCompareTy(LHS), 0); + } + + // Handle fcmp with constant RHS + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // If the constant is a nan, see if we can fold the comparison based on it. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { + if (CFP->getValueAPF().isNaN()) { + if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" + return ConstantInt::getFalse(CFP->getContext()); + assert(FCmpInst::isUnordered(Pred) && + "Comparison must be either ordered or unordered!"); + // True if unordered. + return ConstantInt::getTrue(CFP->getContext()); + } + // Check whether the constant is an infinity. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + switch (Pred) { + case FCmpInst::FCMP_OLT: + // No value is ordered and less than negative infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_UGE: + // All values are unordered with or at least negative infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } else { + switch (Pred) { + case FCmpInst::FCMP_OGT: + // No value is ordered and greater than infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_ULE: + // All values are unordered with and at most infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } + } + } + } + + // If the comparison is with the result of a select instruction, check whether + // comparing with either branch of the select always yields the same value. + if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) + return V; + + // If the comparison is with the result of a phi instruction, check whether + // doing the compare with each incoming phi value yields a common result. 
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); +} + +/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold +/// the result. If not, this returns null. +static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, + Value *FalseVal, const Query &Q, + unsigned MaxRecurse) { + // select true, X, Y -> X + // select false, X, Y -> Y + if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) + return CB->getZExtValue() ? TrueVal : FalseVal; + + // select C, X, X -> X + if (TrueVal == FalseVal) + return TrueVal; + + if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y + if (isa<Constant>(TrueVal)) + return TrueVal; + return FalseVal; + } + if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X + return FalseVal; + if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X + return TrueVal; + + return 0; +} + +Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT), + RecursionLimit); +} + +/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { + // The type of the GEP pointer operand. + PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType()); + // The GEP pointer operand is not a pointer, it's a vector of pointers. + if (!PtrTy) + return 0; + + // getelementptr P -> P. 
+ if (Ops.size() == 1) + return Ops[0]; + + if (isa<UndefValue>(Ops[0])) { + // Compute the (pointer) type returned by the GEP instruction. + Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); + Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); + return UndefValue::get(GEPTy); + } + + if (Ops.size() == 2) { + // getelementptr P, 0 -> P. + if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1])) + if (C->isZero()) + return Ops[0]; + // getelementptr P, N -> P if P points to a type of zero size. + if (Q.TD) { + Type *Ty = PtrTy->getElementType(); + if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0) + return Ops[0]; + } + } + + // Check to see if this is constant foldable. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (!isa<Constant>(Ops[i])) + return 0; + + return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); +} + +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we +/// can fold the result. If not, this returns null. 
+static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, const Query &Q, + unsigned) { + if (Constant *CAgg = dyn_cast<Constant>(Agg)) + if (Constant *CVal = dyn_cast<Constant>(Val)) + return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); + + // insertvalue x, undef, n -> x + if (match(Val, m_Undef())) + return Agg; + + // insertvalue x, (extractvalue y, n), n + if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val)) + if (EV->getAggregateOperand()->getType() == Agg->getType() && + EV->getIndices() == Idxs) { + // insertvalue undef, (extractvalue y, n), n -> y + if (match(Agg, m_Undef())) + return EV->getAggregateOperand(); + + // insertvalue y, (extractvalue y, n), n -> y + if (Agg == EV->getAggregateOperand()) + return Agg; + } + + return 0; +} + +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT), + RecursionLimit); +} + +/// SimplifyPHINode - See if we can fold the given phi. If not, returns null. +static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { + // If all of the PHI's incoming values are the same then replace the PHI node + // with the common value. + Value *CommonValue = 0; + bool HasUndefInput = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PN->getIncomingValue(i); + // If the incoming value is the phi node itself, it can safely be skipped. + if (Incoming == PN) continue; + if (isa<UndefValue>(Incoming)) { + // Remember that we saw an undef value, but otherwise ignore them. + HasUndefInput = true; + continue; + } + if (CommonValue && Incoming != CommonValue) + return 0; // Not the same, bail out. + CommonValue = Incoming; + } + + // If CommonValue is null then all of the incoming values were either undef or + // equal to the phi node itself. 
+ if (!CommonValue) + return UndefValue::get(PN->getType()); + + // If we have a PHI node like phi(X, undef, X), where X is defined by some + // instruction, we cannot return X as the result of the PHI node unless it + // dominates the PHI block. + if (HasUndefInput) + return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0; + + return CommonValue; +} + +static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { + if (Constant *C = dyn_cast<Constant>(Op)) + return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI); + + return 0; +} + +Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit); +} + +//=== Helper functions for higher up the class hierarchy. + +/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + switch (Opcode) { + case Instruction::Add: + return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + Q, MaxRecurse); + case Instruction::FAdd: + return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + + case Instruction::Sub: + return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + Q, MaxRecurse); + case Instruction::FSub: + return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse); + case Instruction::FMul: + return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, 
MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::Shl: + return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + Q, MaxRecurse); + case Instruction::LShr: + return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); + case Instruction::AShr: + return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); + case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse); + case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse); + case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); + default: + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) { + Constant *COps[] = {CLHS, CRHS}; + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD, + Q.TLI); + } + + // If the operation is associative, try some generic simplifications. + if (Instruction::isAssociative(Opcode)) + if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. 
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse)) + return V; + + return 0; + } +} + +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit); +} + +/// SimplifyCmpInst - Given operands for a CmpInst, see if we can +/// fold the result. +static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) + return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse); +} + +Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); +} + +static bool IsIdempotent(Intrinsic::ID ID) { + switch (ID) { + default: return false; + + // Unary idempotent: f(f(x)) = f(x) + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::round: + return true; + } +} + +template <typename IterTy> +static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd, + const Query &Q, unsigned MaxRecurse) { + // Perform idempotent optimizations + if (!IsIdempotent(IID)) + return 0; + + // Unary Ops + if (std::distance(ArgBegin, ArgEnd) == 1) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) + if (II->getIntrinsicID() == IID) + return II; + + return 0; +} + +template <typename IterTy> +static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, + const Query &Q, unsigned MaxRecurse) { + Type *Ty = V->getType(); + if (PointerType *PTy = 
dyn_cast<PointerType>(Ty)) + Ty = PTy->getElementType(); + FunctionType *FTy = cast<FunctionType>(Ty); + + // call undef -> undef + if (isa<UndefValue>(V)) + return UndefValue::get(FTy->getReturnType()); + + Function *F = dyn_cast<Function>(V); + if (!F) + return 0; + + if (unsigned IID = F->getIntrinsicID()) + if (Value *Ret = + SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse)) + return Ret; + + if (!canConstantFoldCallTo(F)) + return 0; + + SmallVector<Constant *, 4> ConstantArgs; + ConstantArgs.reserve(ArgEnd - ArgBegin); + for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) { + Constant *C = dyn_cast<Constant>(*I); + if (!C) + return 0; + ConstantArgs.push_back(C); + } + + return ConstantFoldCall(F, ConstantArgs, Q.TLI); +} + +Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(TD, TLI, DT), + RecursionLimit); +} + +Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCall(V, Args.begin(), Args.end(), Query(TD, TLI, DT), + RecursionLimit); +} + +/// SimplifyInstruction - See if we can compute a simplified version of this +/// instruction. If not, this returns null. 
+Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + Value *Result; + + switch (I->getOpcode()) { + default: + Result = ConstantFoldInstruction(I, TD, TLI); + break; + case Instruction::FAdd: + Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; + case Instruction::Add: + Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, TLI, DT); + break; + case Instruction::FSub: + Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; + case Instruction::Sub: + Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, TLI, DT); + break; + case Instruction::FMul: + Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; + case Instruction::Mul: + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::SDiv: + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::UDiv: + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::FDiv: + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::SRem: + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::URem: + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::FRem: + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::Shl: + Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), 
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, TLI, DT); + break; + case Instruction::LShr: + Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->isExact(), + TD, TLI, DT); + break; + case Instruction::AShr: + Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->isExact(), + TD, TLI, DT); + break; + case Instruction::And: + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::Or: + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::Xor: + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::ICmp: + Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::FCmp: + Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD, TLI, DT); + break; + case Instruction::Select: + Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), + I->getOperand(2), TD, TLI, DT); + break; + case Instruction::GetElementPtr: { + SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); + Result = SimplifyGEPInst(Ops, TD, TLI, DT); + break; + } + case Instruction::InsertValue: { + InsertValueInst *IV = cast<InsertValueInst>(I); + Result = SimplifyInsertValueInst(IV->getAggregateOperand(), + IV->getInsertedValueOperand(), + IV->getIndices(), TD, TLI, DT); + break; + } + case Instruction::PHI: + Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT)); + break; + case Instruction::Call: { + CallSite CS(cast<CallInst>(I)); + Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + TD, TLI, DT); + break; + } + case Instruction::Trunc: + Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT); + break; + } + + /// If called on unreachable code, the above logic may report that the + /// 
instruction simplified to itself. Make life easier for users by + /// detecting that case here, returning a safe value instead. + return Result == I ? UndefValue::get(I->getType()) : Result; +} + +/// \brief Implementation of recursive simplification through an instructions +/// uses. +/// +/// This is the common implementation of the recursive simplification routines. +/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to +/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of +/// instructions to process and attempt to simplify it using +/// InstructionSimplify. +/// +/// This routine returns 'true' only when *it* simplifies something. The passed +/// in simplified value does not count toward this. +static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + bool Simplified = false; + SmallSetVector<Instruction *, 8> Worklist; + + // If we have an explicit value to collapse to, do that round of the + // simplification loop by hand initially. + if (SimpleV) { + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) + if (*UI != I) + Worklist.insert(cast<Instruction>(*UI)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + + // Gracefully handle edge cases where the instruction is not wired into any + // parent block. + if (I->getParent()) + I->eraseFromParent(); + } else { + Worklist.insert(I); + } + + // Note that we must test the size on each iteration, the worklist can grow. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + I = Worklist[Idx]; + + // See if this instruction simplifies. + SimpleV = SimplifyInstruction(I, TD, TLI, DT); + if (!SimpleV) + continue; + + Simplified = true; + + // Stash away all the uses of the old instruction so we can check them for + // recursive simplifications after a RAUW. 
This is cheaper than checking all + // uses of To on the recursive step in most cases. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) + Worklist.insert(cast<Instruction>(*UI)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + + // Gracefully handle edge cases where the instruction is not wired into any + // parent block. + if (I->getParent()) + I->eraseFromParent(); + } + return Simplified; +} + +bool llvm::recursivelySimplifyInstruction(Instruction *I, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT); +} + +bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); + assert(SimpleV && "Must provide a simplified value."); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT); +} diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp new file mode 100644 index 000000000000..26a0322407ec --- /dev/null +++ b/contrib/llvm/lib/Analysis/Interval.cpp @@ -0,0 +1,58 @@ +//===- Interval.cpp - Interval class code ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definition of the Interval class, which represents a +// partition of a control flow graph of some kind. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Interval.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Interval Implementation +//===----------------------------------------------------------------------===// + +// isLoop - Find out if there is a back edge in this interval... +// +bool Interval::isLoop() const { + // There is a loop in this interval iff one of the predecessors of the header + // node lives in the interval. + for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode); + I != E; ++I) + if (contains(*I)) + return true; + return false; +} + + +void Interval::print(raw_ostream &OS) const { + OS << "-------------------------------------------------------------\n" + << "Interval Contents:\n"; + + // Print out all of the basic blocks in the interval... + for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(), + E = Nodes.end(); I != E; ++I) + OS << **I << "\n"; + + OS << "Interval Predecessors:\n"; + for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(), + E = Predecessors.end(); I != E; ++I) + OS << **I << "\n"; + + OS << "Interval Successors:\n"; + for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(), + E = Successors.end(); I != E; ++I) + OS << **I << "\n"; +} diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp new file mode 100644 index 000000000000..2e259b147b8b --- /dev/null +++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp @@ -0,0 +1,114 @@ +//===- IntervalPartition.cpp - Interval Partition module code -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definition of the IntervalPartition class, which +// calculates and represent the interval partition of a function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IntervalIterator.h" +using namespace llvm; + +char IntervalPartition::ID = 0; +INITIALIZE_PASS(IntervalPartition, "intervals", + "Interval Partition Construction", true, true) + +//===----------------------------------------------------------------------===// +// IntervalPartition Implementation +//===----------------------------------------------------------------------===// + +// releaseMemory - Reset state back to before function was analyzed +void IntervalPartition::releaseMemory() { + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + delete Intervals[i]; + IntervalMap.clear(); + Intervals.clear(); + RootInterval = 0; +} + +void IntervalPartition::print(raw_ostream &O, const Module*) const { + for(unsigned i = 0, e = Intervals.size(); i != e; ++i) + Intervals[i]->print(O); +} + +// addIntervalToPartition - Add an interval to the internal list of intervals, +// and then add mappings from all of the basic blocks in the interval to the +// interval itself (in the IntervalMap). +// +void IntervalPartition::addIntervalToPartition(Interval *I) { + Intervals.push_back(I); + + // Add mappings for all of the basic blocks in I to the IntervalPartition + for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end(); + It != End; ++It) + IntervalMap.insert(std::make_pair(*It, I)); +} + +// updatePredecessors - Interval generation only sets the successor fields of +// the interval data structures. After interval generation is complete, +// run through all of the intervals and propagate successor info as +// predecessor info. 
+// +void IntervalPartition::updatePredecessors(Interval *Int) { + BasicBlock *Header = Int->getHeaderNode(); + for (Interval::succ_iterator I = Int->Successors.begin(), + E = Int->Successors.end(); I != E; ++I) + getBlockInterval(*I)->Predecessors.push_back(Header); +} + +// IntervalPartition ctor - Build the first level interval partition for the +// specified function... +// +bool IntervalPartition::runOnFunction(Function &F) { + // Pass false to intervals_begin because we take ownership of it's memory + function_interval_iterator I = intervals_begin(&F, false); + assert(I != intervals_end(&F) && "No intervals in function!?!?!"); + + addIntervalToPartition(RootInterval = *I); + + ++I; // After the first one... + + // Add the rest of the intervals to the partition. + for (function_interval_iterator E = intervals_end(&F); I != E; ++I) + addIntervalToPartition(*I); + + // Now that we know all of the successor information, propagate this to the + // predecessors for each block. + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + updatePredecessors(Intervals[i]); + return false; +} + + +// IntervalPartition ctor - Build a reduced interval partition from an +// existing interval graph. This takes an additional boolean parameter to +// distinguish it from a copy constructor. Always pass in false for now. +// +IntervalPartition::IntervalPartition(IntervalPartition &IP, bool) + : FunctionPass(ID) { + assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!"); + + // Pass false to intervals_begin because we take ownership of it's memory + interval_part_interval_iterator I = intervals_begin(IP, false); + assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!"); + + addIntervalToPartition(RootInterval = *I); + + ++I; // After the first one... + + // Add the rest of the intervals to the partition. 
+ for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I) + addIntervalToPartition(*I); + + // Now that we know all of the successor information, propagate this to the + // predecessors for each block. + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + updatePredecessors(Intervals[i]); +} + diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp new file mode 100644 index 000000000000..b6970af4cdec --- /dev/null +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -0,0 +1,1143 @@ +//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for lazy computation of value constraint +// information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lazy-value-info" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include <map> +#include <stack> +using namespace llvm; +using namespace PatternMatch; + +char LazyValueInfo::ID = 0; +INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", + "Lazy Value Information Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 
+INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info", + "Lazy Value Information Analysis", false, true) + +namespace llvm { + FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); } +} + + +//===----------------------------------------------------------------------===// +// LVILatticeVal +//===----------------------------------------------------------------------===// + +/// LVILatticeVal - This is the information tracked by LazyValueInfo for each +/// value. +/// +/// FIXME: This is basically just for bringup, this can be made a lot more rich +/// in the future. +/// +namespace { +class LVILatticeVal { + enum LatticeValueTy { + /// undefined - This Value has no known value yet. + undefined, + + /// constant - This Value has a specific constant value. + constant, + /// notconstant - This Value is known to not have the specified value. + notconstant, + + /// constantrange - The Value falls within this range. + constantrange, + + /// overdefined - This value is not known to be constant, and we know that + /// it has a value. + overdefined + }; + + /// Val: This stores the current lattice value along with the Constant* for + /// the constant if this is a 'constant' or 'notconstant' value. 
+ LatticeValueTy Tag; + Constant *Val; + ConstantRange Range; + +public: + LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {} + + static LVILatticeVal get(Constant *C) { + LVILatticeVal Res; + if (!isa<UndefValue>(C)) + Res.markConstant(C); + return Res; + } + static LVILatticeVal getNot(Constant *C) { + LVILatticeVal Res; + if (!isa<UndefValue>(C)) + Res.markNotConstant(C); + return Res; + } + static LVILatticeVal getRange(ConstantRange CR) { + LVILatticeVal Res; + Res.markConstantRange(CR); + return Res; + } + + bool isUndefined() const { return Tag == undefined; } + bool isConstant() const { return Tag == constant; } + bool isNotConstant() const { return Tag == notconstant; } + bool isConstantRange() const { return Tag == constantrange; } + bool isOverdefined() const { return Tag == overdefined; } + + Constant *getConstant() const { + assert(isConstant() && "Cannot get the constant of a non-constant!"); + return Val; + } + + Constant *getNotConstant() const { + assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); + return Val; + } + + ConstantRange getConstantRange() const { + assert(isConstantRange() && + "Cannot get the constant-range of a non-constant-range!"); + return Range; + } + + /// markOverdefined - Return true if this is a change in status. + bool markOverdefined() { + if (isOverdefined()) + return false; + Tag = overdefined; + return true; + } + + /// markConstant - Return true if this is a change in status. + bool markConstant(Constant *V) { + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return markConstantRange(ConstantRange(CI->getValue())); + if (isa<UndefValue>(V)) + return false; + + assert((!isConstant() || getConstant() == V) && + "Marking constant with different value"); + assert(isUndefined()); + Tag = constant; + Val = V; + return true; + } + + /// markNotConstant - Return true if this is a change in status. 
+ bool markNotConstant(Constant *V) { + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); + if (isa<UndefValue>(V)) + return false; + + assert((!isConstant() || getConstant() != V) && + "Marking constant !constant with same value"); + assert((!isNotConstant() || getNotConstant() == V) && + "Marking !constant with different value"); + assert(isUndefined() || isConstant()); + Tag = notconstant; + Val = V; + return true; + } + + /// markConstantRange - Return true if this is a change in status. + bool markConstantRange(const ConstantRange NewR) { + if (isConstantRange()) { + if (NewR.isEmptySet()) + return markOverdefined(); + + bool changed = Range != NewR; + Range = NewR; + return changed; + } + + assert(isUndefined()); + if (NewR.isEmptySet()) + return markOverdefined(); + + Tag = constantrange; + Range = NewR; + return true; + } + + /// mergeIn - Merge the specified lattice value into this one, updating this + /// one and returning true if anything changed. + bool mergeIn(const LVILatticeVal &RHS) { + if (RHS.isUndefined() || isOverdefined()) return false; + if (RHS.isOverdefined()) return markOverdefined(); + + if (isUndefined()) { + Tag = RHS.Tag; + Val = RHS.Val; + Range = RHS.Range; + return true; + } + + if (isConstant()) { + if (RHS.isConstant()) { + if (Val == RHS.Val) + return false; + return markOverdefined(); + } + + if (RHS.isNotConstant()) { + if (Val == RHS.Val) + return markOverdefined(); + + // Unless we can prove that the two Constants are different, we must + // move to overdefined. + // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding. 
+ if (ConstantInt *Res = dyn_cast<ConstantInt>( + ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, + getConstant(), + RHS.getNotConstant()))) + if (Res->isOne()) + return markNotConstant(RHS.getNotConstant()); + + return markOverdefined(); + } + + // RHS is a ConstantRange, LHS is a non-integer Constant. + + // FIXME: consider the case where RHS is a range [1, 0) and LHS is + // a function. The correct result is to pick up RHS. + + return markOverdefined(); + } + + if (isNotConstant()) { + if (RHS.isConstant()) { + if (Val == RHS.Val) + return markOverdefined(); + + // Unless we can prove that the two Constants are different, we must + // move to overdefined. + // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding. + if (ConstantInt *Res = dyn_cast<ConstantInt>( + ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, + getNotConstant(), + RHS.getConstant()))) + if (Res->isOne()) + return false; + + return markOverdefined(); + } + + if (RHS.isNotConstant()) { + if (Val == RHS.Val) + return false; + return markOverdefined(); + } + + return markOverdefined(); + } + + assert(isConstantRange() && "New LVILattice type?"); + if (!RHS.isConstantRange()) + return markOverdefined(); + + ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); + if (NewR.isFullSet()) + return markOverdefined(); + return markConstantRange(NewR); + } +}; + +} // end anonymous namespace. 
+ +namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) + LLVM_ATTRIBUTE_USED; +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { + if (Val.isUndefined()) + return OS << "undefined"; + if (Val.isOverdefined()) + return OS << "overdefined"; + + if (Val.isNotConstant()) + return OS << "notconstant<" << *Val.getNotConstant() << '>'; + else if (Val.isConstantRange()) + return OS << "constantrange<" << Val.getConstantRange().getLower() << ", " + << Val.getConstantRange().getUpper() << '>'; + return OS << "constant<" << *Val.getConstant() << '>'; +} +} + +//===----------------------------------------------------------------------===// +// LazyValueInfoCache Decl +//===----------------------------------------------------------------------===// + +namespace { + /// LVIValueHandle - A callback value handle updates the cache when + /// values are erased. + class LazyValueInfoCache; + struct LVIValueHandle : public CallbackVH { + LazyValueInfoCache *Parent; + + LVIValueHandle(Value *V, LazyValueInfoCache *P) + : CallbackVH(V), Parent(P) { } + + void deleted(); + void allUsesReplacedWith(Value *V) { + deleted(); + } + }; +} + +namespace { + /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which + /// maintains information about queries across the clients' queries. + class LazyValueInfoCache { + /// ValueCacheEntryTy - This is all of the cached block information for + /// exactly one Value*. The entries are sorted by the BasicBlock* of the + /// entries, allowing us to do a lookup with a binary search. + typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy; + + /// ValueCache - This is all of the cached information for all values, + /// mapped from Value* to key information. + std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache; + + /// OverDefinedCache - This tracks, on a per-block basis, the set of + /// values that are over-defined at the end of that block. 
This is required + /// for cache updating. + typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; + DenseSet<OverDefinedPairTy> OverDefinedCache; + + /// SeenBlocks - Keep track of all blocks that we have ever seen, so we + /// don't spend time removing unused blocks from our caches. + DenseSet<AssertingVH<BasicBlock> > SeenBlocks; + + /// BlockValueStack - This stack holds the state of the value solver + /// during a query. It basically emulates the callstack of the naive + /// recursive value lookup process. + std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack; + + friend struct LVIValueHandle; + + /// OverDefinedCacheUpdater - A helper object that ensures that the + /// OverDefinedCache is updated whenever solveBlockValue returns. + struct OverDefinedCacheUpdater { + LazyValueInfoCache *Parent; + Value *Val; + BasicBlock *BB; + LVILatticeVal &BBLV; + + OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV, + LazyValueInfoCache *P) + : Parent(P), Val(V), BB(B), BBLV(LV) { } + + bool markResult(bool changed) { + if (changed && BBLV.isOverdefined()) + Parent->OverDefinedCache.insert(std::make_pair(BB, Val)); + return changed; + } + }; + + + + LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); + bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, + LVILatticeVal &Result); + bool hasBlockValue(Value *Val, BasicBlock *BB); + + // These methods process one work item and may add more. A false value + // returned means that the work item was not completely processed and must + // be revisited after going through the new items. 
+ bool solveBlockValue(Value *Val, BasicBlock *BB); + bool solveBlockValueNonLocal(LVILatticeVal &BBLV, + Value *Val, BasicBlock *BB); + bool solveBlockValuePHINode(LVILatticeVal &BBLV, + PHINode *PN, BasicBlock *BB); + bool solveBlockValueConstantRange(LVILatticeVal &BBLV, + Instruction *BBI, BasicBlock *BB); + + void solve(); + + ValueCacheEntryTy &lookup(Value *V) { + return ValueCache[LVIValueHandle(V, this)]; + } + + public: + /// getValueInBlock - This is the query interface to determine the lattice + /// value for the specified Value* at the end of the specified block. + LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB); + + /// getValueOnEdge - This is the query interface to determine the lattice + /// value for the specified Value* that is true on the specified edge. + LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB); + + /// threadEdge - This is the update interface to inform the cache that an + /// edge from PredBB to OldSucc has been threaded to be from PredBB to + /// NewSucc. + void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); + + /// eraseBlock - This is part of the update interface to inform the cache + /// that a block has been deleted. + void eraseBlock(BasicBlock *BB); + + /// clear - Empty the cache. 
+ void clear() { + SeenBlocks.clear(); + ValueCache.clear(); + OverDefinedCache.clear(); + } + }; +} // end anonymous namespace + +void LVIValueHandle::deleted() { + typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; + + SmallVector<OverDefinedPairTy, 4> ToErase; + for (DenseSet<OverDefinedPairTy>::iterator + I = Parent->OverDefinedCache.begin(), + E = Parent->OverDefinedCache.end(); + I != E; ++I) { + if (I->second == getValPtr()) + ToErase.push_back(*I); + } + + for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + Parent->OverDefinedCache.erase(*I); + + // This erasure deallocates *this, so it MUST happen after we're done + // using any and all members of *this. + Parent->ValueCache.erase(*this); +} + +void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { + // Shortcut if we have never seen this block. + DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB); + if (I == SeenBlocks.end()) + return; + SeenBlocks.erase(I); + + SmallVector<OverDefinedPairTy, 4> ToErase; + for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), + E = OverDefinedCache.end(); I != E; ++I) { + if (I->first == BB) + ToErase.push_back(*I); + } + + for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + OverDefinedCache.erase(*I); + + for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator + I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) + I->second.erase(BB); +} + +void LazyValueInfoCache::solve() { + while (!BlockValueStack.empty()) { + std::pair<BasicBlock*, Value*> &e = BlockValueStack.top(); + if (solveBlockValue(e.second, e.first)) { + assert(BlockValueStack.top() == e); + BlockValueStack.pop(); + } + } +} + +bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { + // If already a constant, there is nothing to compute. 
+ if (isa<Constant>(Val)) + return true; + + LVIValueHandle ValHandle(Val, this); + std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I = + ValueCache.find(ValHandle); + if (I == ValueCache.end()) return false; + return I->second.count(BB); +} + +LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(Val)) + return LVILatticeVal::get(VC); + + SeenBlocks.insert(BB); + return lookup(Val)[BB]; +} + +bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { + if (isa<Constant>(Val)) + return true; + + ValueCacheEntryTy &Cache = lookup(Val); + SeenBlocks.insert(BB); + LVILatticeVal &BBLV = Cache[BB]; + + // OverDefinedCacheUpdater is a helper object that will update + // the OverDefinedCache for us when this method exits. Make sure to + // call markResult on it as we exist, passing a bool to indicate if the + // cache needs updating, i.e. if we have solve a new value or not. + OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this); + + // If we've already computed this block's value, return it. + if (!BBLV.isUndefined()) { + DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); + + // Since we're reusing a cached value here, we don't need to update the + // OverDefinedCahce. The cache will have been properly updated + // whenever the cached value was inserted. + ODCacheUpdater.markResult(false); + return true; + } + + // Otherwise, this is the first time we're seeing this block. Reset the + // lattice value to overdefined, so that cycles will terminate and be + // conservatively correct. 
+ BBLV.markOverdefined(); + + Instruction *BBI = dyn_cast<Instruction>(Val); + if (BBI == 0 || BBI->getParent() != BB) { + return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); + } + + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB)); + } + + if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) { + BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); + return ODCacheUpdater.markResult(true); + } + + // We can only analyze the definitions of certain classes of instructions + // (integral binops and casts at the moment), so bail if this isn't one. + LVILatticeVal Result; + if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) || + !BBI->getType()->isIntegerTy()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + BBLV.markOverdefined(); + return ODCacheUpdater.markResult(true); + } + + // FIXME: We're currently limited to binops with a constant RHS. This should + // be improved. + BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI); + if (BO && !isa<ConstantInt>(BO->getOperand(1))) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + + BBLV.markOverdefined(); + return ODCacheUpdater.markResult(true); + } + + return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB)); +} + +static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { + if (LoadInst *L = dyn_cast<LoadInst>(I)) { + return L->getPointerAddressSpace() == 0 && + GetUnderlyingObject(L->getPointerOperand()) == Ptr; + } + if (StoreInst *S = dyn_cast<StoreInst>(I)) { + return S->getPointerAddressSpace() == 0 && + GetUnderlyingObject(S->getPointerOperand()) == Ptr; + } + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + if (MI->isVolatile()) return false; + + // FIXME: check whether it has a valuerange that excludes zero? 
+ ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength()); + if (!Len || Len->isZero()) return false; + + if (MI->getDestAddressSpace() == 0) + if (GetUnderlyingObject(MI->getRawDest()) == Ptr) + return true; + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + if (MTI->getSourceAddressSpace() == 0) + if (GetUnderlyingObject(MTI->getRawSource()) == Ptr) + return true; + } + return false; +} + +bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, + Value *Val, BasicBlock *BB) { + LVILatticeVal Result; // Start Undefined. + + // If this is a pointer, and there's a load from that pointer in this BB, + // then we know that the pointer can't be NULL. + bool NotNull = false; + if (Val->getType()->isPointerTy()) { + if (isKnownNonNull(Val)) { + NotNull = true; + } else { + Value *UnderlyingVal = GetUnderlyingObject(Val); + // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge + // inside InstructionDereferencesPointer either. + if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + if (InstructionDereferencesPointer(BI, UnderlyingVal)) { + NotNull = true; + break; + } + } + } + } + } + + // If this is the entry block, we must be asking about an argument. The + // value is overdefined. + if (BB == &BB->getParent()->getEntryBlock()) { + assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); + if (NotNull) { + PointerType *PTy = cast<PointerType>(Val->getType()); + Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + } else { + Result.markOverdefined(); + } + BBLV = Result; + return true; + } + + // Loop over all of our predecessors, merging what we know from them into + // result. 
+ bool EdgesMissing = false; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + LVILatticeVal EdgeResult; + EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult); + if (EdgesMissing) + continue; + + Result.mergeIn(EdgeResult); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + // If we previously determined that this is a pointer that can't be null + // then return that rather than giving up entirely. + if (NotNull) { + PointerType *PTy = cast<PointerType>(Val->getType()); + Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + } + + BBLV = Result; + return true; + } + } + if (EdgesMissing) + return false; + + // Return the merged value, which is more precise than 'overdefined'. + assert(!Result.isOverdefined()); + BBLV = Result; + return true; +} + +bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, + PHINode *PN, BasicBlock *BB) { + LVILatticeVal Result; // Start Undefined. + + // Loop over all of our predecessors, merging what we know from them into + // result. + bool EdgesMissing = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PhiBB = PN->getIncomingBlock(i); + Value *PhiVal = PN->getIncomingValue(i); + LVILatticeVal EdgeResult; + EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult); + if (EdgesMissing) + continue; + + Result.mergeIn(EdgeResult); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + + BBLV = Result; + return true; + } + } + if (EdgesMissing) + return false; + + // Return the merged value, which is more precise than 'overdefined'. 
+ assert(!Result.isOverdefined() && "Possible PHI in entry block?"); + BBLV = Result; + return true; +} + +bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, + Instruction *BBI, + BasicBlock *BB) { + // Figure out the range of the LHS. If that fails, bail. + if (!hasBlockValue(BBI->getOperand(0), BB)) { + BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0))); + return false; + } + + LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); + if (!LHSVal.isConstantRange()) { + BBLV.markOverdefined(); + return true; + } + + ConstantRange LHSRange = LHSVal.getConstantRange(); + ConstantRange RHSRange(1); + IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); + if (isa<BinaryOperator>(BBI)) { + if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) { + RHSRange = ConstantRange(RHS->getValue()); + } else { + BBLV.markOverdefined(); + return true; + } + } + + // NOTE: We're currently limited by the set of operations that ConstantRange + // can evaluate symbolically. Enhancing that set will allows us to analyze + // more definitions. 
+ LVILatticeVal Result; + switch (BBI->getOpcode()) { + case Instruction::Add: + Result.markConstantRange(LHSRange.add(RHSRange)); + break; + case Instruction::Sub: + Result.markConstantRange(LHSRange.sub(RHSRange)); + break; + case Instruction::Mul: + Result.markConstantRange(LHSRange.multiply(RHSRange)); + break; + case Instruction::UDiv: + Result.markConstantRange(LHSRange.udiv(RHSRange)); + break; + case Instruction::Shl: + Result.markConstantRange(LHSRange.shl(RHSRange)); + break; + case Instruction::LShr: + Result.markConstantRange(LHSRange.lshr(RHSRange)); + break; + case Instruction::Trunc: + Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth())); + break; + case Instruction::SExt: + Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth())); + break; + case Instruction::ZExt: + Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth())); + break; + case Instruction::BitCast: + Result.markConstantRange(LHSRange); + break; + case Instruction::And: + Result.markConstantRange(LHSRange.binaryAnd(RHSRange)); + break; + case Instruction::Or: + Result.markConstantRange(LHSRange.binaryOr(RHSRange)); + break; + + // Unhandled instructions are overdefined. + default: + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + Result.markOverdefined(); + break; + } + + BBLV = Result; + return true; +} + +/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if +/// Val is not constrained on the edge. +static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, + BasicBlock *BBTo, LVILatticeVal &Result) { + // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we + // know that v != 0. + if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { + // If this is a conditional branch and only one successor goes to BBTo, then + // we maybe able to infer something from the condition. 
+ if (BI->isConditional() && + BI->getSuccessor(0) != BI->getSuccessor(1)) { + bool isTrueDest = BI->getSuccessor(0) == BBTo; + assert(BI->getSuccessor(!isTrueDest) == BBTo && + "BBTo isn't a successor of BBFrom"); + + // If V is the condition of the branch itself, then we know exactly what + // it is. + if (BI->getCondition() == Val) { + Result = LVILatticeVal::get(ConstantInt::get( + Type::getInt1Ty(Val->getContext()), isTrueDest)); + return true; + } + + // If the condition of the branch is an equality comparison, we may be + // able to infer the value. + ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()); + if (ICI && isa<Constant>(ICI->getOperand(1))) { + if (ICI->isEquality() && ICI->getOperand(0) == Val) { + // We know that V has the RHS constant if this is a true SETEQ or + // false SETNE. + if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) + Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1))); + else + Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1))); + return true; + } + + // Recognize the range checking idiom that InstCombine produces. + // (X-C1) u< C2 --> [C1, C1+C2) + ConstantInt *NegOffset = 0; + if (ICI->getPredicate() == ICmpInst::ICMP_ULT) + match(ICI->getOperand(0), m_Add(m_Specific(Val), + m_ConstantInt(NegOffset))); + + ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1)); + if (CI && (ICI->getOperand(0) == Val || NegOffset)) { + // Calculate the range of values that would satisfy the comparison. + ConstantRange CmpRange(CI->getValue()); + ConstantRange TrueValues = + ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + + if (NegOffset) // Apply the offset from above. + TrueValues = TrueValues.subtract(NegOffset->getValue()); + + // If we're interested in the false dest, invert the condition. 
+ if (!isTrueDest) TrueValues = TrueValues.inverse(); + + Result = LVILatticeVal::getRange(TrueValues); + return true; + } + } + } + } + + // If the edge was formed by a switch on the value, then we may know exactly + // what it is. + if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) { + if (SI->getCondition() != Val) + return false; + + bool DefaultCase = SI->getDefaultDest() == BBTo; + unsigned BitWidth = Val->getType()->getIntegerBitWidth(); + ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/); + + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + ConstantRange EdgeVal(i.getCaseValue()->getValue()); + if (DefaultCase) { + // It is possible that the default destination is the destination of + // some cases. There is no need to perform difference for those cases. + if (i.getCaseSuccessor() != BBTo) + EdgesVals = EdgesVals.difference(EdgeVal); + } else if (i.getCaseSuccessor() == BBTo) + EdgesVals = EdgesVals.unionWith(EdgeVal); + } + Result = LVILatticeVal::getRange(EdgesVals); + return true; + } + return false; +} + +/// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at +/// the basic block if the edge does not constraint Val. +bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, + BasicBlock *BBTo, LVILatticeVal &Result) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(Val)) { + Result = LVILatticeVal::get(VC); + return true; + } + + if (getEdgeValueLocal(Val, BBFrom, BBTo, Result)) { + if (!Result.isConstantRange() || + Result.getConstantRange().getSingleElement()) + return true; + + // FIXME: this check should be moved to the beginning of the function when + // LVI better supports recursive values. Even for the single value case, we + // can intersect to detect dead code (an empty range). 
+ if (!hasBlockValue(Val, BBFrom)) { + BlockValueStack.push(std::make_pair(BBFrom, Val)); + return false; + } + + // Try to intersect ranges of the BB and the constraint on the edge. + LVILatticeVal InBlock = getBlockValue(Val, BBFrom); + if (!InBlock.isConstantRange()) + return true; + + ConstantRange Range = + Result.getConstantRange().intersectWith(InBlock.getConstantRange()); + Result = LVILatticeVal::getRange(Range); + return true; + } + + if (!hasBlockValue(Val, BBFrom)) { + BlockValueStack.push(std::make_pair(BBFrom, Val)); + return false; + } + + // if we couldn't compute the value on the edge, use the value from the BB + Result = getBlockValue(Val, BBFrom); + return true; +} + +LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) { + DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" + << BB->getName() << "'\n"); + + BlockValueStack.push(std::make_pair(BB, V)); + solve(); + LVILatticeVal Result = getBlockValue(V, BB); + + DEBUG(dbgs() << " Result = " << Result << "\n"); + return Result; +} + +LVILatticeVal LazyValueInfoCache:: +getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) { + DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" + << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); + + LVILatticeVal Result; + if (!getEdgeValue(V, FromBB, ToBB, Result)) { + solve(); + bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result); + (void)WasFastQuery; + assert(WasFastQuery && "More work to do after problem solved?"); + } + + DEBUG(dbgs() << " Result = " << Result << "\n"); + return Result; +} + +void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, + BasicBlock *NewSucc) { + // When an edge in the graph has been threaded, values that we could not + // determine a value for before (i.e. were marked overdefined) may be possible + // to solve now. We do NOT try to proactively update these values. 
Instead, + // we clear their entries from the cache, and allow lazy updating to recompute + // them when needed. + + // The updating process is fairly simple: we need to dropped cached info + // for all values that were marked overdefined in OldSucc, and for those same + // values in any successor of OldSucc (except NewSucc) in which they were + // also marked overdefined. + std::vector<BasicBlock*> worklist; + worklist.push_back(OldSucc); + + DenseSet<Value*> ClearSet; + for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), + E = OverDefinedCache.end(); I != E; ++I) { + if (I->first == OldSucc) + ClearSet.insert(I->second); + } + + // Use a worklist to perform a depth-first search of OldSucc's successors. + // NOTE: We do not need a visited list since any blocks we have already + // visited will have had their overdefined markers cleared already, and we + // thus won't loop to their successors. + while (!worklist.empty()) { + BasicBlock *ToUpdate = worklist.back(); + worklist.pop_back(); + + // Skip blocks only accessible through NewSucc. + if (ToUpdate == NewSucc) continue; + + bool changed = false; + for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end(); + I != E; ++I) { + // If a value was marked overdefined in OldSucc, and is here too... + DenseSet<OverDefinedPairTy>::iterator OI = + OverDefinedCache.find(std::make_pair(ToUpdate, *I)); + if (OI == OverDefinedCache.end()) continue; + + // Remove it from the caches. + ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)]; + ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); + + assert(CI != Entry.end() && "Couldn't find entry to update?"); + Entry.erase(CI); + OverDefinedCache.erase(OI); + + // If we removed anything, then we potentially need to update + // blocks successors too. 
+ changed = true; + } + + if (!changed) continue; + + worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); + } +} + +//===----------------------------------------------------------------------===// +// LazyValueInfo Impl +//===----------------------------------------------------------------------===// + +/// getCache - This lazily constructs the LazyValueInfoCache. +static LazyValueInfoCache &getCache(void *&PImpl) { + if (!PImpl) + PImpl = new LazyValueInfoCache(); + return *static_cast<LazyValueInfoCache*>(PImpl); +} + +bool LazyValueInfo::runOnFunction(Function &F) { + if (PImpl) + getCache(PImpl).clear(); + + TD = getAnalysisIfAvailable<DataLayout>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + + // Fully lazy. + return false; +} + +void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfo>(); +} + +void LazyValueInfo::releaseMemory() { + // If the cache was allocated, free it. + if (PImpl) { + delete &getCache(PImpl); + PImpl = 0; + } +} + +Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { + LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB); + + if (Result.isConstant()) + return Result.getConstant(); + if (Result.isConstantRange()) { + ConstantRange CR = Result.getConstantRange(); + if (const APInt *SingleVal = CR.getSingleElement()) + return ConstantInt::get(V->getContext(), *SingleVal); + } + return 0; +} + +/// getConstantOnEdge - Determine whether the specified value is known to be a +/// constant on the specified edge. Return null if not. 
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, + BasicBlock *ToBB) { + LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + + if (Result.isConstant()) + return Result.getConstant(); + if (Result.isConstantRange()) { + ConstantRange CR = Result.getConstantRange(); + if (const APInt *SingleVal = CR.getSingleElement()) + return ConstantInt::get(V->getContext(), *SingleVal); + } + return 0; +} + +/// getPredicateOnEdge - Determine whether the specified value comparison +/// with a constant is known to be true or false on the specified CFG edge. +/// Pred is a CmpInst predicate. +LazyValueInfo::Tristate +LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, + BasicBlock *FromBB, BasicBlock *ToBB) { + LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + + // If we know the value is a constant, evaluate the conditional. + Constant *Res = 0; + if (Result.isConstant()) { + Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD, + TLI); + if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res)) + return ResCI->isZero() ? False : True; + return Unknown; + } + + if (Result.isConstantRange()) { + ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) return Unknown; + + ConstantRange CR = Result.getConstantRange(); + if (Pred == ICmpInst::ICMP_EQ) { + if (!CR.contains(CI->getValue())) + return False; + + if (CR.isSingleElement() && CR.contains(CI->getValue())) + return True; + } else if (Pred == ICmpInst::ICMP_NE) { + if (!CR.contains(CI->getValue())) + return True; + + if (CR.isSingleElement() && CR.contains(CI->getValue())) + return False; + } + + // Handle more complex predicates. 
+ ConstantRange TrueValues = + ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); + if (TrueValues.contains(CR)) + return True; + if (TrueValues.inverse().contains(CR)) + return False; + return Unknown; + } + + if (Result.isNotConstant()) { + // If this is an equality comparison, we can try to fold it knowing that + // "V != C1". + if (Pred == ICmpInst::ICMP_EQ) { + // !C1 == C -> false iff C1 == C. + Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, + Result.getNotConstant(), C, TD, + TLI); + if (Res->isNullValue()) + return False; + } else if (Pred == ICmpInst::ICMP_NE) { + // !C1 != C -> true iff C1 == C. + Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, + Result.getNotConstant(), C, TD, + TLI); + if (Res->isNullValue()) + return True; + } + return Unknown; + } + + return Unknown; +} + +void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, + BasicBlock *NewSucc) { + if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc); +} + +void LazyValueInfo::eraseBlock(BasicBlock *BB) { + if (PImpl) getCache(PImpl).eraseBlock(BB); +} diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp new file mode 100644 index 000000000000..fefa51660f92 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp @@ -0,0 +1,137 @@ +//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LibCallAliasAnalysis class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LibCallAliasAnalysis.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" +using namespace llvm; + +// Register this pass... +char LibCallAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", + "LibCall Alias Analysis", false, true, false) + +FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { + return new LibCallAliasAnalysis(LCI); +} + +LibCallAliasAnalysis::~LibCallAliasAnalysis() { + delete LCI; +} + +void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.setPreservesAll(); // Does not transform code +} + + + +/// AnalyzeLibCallDetails - Given a call to a function with the specified +/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call +/// vs the specified pointer/size. +AliasAnalysis::ModRefResult +LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, + ImmutableCallSite CS, + const Location &Loc) { + // If we have a function, check to see what kind of mod/ref effects it + // has. Start by including any info globally known about the function. + AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; + if (MRInfo == NoModRef) return MRInfo; + + // If that didn't tell us that the function is 'readnone', check to see + // if we have detailed info and if 'P' is any of the locations we know + // about. + const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; + if (Details == 0) + return MRInfo; + + // If the details array is of the 'DoesNot' kind, we only know something if + // the pointer is a match for one of the locations in 'Details'. If we find a + // match, we can prove some interactions cannot happen. 
+ // + if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { + // Find out if the pointer refers to a known location. + for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { + const LibCallLocationInfo &LocInfo = + LCI->getLocationInfo(Details[i].LocationID); + LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); + if (Res != LibCallLocationInfo::Yes) continue; + + // If we find a match against a location that we 'do not' interact with, + // learn this info into MRInfo. + return ModRefResult(MRInfo & ~Details[i].MRInfo); + } + return MRInfo; + } + + // If the details are of the 'DoesOnly' sort, we know something if the pointer + // is a match for one of the locations in 'Details'. Also, if we can prove + // that the pointers is *not* one of the locations in 'Details', we know that + // the call is NoModRef. + assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly); + + // Find out if the pointer refers to a known location. + bool NoneMatch = true; + for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { + const LibCallLocationInfo &LocInfo = + LCI->getLocationInfo(Details[i].LocationID); + LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); + if (Res == LibCallLocationInfo::No) continue; + + // If we don't know if this pointer points to the location, then we have to + // assume it might alias in some case. + if (Res == LibCallLocationInfo::Unknown) { + NoneMatch = false; + continue; + } + + // If we know that this pointer definitely is pointing into the location, + // merge in this information. + return ModRefResult(MRInfo & Details[i].MRInfo); + } + + // If we found that the pointer is guaranteed to not match any of the + // locations in our 'DoesOnly' rule, then we know that the pointer must point + // to some other location. Since the libcall doesn't mod/ref any other + // locations, return NoModRef. + if (NoneMatch) + return NoModRef; + + // Otherwise, return any other info gained so far. 
+ return MRInfo; +} + +// getModRefInfo - Check to see if the specified callsite can clobber the +// specified memory object. +// +AliasAnalysis::ModRefResult +LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + ModRefResult MRInfo = ModRef; + + // If this is a direct call to a function that LCI knows about, get the + // information about the runtime function. + if (LCI) { + if (const Function *F = CS.getCalledFunction()) { + if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { + MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc)); + if (MRInfo == NoModRef) return NoModRef; + } + } + } + + // The AliasAnalysis base class has some smarts, lets use them. + return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc)); +} diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp new file mode 100644 index 000000000000..0592ccb26c12 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp @@ -0,0 +1,63 @@ +//===- LibCallSemantics.cpp - Describe library semantics ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements interfaces that can be used to describe language +// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM +// optimizers. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/Function.h" +using namespace llvm; + +/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This +/// helper does the cast. 
+static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) { + return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr); +} + +LibCallInfo::~LibCallInfo() { + delete getMap(Impl); +} + +const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { + // Get location info on the first call. + if (NumLocations == 0) + NumLocations = getLocationInfo(Locations); + + assert(LocID < NumLocations && "Invalid location ID!"); + return Locations[LocID]; +} + + +/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to +/// the specified function if we have it. If not, return null. +const LibCallFunctionInfo * +LibCallInfo::getFunctionInfo(const Function *F) const { + StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl); + + /// If this is the first time we are querying for this info, lazily construct + /// the StringMap to index it. + if (Map == 0) { + Impl = Map = new StringMap<const LibCallFunctionInfo*>(); + + const LibCallFunctionInfo *Array = getFunctionInfoArray(); + if (Array == 0) return 0; + + // We now have the array of entries. Populate the StringMap. + for (unsigned i = 0; Array[i].Name; ++i) + (*Map)[Array[i].Name] = Array+i; + } + + // Look up this function in the string map. + return Map->lookup(F->getName()); +} + diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp new file mode 100644 index 000000000000..ec17f47acb86 --- /dev/null +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -0,0 +1,720 @@ +//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass statically checks for common and easily-identified constructs +// which produce undefined or likely unintended behavior in LLVM IR. 
+// +// It is not a guarantee of correctness, in two ways. First, it isn't +// comprehensive. There are checks which could be done statically which are +// not yet implemented. Some of these are indicated by TODO comments, but +// those aren't comprehensive either. Second, many conditions cannot be +// checked statically. This pass does no dynamic instrumentation, so it +// can't check for all possible problems. +// +// Another limitation is that it assumes all code will be executed. A store +// through a null pointer in a basic block which is never reached is harmless, +// but this pass will warn about it anyway. This is the main reason why most +// of these checks live here instead of in the Verifier pass. +// +// Optimization passes may make conditions that this pass checks for more or +// less obvious. If an optimization pass appears to be introducing a warning, +// it may be that the optimization pass is merely exposing an existing +// condition in the code. +// +// This code may be run before instcombine. In many cases, instcombine checks +// for the same kinds of things and turns instructions with undefined behavior +// into unreachable (or equivalent). Because of this, this pass makes some +// effort to look through bitcasts and so on. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Lint.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +using namespace llvm; + +namespace { + namespace MemRef { + static unsigned Read = 1; + static unsigned Write = 2; + static unsigned Callee = 4; + static unsigned Branchee = 8; + } + + class Lint : public FunctionPass, public InstVisitor<Lint> { + friend class InstVisitor<Lint>; + + void visitFunction(Function &F); + + void visitCallSite(CallSite CS); + void visitMemoryReference(Instruction &I, Value *Ptr, + uint64_t Size, unsigned Align, + Type *Ty, unsigned Flags); + + void visitCallInst(CallInst &I); + void visitInvokeInst(InvokeInst &I); + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void 
visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSet<Value *, 4> &Visited) const; + + public: + Module *Mod; + AliasAnalysis *AA; + DominatorTree *DT; + DataLayout *TD; + TargetLibraryInfo *TLI; + + std::string Messages; + raw_string_ostream MessagesStr; + + static char ID; // Pass identification, replacement for typeid + Lint() : FunctionPass(ID), MessagesStr(Messages) { + initializeLintPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<DominatorTree>(); + } + virtual void print(raw_ostream &O, const Module *M) const {} + + void WriteValue(const Value *V) { + if (!V) return; + if (isa<Instruction>(V)) { + MessagesStr << *V << '\n'; + } else { + WriteAsOperand(MessagesStr, V, true, Mod); + MessagesStr << '\n'; + } + } + + // CheckFailed - A check failed, so print out the condition and the message + // that failed. This provides a nice place to put a breakpoint if you want + // to see why something is not correct. + void CheckFailed(const Twine &Message, + const Value *V1 = 0, const Value *V2 = 0, + const Value *V3 = 0, const Value *V4 = 0) { + MessagesStr << Message.str() << "\n"; + WriteValue(V1); + WriteValue(V2); + WriteValue(V3); + WriteValue(V4); + } + }; +} + +char Lint::ID = 0; +INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", + false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", + false, true) + +// Assert - We know that cond should be true, if not print an error message. 
+#define Assert(C, M) \ + do { if (!(C)) { CheckFailed(M); return; } } while (0) +#define Assert1(C, M, V1) \ + do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) +#define Assert2(C, M, V1, V2) \ + do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) +#define Assert3(C, M, V1, V2, V3) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) +#define Assert4(C, M, V1, V2, V3, V4) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) + +// Lint::run - This is the main Analysis entry point for a +// function. +// +bool Lint::runOnFunction(Function &F) { + Mod = F.getParent(); + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<DataLayout>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + visit(F); + dbgs() << MessagesStr.str(); + Messages.clear(); + return false; +} + +void Lint::visitFunction(Function &F) { + // This isn't undefined behavior, it's just a little unusual, and it's a + // fairly common mistake to neglect to name a function. + Assert1(F.hasName() || F.hasLocalLinkage(), + "Unusual: Unnamed function with non-local linkage", &F); + + // TODO: Check for irreducible control flow. +} + +void Lint::visitCallSite(CallSite CS) { + Instruction &I = *CS.getInstruction(); + Value *Callee = CS.getCalledValue(); + + visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, + 0, 0, MemRef::Callee); + + if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { + Assert1(CS.getCallingConv() == F->getCallingConv(), + "Undefined behavior: Caller and callee calling convention differ", + &I); + + FunctionType *FT = F->getFunctionType(); + unsigned NumActualArgs = CS.arg_size(); + + Assert1(FT->isVarArg() ? 
+ FT->getNumParams() <= NumActualArgs : + FT->getNumParams() == NumActualArgs, + "Undefined behavior: Call argument count mismatches callee " + "argument count", &I); + + Assert1(FT->getReturnType() == I.getType(), + "Undefined behavior: Call return type mismatches " + "callee return type", &I); + + // Check argument types (in case the callee was casted) and attributes. + // TODO: Verify that caller and callee attributes are compatible. + Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + for (; AI != AE; ++AI) { + Value *Actual = *AI; + if (PI != PE) { + Argument *Formal = PI++; + Assert1(Formal->getType() == Actual->getType(), + "Undefined behavior: Call argument type mismatches " + "callee parameter type", &I); + + // Check that noalias arguments don't alias other arguments. This is + // not fully precise because we don't know the sizes of the dereferenced + // memory regions. + if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) + for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) + if (AI != BI && (*BI)->getType()->isPointerTy()) { + AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI); + Assert1(Result != AliasAnalysis::MustAlias && + Result != AliasAnalysis::PartialAlias, + "Unusual: noalias argument aliases another argument", &I); + } + + // Check that an sret argument points to valid memory. + if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { + Type *Ty = + cast<PointerType>(Formal->getType())->getElementType(); + visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), + TD ? 
TD->getABITypeAlignment(Ty) : 0, + Ty, MemRef::Read | MemRef::Write); + } + } + } + } + + if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) { + Value *Obj = findValue(*AI, /*OffsetOk=*/true); + Assert1(!isa<AllocaInst>(Obj), + "Undefined behavior: Call with \"tail\" keyword references " + "alloca", &I); + } + + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) + switch (II->getIntrinsicID()) { + default: break; + + // TODO: Check more intrinsics + + case Intrinsic::memcpy: { + MemCpyInst *MCI = cast<MemCpyInst>(&I); + // TODO: If the size is known, use it. + visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize, + MCI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize, + MCI->getAlignment(), 0, + MemRef::Read); + + // Check that the memcpy arguments don't overlap. The AliasAnalysis API + // isn't expressive enough for what we really want to do. Known partial + // overlap is not distinguished from the case where nothing is known. + uint64_t Size = 0; + if (const ConstantInt *Len = + dyn_cast<ConstantInt>(findValue(MCI->getLength(), + /*OffsetOk=*/false))) + if (Len->getValue().isIntN(32)) + Size = Len->getValue().getZExtValue(); + Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != + AliasAnalysis::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); + break; + } + case Intrinsic::memmove: { + MemMoveInst *MMI = cast<MemMoveInst>(&I); + // TODO: If the size is known, use it. + visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize, + MMI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize, + MMI->getAlignment(), 0, + MemRef::Read); + break; + } + case Intrinsic::memset: { + MemSetInst *MSI = cast<MemSetInst>(&I); + // TODO: If the size is known, use it. 
+ visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize, + MSI->getAlignment(), 0, + MemRef::Write); + break; + } + + case Intrinsic::vastart: + Assert1(I.getParent()->getParent()->isVarArg(), + "Undefined behavior: va_start called in a non-varargs function", + &I); + + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); + break; + case Intrinsic::vacopy: + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Write); + visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read); + break; + case Intrinsic::vaend: + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); + break; + + case Intrinsic::stackrestore: + // Stackrestore doesn't read or write memory, but it sets the + // stack pointer, which the compiler may read from or write to + // at any time, so check it for both readability and writeability. + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); + break; + } +} + +void Lint::visitCallInst(CallInst &I) { + return visitCallSite(&I); +} + +void Lint::visitInvokeInst(InvokeInst &I) { + return visitCallSite(&I); +} + +void Lint::visitReturnInst(ReturnInst &I) { + Function *F = I.getParent()->getParent(); + Assert1(!F->doesNotReturn(), + "Unusual: Return statement in function with noreturn attribute", + &I); + + if (Value *V = I.getReturnValue()) { + Value *Obj = findValue(V, /*OffsetOk=*/true); + Assert1(!isa<AllocaInst>(Obj), + "Unusual: Returning alloca value", &I); + } +} + +// TODO: Check that the reference is in bounds. +// TODO: Check readnone/readonly function attributes. +void Lint::visitMemoryReference(Instruction &I, + Value *Ptr, uint64_t Size, unsigned Align, + Type *Ty, unsigned Flags) { + // If no memory is being referenced, it doesn't matter if the pointer + // is valid. 
+ if (Size == 0) + return; + + Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); + Assert1(!isa<ConstantPointerNull>(UnderlyingObject), + "Undefined behavior: Null pointer dereference", &I); + Assert1(!isa<UndefValue>(UnderlyingObject), + "Undefined behavior: Undef pointer dereference", &I); + Assert1(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), + "Unusual: All-ones pointer dereference", &I); + Assert1(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isOne(), + "Unusual: Address one pointer dereference", &I); + + if (Flags & MemRef::Write) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) + Assert1(!GV->isConstant(), + "Undefined behavior: Write to read-only memory", &I); + Assert1(!isa<Function>(UnderlyingObject) && + !isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Write to text section", &I); + } + if (Flags & MemRef::Read) { + Assert1(!isa<Function>(UnderlyingObject), + "Unusual: Load from function body", &I); + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Load from block address", &I); + } + if (Flags & MemRef::Callee) { + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Call to block address", &I); + } + if (Flags & MemRef::Branchee) { + Assert1(!isa<Constant>(UnderlyingObject) || + isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Branch to non-blockaddress", &I); + } + + // Check for buffer overflows and misalignment. + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. 
+ uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + Type *ATy = AI->getAllocatedType(); + if (TD && !AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (TD && BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. + if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (TD && GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (TD && BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); + } + } + + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. 
+ if (TD && Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); + } +} + +void Lint::visitLoadInst(LoadInst &I) { + visitMemoryReference(I, I.getPointerOperand(), + AA->getTypeStoreSize(I.getType()), I.getAlignment(), + I.getType(), MemRef::Read); +} + +void Lint::visitStoreInst(StoreInst &I) { + visitMemoryReference(I, I.getPointerOperand(), + AA->getTypeStoreSize(I.getOperand(0)->getType()), + I.getAlignment(), + I.getOperand(0)->getType(), MemRef::Write); +} + +void Lint::visitXor(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: xor(undef, undef)", &I); +} + +void Lint::visitSub(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: sub(undef, undef)", &I); +} + +void Lint::visitLShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitAShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitShl(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +static bool isZero(Value *V, DataLayout *DL) { + // Assume undef could be zero. 
+ if (isa<UndefValue>(V)) + return true; + + VectorType *VecTy = dyn_cast<VectorType>(V->getType()); + if (!VecTy) { + unsigned BitWidth = V->getType()->getIntegerBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, KnownZero, KnownOne, DL); + return KnownZero.isAllOnesValue(); + } + + // Per-component check doesn't work with zeroinitializer + Constant *C = dyn_cast<Constant>(V); + if (!C) + return false; + + if (C->isZeroValue()) + return true; + + // For a vector, KnownZero will only be true if all values are zero, so check + // this per component + unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); + for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { + Constant *Elem = C->getAggregateElement(I); + if (isa<UndefValue>(Elem)) + return true; + + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Elem, KnownZero, KnownOne, DL); + if (KnownZero.isAllOnesValue()) + return true; + } + + return false; +} + +void Lint::visitSDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitUDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitSRem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitURem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitAllocaInst(AllocaInst &I) { + if (isa<ConstantInt>(I.getArraySize())) + // This isn't undefined behavior, it's just an obvious pessimization. + Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), + "Pessimization: Static alloca outside of entry block", &I); + + // TODO: Check for an unusual size (MSB set?) 
+} + +void Lint::visitVAArgInst(VAArgInst &I) { + visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0, + MemRef::Read | MemRef::Write); +} + +void Lint::visitIndirectBrInst(IndirectBrInst &I) { + visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0, + MemRef::Branchee); + + Assert1(I.getNumDestinations() != 0, + "Undefined behavior: indirectbr with no destinations", &I); +} + +void Lint::visitExtractElementInst(ExtractElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), + /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), + "Undefined result: extractelement index out of range", &I); +} + +void Lint::visitInsertElementInst(InsertElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(2), + /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(I.getType()->getNumElements()), + "Undefined result: insertelement index out of range", &I); +} + +void Lint::visitUnreachableInst(UnreachableInst &I) { + // This isn't undefined behavior, it's merely suspicious. + Assert1(&I == I.getParent()->begin() || + prior(BasicBlock::iterator(&I))->mayHaveSideEffects(), + "Unusual: unreachable immediately preceded by instruction without " + "side effects", &I); +} + +/// findValue - Look through bitcasts and simple memory reference patterns +/// to identify an equivalent, but more informative, value. If OffsetOk +/// is true, look through getelementptrs with non-zero offsets too. +/// +/// Most analysis passes don't require this logic, because instcombine +/// will simplify most of these kinds of things away. But it's a goal of +/// this Lint pass to be useful even on non-optimized IR. +Value *Lint::findValue(Value *V, bool OffsetOk) const { + SmallPtrSet<Value *, 4> Visited; + return findValueImpl(V, OffsetOk, Visited); +} + +/// findValueImpl - Implementation helper for findValue. 
+Value *Lint::findValueImpl(Value *V, bool OffsetOk, + SmallPtrSet<Value *, 4> &Visited) const { + // Detect self-referential values. + if (!Visited.insert(V)) + return UndefValue::get(V->getType()); + + // TODO: Look through sext or zext cast, when the result is known to + // be interpreted as signed or unsigned, respectively. + // TODO: Look through eliminable cast pairs. + // TODO: Look through calls with unique return values. + // TODO: Look through vector insert/extract/shuffle. + V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts(); + if (LoadInst *L = dyn_cast<LoadInst>(V)) { + BasicBlock::iterator BBI = L; + BasicBlock *BB = L->getParent(); + SmallPtrSet<BasicBlock *, 4> VisitedBlocks; + for (;;) { + if (!VisitedBlocks.insert(BB)) break; + if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), + BB, BBI, 6, AA)) + return findValueImpl(U, OffsetOk, Visited); + if (BBI != BB->begin()) break; + BB = BB->getUniquePredecessor(); + if (!BB) break; + BBI = BB->end(); + } + } else if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (Value *W = PN->hasConstantValue()) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } else if (CastInst *CI = dyn_cast<CastInst>(V)) { + if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) + return findValueImpl(CI->getOperand(0), OffsetOk, Visited); + } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { + if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), + Ex->getIndices())) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + // Same as above, but for ConstantExpr instead of Instruction. + if (Instruction::isCast(CE->getOpcode())) { + if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), + CE->getOperand(0)->getType(), + CE->getType(), + TD ? 
TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) + return findValueImpl(CE->getOperand(0), OffsetOk, Visited); + } else if (CE->getOpcode() == Instruction::ExtractValue) { + ArrayRef<unsigned> Indices = CE->getIndices(); + if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } + } + + // As a last resort, try SimplifyInstruction or constant folding. + if (Instruction *Inst = dyn_cast<Instruction>(V)) { + if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT)) + return findValueImpl(W, OffsetOk, Visited); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI)) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } + + return V; +} + +//===----------------------------------------------------------------------===// +// Implement the public interfaces to this file... +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createLintPass() { + return new Lint(); +} + +/// lintFunction - Check a function for errors, printing messages on stderr. +/// +void llvm::lintFunction(const Function &f) { + Function &F = const_cast<Function&>(f); + assert(!F.isDeclaration() && "Cannot lint external functions"); + + FunctionPassManager FPM(F.getParent()); + Lint *V = new Lint(); + FPM.add(V); + FPM.run(F); +} + +/// lintModule - Check a module for errors, printing messages on stderr. 
+/// +void llvm::lintModule(const Module &M) { + PassManager PM; + Lint *V = new Lint(); + PM.add(V); + PM.run(const_cast<Module&>(M)); +} diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp new file mode 100644 index 000000000000..0902a39a9f81 --- /dev/null +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -0,0 +1,221 @@ +//===- Loads.cpp - Local load analysis ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines simple local analyses for load instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" +using namespace llvm; + +/// AreEquivalentAddressValues - Test if A and B will obviously have the same +/// value. This includes recognizing that %t0 and %t1 will have the same +/// value in code like this: +/// %t0 = getelementptr \@a, 0, 3 +/// store i32 0, i32* %t0 +/// %t1 = getelementptr \@a, 0, 3 +/// %t2 = load i32* %t1 +/// +static bool AreEquivalentAddressValues(const Value *A, const Value *B) { + // Test if the values are trivially equivalent. + if (A == B) return true; + + // Test if the values come from identical arithmetic instructions. + // Use isIdenticalToWhenDefined instead of isIdenticalTo because + // this function is only used when one address use dominates the + // other, which means that they'll always either have the same + // value or one of them will have an undefined value. 
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) || + isa<PHINode>(A) || isa<GetElementPtrInst>(A)) + if (const Instruction *BI = dyn_cast<Instruction>(B)) + if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) + return true; + + // Otherwise they may not be equivalent. + return false; +} + +/// isSafeToLoadUnconditionally - Return true if we know that executing a load +/// from this value cannot trap. If it is not obviously safe to load from the +/// specified pointer, we do a quick local scan of the basic block containing +/// ScanFrom, to determine if the address is already accessed. +bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, + unsigned Align, const DataLayout *TD) { + int64_t ByteOffset = 0; + Value *Base = V; + Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD); + + if (ByteOffset < 0) // out of bounds + return false; + + Type *BaseType = 0; + unsigned BaseAlign = 0; + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + // An alloca is safe to load from as load as it is suitably aligned. + BaseType = AI->getAllocatedType(); + BaseAlign = AI->getAlignment(); + } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { + // Global variables are safe to load from but their size cannot be + // guaranteed if they are overridden. + if (!GV->mayBeOverridden()) { + BaseType = GV->getType()->getElementType(); + BaseAlign = GV->getAlignment(); + } + } + + if (BaseType && BaseType->isSized()) { + if (TD && BaseAlign == 0) + BaseAlign = TD->getPrefTypeAlignment(BaseType); + + if (Align <= BaseAlign) { + if (!TD) + return true; // Loading directly from an alloca or global is OK. + + // Check if the load is within the bounds of the underlying object. 
+ PointerType *AddrTy = cast<PointerType>(V->getType()); + uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); + if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && + (Align == 0 || (ByteOffset % Align) == 0)) + return true; + } + } + + // Otherwise, be a little bit aggressive by scanning the local block where we + // want to check to see if the pointer is already being loaded or stored + // from/to. If so, the previous load or store would have already trapped, + // so there is no harm doing an extra load (also, CSE will later eliminate + // the load entirely). + BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + + while (BBI != E) { + --BBI; + + // If we see a free or a call which may write to memory (i.e. which might do + // a free) the pointer could be marked invalid. + if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() && + !isa<DbgInfoIntrinsic>(BBI)) + return false; + + if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true; + } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true; + } + } + return false; +} + +/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the +/// instruction before ScanFrom) checking to see if we have the value at the +/// memory address *Ptr locally available within a small number of instructions. +/// If the value is available, return it. +/// +/// If not, return the iterator for the last validated instruction that the +/// value would be live through. If we scanned the entire block and didn't find +/// something that invalidates *Ptr or provides it, ScanFrom would be left at +/// begin() and this returns null. ScanFrom could also be left +/// +/// MaxInstsToScan specifies the maximum instructions to scan in the block. If +/// it is set to 0, it will scan the whole block. 
You can also optionally +/// specify an alias analysis implementation, which makes this more precise. +/// +/// If TBAATag is non-null and a load or store is found, the TBAA tag from the +/// load or store is recorded there. If there is no TBAA tag or if no access +/// is found, it is left unmodified. +Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, + BasicBlock::iterator &ScanFrom, + unsigned MaxInstsToScan, + AliasAnalysis *AA, + MDNode **TBAATag) { + if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; + + // If we're using alias analysis to disambiguate get the size of *Ptr. + uint64_t AccessSize = 0; + if (AA) { + Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); + AccessSize = AA->getTypeStoreSize(AccessTy); + } + + while (ScanFrom != ScanBB->begin()) { + // We must ignore debug info directives when counting (otherwise they + // would affect codegen). + Instruction *Inst = --ScanFrom; + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + + // Restore ScanFrom to expected value in case next test succeeds + ScanFrom++; + + // Don't scan huge blocks. + if (MaxInstsToScan-- == 0) return 0; + + --ScanFrom; + // If this is a load of Ptr, the loaded value is available. + // (This is true even if the load is volatile or atomic, although + // those cases are unlikely.) + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) { + if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); + return LI; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // If this is a store through Ptr, the value is available! + // (This is true even if the store is volatile or atomic, although + // those cases are unlikely.) + if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) { + if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa); + return SI->getOperand(0); + } + + // If Ptr is an alloca and this is a store to a different alloca, ignore + // the store. 
This is a trivial form of alias analysis that is important + // for reg2mem'd code. + if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) && + (isa<AllocaInst>(SI->getOperand(1)) || + isa<GlobalVariable>(SI->getOperand(1)))) + continue; + + // If we have alias analysis and it says the store won't modify the loaded + // value, ignore the store. + if (AA && + (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + continue; + + // Otherwise the store that may or may not alias the pointer, bail out. + ++ScanFrom; + return 0; + } + + // If this is some other instruction that may clobber Ptr, bail out. + if (Inst->mayWriteToMemory()) { + // If alias analysis claims that it really won't modify the load, + // ignore it. + if (AA && + (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + continue; + + // May modify the pointer, bail out. + ++ScanFrom; + return 0; + } + } + + // Got to the start of the block, we didn't find it, but are done for this + // block. + return 0; +} diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp new file mode 100644 index 000000000000..e369633ba291 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -0,0 +1,726 @@ +//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoopInfo class that is used to identify natural loops +// and determine the loop depth of various nodes of the CFG. Note that the +// loops identified may actually be several natural loops that share the same +// header node... not just a single natural loop. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfoImpl.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include <algorithm> +using namespace llvm; + +// Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops. +template class llvm::LoopBase<BasicBlock, Loop>; +template class llvm::LoopInfoBase<BasicBlock, Loop>; + +// Always verify loopinfo if expensive checking is enabled. +#ifdef XDEBUG +static bool VerifyLoopInfo = true; +#else +static bool VerifyLoopInfo = false; +#endif +static cl::opt<bool,true> +VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), + cl::desc("Verify loop info (time consuming)")); + +char LoopInfo::ID = 0; +INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true) + +// Loop identifier metadata name. +static const char *const LoopMDName = "llvm.loop"; + +//===----------------------------------------------------------------------===// +// Loop implementation +// + +/// isLoopInvariant - Return true if the specified value is loop invariant +/// +bool Loop::isLoopInvariant(Value *V) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + return !contains(I); + return true; // All non-instructions are loop invariant +} + +/// hasLoopInvariantOperands - Return true if all the operands of the +/// specified instruction are loop invariant. 
+bool Loop::hasLoopInvariantOperands(Instruction *I) const { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!isLoopInvariant(I->getOperand(i))) + return false; + + return true; +} + +/// makeLoopInvariant - If the given value is an instruciton inside of the +/// loop and it can be hoisted, do so to make it trivially loop-invariant. +/// Return true if the value after any hoisting is loop invariant. This +/// function can be used as a slightly more aggressive replacement for +/// isLoopInvariant. +/// +/// If InsertPt is specified, it is the point to hoist instructions to. +/// If null, the terminator of the loop preheader is used. +/// +bool Loop::makeLoopInvariant(Value *V, bool &Changed, + Instruction *InsertPt) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + return makeLoopInvariant(I, Changed, InsertPt); + return true; // All non-instructions are loop-invariant. +} + +/// makeLoopInvariant - If the given instruction is inside of the +/// loop and it can be hoisted, do so to make it trivially loop-invariant. +/// Return true if the instruction after any hoisting is loop invariant. This +/// function can be used as a slightly more aggressive replacement for +/// isLoopInvariant. +/// +/// If InsertPt is specified, it is the point to hoist instructions to. +/// If null, the terminator of the loop preheader is used. +/// +bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, + Instruction *InsertPt) const { + // Test if the value is already loop-invariant. + if (isLoopInvariant(I)) + return true; + if (!isSafeToSpeculativelyExecute(I)) + return false; + if (I->mayReadFromMemory()) + return false; + // The landingpad instruction is immobile. + if (isa<LandingPadInst>(I)) + return false; + // Determine the insertion point, unless one was given. + if (!InsertPt) { + BasicBlock *Preheader = getLoopPreheader(); + // Without a preheader, hoisting is not feasible. 
+ if (!Preheader) + return false; + InsertPt = Preheader->getTerminator(); + } + // Don't hoist instructions with loop-variant operands. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt)) + return false; + + // Hoist. + I->moveBefore(InsertPt); + Changed = true; + return true; +} + +/// getCanonicalInductionVariable - Check to see if the loop has a canonical +/// induction variable: an integer recurrence that starts at 0 and increments +/// by one each time through the loop. If so, return the phi node that +/// corresponds to it. +/// +/// The IndVarSimplify pass transforms loops to have a canonical induction +/// variable. +/// +PHINode *Loop::getCanonicalInductionVariable() const { + BasicBlock *H = getHeader(); + + BasicBlock *Incoming = 0, *Backedge = 0; + pred_iterator PI = pred_begin(H); + assert(PI != pred_end(H) && + "Loop must have at least one backedge!"); + Backedge = *PI++; + if (PI == pred_end(H)) return 0; // dead loop + Incoming = *PI++; + if (PI != pred_end(H)) return 0; // multiple backedges? + + if (contains(Incoming)) { + if (contains(Backedge)) + return 0; + std::swap(Incoming, Backedge); + } else if (!contains(Backedge)) + return 0; + + // Loop over all of the PHI nodes, looking for a canonical indvar. 
+ for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + if (ConstantInt *CI = + dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming))) + if (CI->isNullValue()) + if (Instruction *Inc = + dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge))) + if (Inc->getOpcode() == Instruction::Add && + Inc->getOperand(0) == PN) + if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1))) + if (CI->equalsInt(1)) + return PN; + } + return 0; +} + +/// isLCSSAForm - Return true if the Loop is in LCSSA form +bool Loop::isLCSSAForm(DominatorTree &DT) const { + for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { + BasicBlock *BB = *BI; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + User *U = *UI; + BasicBlock *UserBB = cast<Instruction>(U)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(U)) + UserBB = P->getIncomingBlock(UI); + + // Check the current block, as a fast-path, before checking whether + // the use is anywhere in the loop. Most values are used in the same + // block they are defined in. Also, blocks not reachable from the + // entry are special; uses in them don't need to go through PHIs. + if (UserBB != BB && + !contains(UserBB) && + DT.isReachableFromEntry(UserBB)) + return false; + } + } + + return true; +} + +/// isLoopSimplifyForm - Return true if the Loop is in the form that +/// the LoopSimplify form transforms loops to, which is sometimes called +/// normal form. +bool Loop::isLoopSimplifyForm() const { + // Normal-form loops have a preheader, a single backedge, and all of their + // exits have all their predecessors inside the loop. + return getLoopPreheader() && getLoopLatch() && hasDedicatedExits(); +} + +/// isSafeToClone - Return true if the loop body is safe to clone in practice. 
+/// Routines that reform the loop CFG and split edges often fail on indirectbr. +bool Loop::isSafeToClone() const { + // Return false if any loop blocks contain indirectbrs, or there are any calls + // to noduplicate functions. + for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { + if (isa<IndirectBrInst>((*I)->getTerminator())) + return false; + + if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) + if (II->hasFnAttr(Attribute::NoDuplicate)) + return false; + + for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { + if (const CallInst *CI = dyn_cast<CallInst>(BI)) { + if (CI->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + } + } + return true; +} + +MDNode *Loop::getLoopID() const { + MDNode *LoopID = 0; + if (isLoopSimplifyForm()) { + LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName); + } else { + // Go through each predecessor of the loop header and check the + // terminator for the metadata. + BasicBlock *H = getHeader(); + for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + MDNode *MD = 0; + + // Check if this terminator branches to the loop header. 
+ for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { + if (TI->getSuccessor(i) == H) { + MD = TI->getMetadata(LoopMDName); + break; + } + } + if (!MD) + return 0; + + if (!LoopID) + LoopID = MD; + else if (MD != LoopID) + return 0; + } + } + if (!LoopID || LoopID->getNumOperands() == 0 || + LoopID->getOperand(0) != LoopID) + return 0; + return LoopID; +} + +void Loop::setLoopID(MDNode *LoopID) const { + assert(LoopID && "Loop ID should not be null"); + assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself"); + + if (isLoopSimplifyForm()) { + getLoopLatch()->getTerminator()->setMetadata(LoopMDName, LoopID); + return; + } + + BasicBlock *H = getHeader(); + for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { + if (TI->getSuccessor(i) == H) + TI->setMetadata(LoopMDName, LoopID); + } + } +} + +bool Loop::isAnnotatedParallel() const { + MDNode *desiredLoopIdMetadata = getLoopID(); + + if (!desiredLoopIdMetadata) + return false; + + // The loop branch contains the parallel loop metadata. In order to ensure + // that any parallel-loop-unaware optimization pass hasn't added loop-carried + // dependencies (thus converted the loop back to a sequential loop), check + // that all the memory instructions in the loop contain parallelism metadata + // that point to the same unique "loop id metadata" the loop branch does. + for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) { + for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end(); + II != EE; II++) { + + if (!II->mayReadOrWriteMemory()) + continue; + + // The memory instruction can refer to the loop identifier metadata + // directly or indirectly through another list metadata (in case of + // nested parallel loops). 
The loop identifier metadata refers to + // itself so we can check both cases with the same routine. + MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access"); + + if (!loopIdMD) + return false; + + bool loopIdMDFound = false; + for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) { + if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) { + loopIdMDFound = true; + break; + } + } + + if (!loopIdMDFound) + return false; + } + } + return true; +} + + +/// hasDedicatedExits - Return true if no exit block for the loop +/// has a predecessor that is outside the loop. +bool Loop::hasDedicatedExits() const { + // Each predecessor of each exit block of a normal loop is contained + // within the loop. + SmallVector<BasicBlock *, 4> ExitBlocks; + getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + for (pred_iterator PI = pred_begin(ExitBlocks[i]), + PE = pred_end(ExitBlocks[i]); PI != PE; ++PI) + if (!contains(*PI)) + return false; + // All the requirements are met. + return true; +} + +/// getUniqueExitBlocks - Return all unique successor blocks of this loop. +/// These are the blocks _outside of the current loop_ which are branched to. +/// This assumes that loop exits are in canonical form. +/// +void +Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { + assert(hasDedicatedExits() && + "getUniqueExitBlocks assumes the loop has canonical form exits!"); + + SmallVector<BasicBlock *, 32> switchExitBlocks; + + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) { + + BasicBlock *current = *BI; + switchExitBlocks.clear(); + + for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) { + // If block is inside the loop then it is not a exit block. 
+ if (contains(*I)) + continue; + + pred_iterator PI = pred_begin(*I); + BasicBlock *firstPred = *PI; + + // If current basic block is this exit block's first predecessor + // then only insert exit block in to the output ExitBlocks vector. + // This ensures that same exit block is not inserted twice into + // ExitBlocks vector. + if (current != firstPred) + continue; + + // If a terminator has more then two successors, for example SwitchInst, + // then it is possible that there are multiple edges from current block + // to one exit block. + if (std::distance(succ_begin(current), succ_end(current)) <= 2) { + ExitBlocks.push_back(*I); + continue; + } + + // In case of multiple edges from current block to exit block, collect + // only one edge in ExitBlocks. Use switchExitBlocks to keep track of + // duplicate edges. + if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I) + == switchExitBlocks.end()) { + switchExitBlocks.push_back(*I); + ExitBlocks.push_back(*I); + } + } + } +} + +/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one +/// block, return that block. Otherwise return null. +BasicBlock *Loop::getUniqueExitBlock() const { + SmallVector<BasicBlock *, 8> UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + if (UniqueExitBlocks.size() == 1) + return UniqueExitBlocks[0]; + return 0; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void Loop::dump() const { + print(dbgs()); +} +#endif + +//===----------------------------------------------------------------------===// +// UnloopUpdater implementation +// + +namespace { +/// Find the new parent loop for all blocks within the "unloop" whose last +/// backedges has just been removed. +class UnloopUpdater { + Loop *Unloop; + LoopInfo *LI; + + LoopBlocksDFS DFS; + + // Map unloop's immediate subloops to their nearest reachable parents. Nested + // loops within these subloops will not change parents. 
However, an immediate + // subloop's new parent will be the nearest loop reachable from either its own + // exits *or* any of its nested loop's exits. + DenseMap<Loop*, Loop*> SubloopParents; + + // Flag the presence of an irreducible backedge whose destination is a block + // directly contained by the original unloop. + bool FoundIB; + +public: + UnloopUpdater(Loop *UL, LoopInfo *LInfo) : + Unloop(UL), LI(LInfo), DFS(UL), FoundIB(false) {} + + void updateBlockParents(); + + void removeBlocksFromAncestors(); + + void updateSubloopParents(); + +protected: + Loop *getNearestLoop(BasicBlock *BB, Loop *BBLoop); +}; +} // end anonymous namespace + +/// updateBlockParents - Update the parent loop for all blocks that are directly +/// contained within the original "unloop". +void UnloopUpdater::updateBlockParents() { + if (Unloop->getNumBlocks()) { + // Perform a post order CFG traversal of all blocks within this loop, + // propagating the nearest loop from sucessors to predecessors. + LoopBlocksTraversal Traversal(DFS, LI); + for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(), + POE = Traversal.end(); POI != POE; ++POI) { + + Loop *L = LI->getLoopFor(*POI); + Loop *NL = getNearestLoop(*POI, L); + + if (NL != L) { + // For reducible loops, NL is now an ancestor of Unloop. + assert((NL != Unloop && (!NL || NL->contains(Unloop))) && + "uninitialized successor"); + LI->changeLoopFor(*POI, NL); + } + else { + // Or the current block is part of a subloop, in which case its parent + // is unchanged. + assert((FoundIB || Unloop->contains(L)) && "uninitialized successor"); + } + } + } + // Each irreducible loop within the unloop induces a round of iteration using + // the DFS result cached by Traversal. 
+ bool Changed = FoundIB; + for (unsigned NIters = 0; Changed; ++NIters) { + assert(NIters < Unloop->getNumBlocks() && "runaway iterative algorithm"); + + // Iterate over the postorder list of blocks, propagating the nearest loop + // from successors to predecessors as before. + Changed = false; + for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(), + POE = DFS.endPostorder(); POI != POE; ++POI) { + + Loop *L = LI->getLoopFor(*POI); + Loop *NL = getNearestLoop(*POI, L); + if (NL != L) { + assert(NL != Unloop && (!NL || NL->contains(Unloop)) && + "uninitialized successor"); + LI->changeLoopFor(*POI, NL); + Changed = true; + } + } + } +} + +/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below +/// their new parents. +void UnloopUpdater::removeBlocksFromAncestors() { + // Remove all unloop's blocks (including those in nested subloops) from + // ancestors below the new parent loop. + for (Loop::block_iterator BI = Unloop->block_begin(), + BE = Unloop->block_end(); BI != BE; ++BI) { + Loop *OuterParent = LI->getLoopFor(*BI); + if (Unloop->contains(OuterParent)) { + while (OuterParent->getParentLoop() != Unloop) + OuterParent = OuterParent->getParentLoop(); + OuterParent = SubloopParents[OuterParent]; + } + // Remove blocks from former Ancestors except Unloop itself which will be + // deleted. + for (Loop *OldParent = Unloop->getParentLoop(); OldParent != OuterParent; + OldParent = OldParent->getParentLoop()) { + assert(OldParent && "new loop is not an ancestor of the original"); + OldParent->removeBlockFromLoop(*BI); + } + } +} + +/// updateSubloopParents - Update the parent loop for all subloops directly +/// nested within unloop. 
+void UnloopUpdater::updateSubloopParents() { + while (!Unloop->empty()) { + Loop *Subloop = *llvm::prior(Unloop->end()); + Unloop->removeChildLoop(llvm::prior(Unloop->end())); + + assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop"); + if (Loop *Parent = SubloopParents[Subloop]) + Parent->addChildLoop(Subloop); + else + LI->addTopLevelLoop(Subloop); + } +} + +/// getNearestLoop - Return the nearest parent loop among this block's +/// successors. If a successor is a subloop header, consider its parent to be +/// the nearest parent of the subloop's exits. +/// +/// For subloop blocks, simply update SubloopParents and return NULL. +Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { + + // Initially for blocks directly contained by Unloop, NearLoop == Unloop and + // is considered uninitialized. + Loop *NearLoop = BBLoop; + + Loop *Subloop = 0; + if (NearLoop != Unloop && Unloop->contains(NearLoop)) { + Subloop = NearLoop; + // Find the subloop ancestor that is directly contained within Unloop. + while (Subloop->getParentLoop() != Unloop) { + Subloop = Subloop->getParentLoop(); + assert(Subloop && "subloop is not an ancestor of the original loop"); + } + // Get the current nearest parent of the Subloop exits, initially Unloop. + NearLoop = + SubloopParents.insert(std::make_pair(Subloop, Unloop)).first->second; + } + + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) { + assert(!Subloop && "subloop blocks must have a successor"); + NearLoop = 0; // unloop blocks may now exit the function. + } + for (; I != E; ++I) { + if (*I == BB) + continue; // self loops are uninteresting + + Loop *L = LI->getLoopFor(*I); + if (L == Unloop) { + // This successor has not been processed. This path must lead to an + // irreducible backedge. + assert((FoundIB || !DFS.hasPostorder(*I)) && "should have seen IB"); + FoundIB = true; + } + if (L != Unloop && Unloop->contains(L)) { + // Successor is in a subloop. 
+ if (Subloop) + continue; // Branching within subloops. Ignore it. + + // BB branches from the original into a subloop header. + assert(L->getParentLoop() == Unloop && "cannot skip into nested loops"); + + // Get the current nearest parent of the Subloop's exits. + L = SubloopParents[L]; + // L could be Unloop if the only exit was an irreducible backedge. + } + if (L == Unloop) { + continue; + } + // Handle critical edges from Unloop into a sibling loop. + if (L && !L->contains(Unloop)) { + L = L->getParentLoop(); + } + // Remember the nearest parent loop among successors or subloop exits. + if (NearLoop == Unloop || !NearLoop || NearLoop->contains(L)) + NearLoop = L; + } + if (Subloop) { + SubloopParents[Subloop] = NearLoop; + return BBLoop; + } + return NearLoop; +} + +//===----------------------------------------------------------------------===// +// LoopInfo implementation +// +bool LoopInfo::runOnFunction(Function &) { + releaseMemory(); + LI.Analyze(getAnalysis<DominatorTree>().getBase()); + return false; +} + +/// updateUnloop - The last backedge has been removed from a loop--now the +/// "unloop". Find a new parent for the blocks contained within unloop and +/// update the loop tree. We don't necessarily have valid dominators at this +/// point, but LoopInfo is still valid except for the removal of this loop. +/// +/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without +/// checking first is illegal. +void LoopInfo::updateUnloop(Loop *Unloop) { + + // First handle the special case of no parent loop to simplify the algorithm. + if (!Unloop->getParentLoop()) { + // Since BBLoop had no parent, Unloop blocks are no longer in a loop. + for (Loop::block_iterator I = Unloop->block_begin(), + E = Unloop->block_end(); I != E; ++I) { + + // Don't reparent blocks in subloops. + if (getLoopFor(*I) != Unloop) + continue; + + // Blocks no longer have a parent but are still referenced by Unloop until + // the Unloop object is deleted. 
+ LI.changeLoopFor(*I, 0); + } + + // Remove the loop from the top-level LoopInfo object. + for (LoopInfo::iterator I = LI.begin();; ++I) { + assert(I != LI.end() && "Couldn't find loop"); + if (*I == Unloop) { + LI.removeLoop(I); + break; + } + } + + // Move all of the subloops to the top-level. + while (!Unloop->empty()) + LI.addTopLevelLoop(Unloop->removeChildLoop(llvm::prior(Unloop->end()))); + + return; + } + + // Update the parent loop for all blocks within the loop. Blocks within + // subloops will not change parents. + UnloopUpdater Updater(Unloop, this); + Updater.updateBlockParents(); + + // Remove blocks from former ancestor loops. + Updater.removeBlocksFromAncestors(); + + // Add direct subloops as children in their new parent loop. + Updater.updateSubloopParents(); + + // Remove unloop from its parent loop. + Loop *ParentLoop = Unloop->getParentLoop(); + for (Loop::iterator I = ParentLoop->begin();; ++I) { + assert(I != ParentLoop->end() && "Couldn't find loop"); + if (*I == Unloop) { + ParentLoop->removeChildLoop(I); + break; + } + } +} + +void LoopInfo::verifyAnalysis() const { + // LoopInfo is a FunctionPass, but verifying every loop in the function + // each time verifyAnalysis is called is very expensive. The + // -verify-loop-info option can enable this. In order to perform some + // checking by default, LoopPass has been taught to call verifyLoop + // manually during loop pass sequences. + + if (!VerifyLoopInfo) return; + + DenseSet<const Loop*> Loops; + for (iterator I = begin(), E = end(); I != E; ++I) { + assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); + (*I)->verifyLoopNest(&Loops); + } + + // Verify that blocks are mapped to valid loops. 
+ for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(), + E = LI.BBMap.end(); I != E; ++I) { + assert(Loops.count(I->second) && "orphaned loop"); + assert(I->second->contains(I->first) && "orphaned block"); + } +} + +void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<DominatorTree>(); +} + +void LoopInfo::print(raw_ostream &OS, const Module*) const { + LI.print(OS); +} + +//===----------------------------------------------------------------------===// +// LoopBlocksDFS implementation +// + +/// Traverse the loop blocks and store the DFS result. +/// Useful for clients that just want the final DFS result and don't need to +/// visit blocks during the initial traversal. +void LoopBlocksDFS::perform(LoopInfo *LI) { + LoopBlocksTraversal Traversal(*this, LI); + for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(), + POE = Traversal.end(); POI != POE; ++POI) ; +} diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp new file mode 100644 index 000000000000..acf2ba63bd32 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -0,0 +1,367 @@ +//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements LoopPass and LPPassManager. All loop optimization +// and transformation passes are derived from LoopPass. LPPassManager is +// responsible for managing LoopPasses. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Timer.h" +using namespace llvm; + +namespace { + +/// PrintLoopPass - Print a Function corresponding to a Loop. +/// +class PrintLoopPass : public LoopPass { +private: + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + +public: + static char ID; + PrintLoopPass(const std::string &B, raw_ostream &o) + : LoopPass(ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnLoop(Loop *L, LPPassManager &) { + Out << Banner; + for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); + b != be; + ++b) { + (*b)->print(Out); + } + return false; + } +}; + +char PrintLoopPass::ID = 0; +} + +//===----------------------------------------------------------------------===// +// LPPassManager +// + +char LPPassManager::ID = 0; + +LPPassManager::LPPassManager() + : FunctionPass(ID), PMDataManager() { + skipThisLoop = false; + redoThisLoop = false; + LI = NULL; + CurrentLoop = NULL; +} + +/// Delete loop from the loop queue and loop hierarchy (LoopInfo). +void LPPassManager::deleteLoopFromQueue(Loop *L) { + + LI->updateUnloop(L); + + // If L is current loop then skip rest of the passes and let + // runOnFunction remove L from LQ. Otherwise, remove L from LQ now + // and continue applying other passes on CurrentLoop. + if (CurrentLoop == L) + skipThisLoop = true; + + delete L; + + if (skipThisLoop) + return; + + for (std::deque<Loop *>::iterator I = LQ.begin(), + E = LQ.end(); I != E; ++I) { + if (*I == L) { + LQ.erase(I); + break; + } + } +} + +// Inset loop into loop nest (LoopInfo) and loop queue (LQ). 
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) { + + assert (CurrentLoop != L && "Cannot insert CurrentLoop"); + + // Insert into loop nest + if (ParentLoop) + ParentLoop->addChildLoop(L); + else + LI->addTopLevelLoop(L); + + insertLoopIntoQueue(L); +} + +void LPPassManager::insertLoopIntoQueue(Loop *L) { + // Insert L into loop queue + if (L == CurrentLoop) + redoLoop(L); + else if (!L->getParentLoop()) + // This is top level loop. + LQ.push_front(L); + else { + // Insert L after the parent loop. + for (std::deque<Loop *>::iterator I = LQ.begin(), + E = LQ.end(); I != E; ++I) { + if (*I == L->getParentLoop()) { + // deque does not support insert after. + ++I; + LQ.insert(I, 1, L); + break; + } + } + } +} + +// Reoptimize this loop. LPPassManager will re-insert this loop into the +// queue. This allows LoopPass to change loop nest for the loop. This +// utility may send LPPassManager into infinite loops so use caution. +void LPPassManager::redoLoop(Loop *L) { + assert (CurrentLoop == L && "Can redo only CurrentLoop"); + redoThisLoop = true; +} + +/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for +/// all loop passes. +void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, + BasicBlock *To, Loop *L) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *LP = getContainedPass(Index); + LP->cloneBasicBlockAnalysis(From, To, L); + } +} + +/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes. +void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { + if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; + ++BI) { + Instruction &I = *BI; + deleteSimpleAnalysisValue(&I, L); + } + } + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *LP = getContainedPass(Index); + LP->deleteAnalysisValue(V, L); + } +} + + +// Recurse through all subloops and all loops into LQ. 
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { + LQ.push_back(L); + for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) + addLoopIntoQueue(*I, LQ); +} + +/// Pass Manager itself does not invalidate any analysis info. +void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { + // LPPassManager needs LoopInfo. In the long term LoopInfo class will + // become part of LPPassManager. + Info.addRequired<LoopInfo>(); + Info.setPreservesAll(); +} + +/// run - Execute all of the passes scheduled for execution. Keep track of +/// whether any of the passes modifies the function, and if so, return true. +bool LPPassManager::runOnFunction(Function &F) { + LI = &getAnalysis<LoopInfo>(); + bool Changed = false; + + // Collect inherited analysis from Module level pass manager. + populateInheritedAnalysis(TPM->activeStack); + + // Populate the loop queue in reverse program order. There is no clear need to + // process sibling loops in either forward or reverse order. There may be some + // advantage in deleting uses in a later loop before optimizing the + // definitions in an earlier loop. If we find a clear reason to process in + // forward order, then a forward variant of LoopPassManager should be created. + // + // Note that LoopInfo::iterator visits loops in reverse program + // order. Here, reverse_iterator gives us a forward order, and the LoopQueue + // reverses the order a third time by popping from the back. 
+ for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + addLoopIntoQueue(*I, LQ); + + if (LQ.empty()) // No loops, skip calling finalizers + return false; + + // Initialization + for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end(); + I != E; ++I) { + Loop *L = *I; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *P = getContainedPass(Index); + Changed |= P->doInitialization(L, *this); + } + } + + // Walk Loops + while (!LQ.empty()) { + + CurrentLoop = LQ.back(); + skipThisLoop = false; + redoThisLoop = false; + + // Run all passes on the current Loop. + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *P = getContainedPass(Index); + + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, + CurrentLoop->getHeader()->getName()); + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + + { + PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); + TimeRegion PassTimer(getPassTimer(P)); + + Changed |= P->runOnLoop(CurrentLoop, *this); + } + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, + skipThisLoop ? "<deleted>" : + CurrentLoop->getHeader()->getName()); + dumpPreservedSet(P); + + if (!skipThisLoop) { + // Manually check that this loop is still healthy. This is done + // instead of relying on LoopInfo::verifyLoop since LoopInfo + // is a function pass and it's really expensive to verify every + // loop in the function every time. That level of checking can be + // enabled with the -verify-loop-info option. + { + TimeRegion PassTimer(getPassTimer(LI)); + CurrentLoop->verifyLoop(); + } + + // Then call the regular verifyAnalysis functions. + verifyPreservedAnalysis(P); + } + + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, + skipThisLoop ? "<deleted>" : + CurrentLoop->getHeader()->getName(), + ON_LOOP_MSG); + + if (skipThisLoop) + // Do not run other passes on this loop. 
+ break; + } + + // If the loop was deleted, release all the loop passes. This frees up + // some memory, and avoids trouble with the pass manager trying to call + // verifyAnalysis on them. + if (skipThisLoop) + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + freePass(P, "<deleted>", ON_LOOP_MSG); + } + + // Pop the loop from queue after running all passes. + LQ.pop_back(); + + if (redoThisLoop) + LQ.push_back(CurrentLoop); + } + + // Finalization + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *P = getContainedPass(Index); + Changed |= P->doFinalization(); + } + + return Changed; +} + +/// Print passes managed by this manager +void LPPassManager::dumpPassStructure(unsigned Offset) { + errs().indent(Offset*2) << "Loop Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } +} + + +//===----------------------------------------------------------------------===// +// LoopPass + +Pass *LoopPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintLoopPass(Banner, O); +} + +// Check if this pass is suitable for the current LPPassManager, if +// available. This pass P is not suitable for a LPPassManager if P +// is not preserving higher level analysis info used by other +// LPPassManager passes. In such case, pop LPPassManager from the +// stack. This will force assignPassManager() to create new +// LPPassManger as expected. +void LoopPass::preparePassManager(PMStack &PMS) { + + // Find LPPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_LoopPassManager) + PMS.pop(); + + // If this pass is destroying high level information that is used + // by other passes that are managed by LPM then do not insert + // this pass in current LPM. Use new LPPassManager. 
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager && + !PMS.top()->preserveHigherLevelAnalysis(this)) + PMS.pop(); +} + +/// Assign pass manager to manage this pass. +void LoopPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find LPPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_LoopPassManager) + PMS.pop(); + + LPPassManager *LPPM; + if (PMS.top()->getPassManagerType() == PMT_LoopPassManager) + LPPM = (LPPassManager*)PMS.top(); + else { + // Create new Loop Pass Manager if it does not exist. + assert (!PMS.empty() && "Unable to create Loop Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Loop Pass Manager + LPPM = new LPPassManager(); + LPPM->populateInheritedAnalysis(PMS); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(LPPM); + + // [3] Assign manager to manage this new manager. This may create + // and push new managers into PMS + Pass *P = LPPM->getAsPass(); + TPM->schedulePass(P); + + // [4] Push new manager into PMS + PMS.push(LPPM); + } + + LPPM->add(this); +} diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp new file mode 100644 index 000000000000..d26aaf1b9048 --- /dev/null +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -0,0 +1,192 @@ +//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + struct MemDepPrinter : public FunctionPass { + const Function *F; + + enum DepType { + Clobber = 0, + Def, + NonFuncLocal, + Unknown + }; + + static const char *const DepTypeStr[]; + + typedef PointerIntPair<const Instruction *, 2, DepType> InstTypePair; + typedef std::pair<InstTypePair, const BasicBlock *> Dep; + typedef SmallSetVector<Dep, 4> DepSet; + typedef DenseMap<const Instruction *, DepSet> DepSetMap; + DepSetMap Deps; + + static char ID; // Pass identifcation, replacement for typeid + MemDepPrinter() : FunctionPass(ID) { + initializeMemDepPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + void print(raw_ostream &OS, const Module * = 0) const; + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<MemoryDependenceAnalysis>(); + AU.setPreservesAll(); + } + + virtual void releaseMemory() { + Deps.clear(); + F = 0; + } + + private: + static InstTypePair getInstTypePair(MemDepResult dep) { + if (dep.isClobber()) + return InstTypePair(dep.getInst(), Clobber); + if (dep.isDef()) + return InstTypePair(dep.getInst(), Def); + if (dep.isNonFuncLocal()) + return InstTypePair(dep.getInst(), NonFuncLocal); + assert(dep.isUnknown() && "unexptected dependence type"); + return InstTypePair(dep.getInst(), Unknown); + } + static InstTypePair getInstTypePair(const Instruction* inst, DepType type) 
{ + return InstTypePair(inst, type); + } + }; +} + +char MemDepPrinter::ID = 0; +INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps", + "Print MemDeps of function", false, true) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps", + "Print MemDeps of function", false, true) + +FunctionPass *llvm::createMemDepPrinter() { + return new MemDepPrinter(); +} + +const char *const MemDepPrinter::DepTypeStr[] + = {"Clobber", "Def", "NonFuncLocal", "Unknown"}; + +bool MemDepPrinter::runOnFunction(Function &F) { + this->F = &F; + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>(); + + // All this code uses non-const interfaces because MemDep is not + // const-friendly, though nothing is actually modified. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + Instruction *Inst = &*I; + + if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory()) + continue; + + MemDepResult Res = MDA.getDependency(Inst); + if (!Res.isNonLocal()) { + Deps[Inst].insert(std::make_pair(getInstTypePair(Res), + static_cast<BasicBlock *>(0))); + } else if (CallSite CS = cast<Value>(Inst)) { + const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = + MDA.getNonLocalCallDependency(CS); + + DepSet &InstDeps = Deps[Inst]; + for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator + I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { + const MemDepResult &Res = I->getResult(); + InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB())); + } + } else { + SmallVector<NonLocalDepResult, 4> NLDI; + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + if (!LI->isUnordered()) { + // FIXME: Handle atomic/volatile loads. 
+ Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), + static_cast<BasicBlock *>(0))); + continue; + } + AliasAnalysis::Location Loc = AA.getLocation(LI); + MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (!SI->isUnordered()) { + // FIXME: Handle atomic/volatile stores. + Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), + static_cast<BasicBlock *>(0))); + continue; + } + AliasAnalysis::Location Loc = AA.getLocation(SI); + MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI); + } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) { + AliasAnalysis::Location Loc = AA.getLocation(VI); + MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI); + } else { + llvm_unreachable("Unknown memory instruction!"); + } + + DepSet &InstDeps = Deps[Inst]; + for (SmallVectorImpl<NonLocalDepResult>::const_iterator + I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { + const MemDepResult &Res = I->getResult(); + InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB())); + } + } + } + + return false; +} + +void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { + for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) { + const Instruction *Inst = &*I; + + DepSetMap::const_iterator DI = Deps.find(Inst); + if (DI == Deps.end()) + continue; + + const DepSet &InstDeps = DI->second; + + for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end(); + I != E; ++I) { + const Instruction *DepInst = I->first.getPointer(); + DepType type = I->first.getInt(); + const BasicBlock *DepBB = I->second; + + OS << " "; + OS << DepTypeStr[type]; + if (DepBB) { + OS << " in block "; + WriteAsOperand(OS, DepBB, /*PrintType=*/false, M); + } + if (DepInst) { + OS << " from: "; + DepInst->print(OS); + } + OS << "\n"; + } + + Inst->print(OS); + OS << "\n\n"; + } +} diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp 
b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp new file mode 100644 index 000000000000..1db0f634c941 --- /dev/null +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -0,0 +1,802 @@
//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This family of functions identifies calls to builtin functions that allocate
// or free memory.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "memory-builtins"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

/// Classification of allocation functions, encoded as a bit mask.
///
/// getAllocationData accepts a table entry only when every bit of the entry's
/// type is present in the requested mask. Because MallocLike includes the
/// OpNewLike bit, a MallocLike query also matches OpNewLike entries, while an
/// OpNewLike query does not match MallocLike entries.
enum AllocType {
  OpNewLike          = 1<<0, // allocates; never returns null
  MallocLike         = 1<<1 | OpNewLike, // allocates; may return null
  CallocLike         = 1<<2, // allocates + bzero
  ReallocLike        = 1<<3, // reallocates
  StrDupLike         = 1<<4, // strdup/strndup entries in the table below
  AllocLike          = MallocLike | CallocLike | StrDupLike, // any of the above except realloc
  AnyAlloc           = AllocLike | ReallocLike // every kind in this enum
};

/// One row of the allocation-function table: which library function it
/// describes, how it is classified, and what its prototype must look like.
struct AllocFnsTy {
  LibFunc::Func Func;      // library function this row describes
  AllocType AllocTy;       // classification of that function
  unsigned char NumParams; // exact parameter count required of the prototype
  // First and Second size parameters (or -1 if unused)
  signed char FstParam, SndParam;
};

// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
// know which functions are nounwind, noalias, nocapture parameters, etc.
+static const AllocFnsTy AllocationFnData[] = { + {LibFunc::malloc, MallocLike, 1, 0, -1}, + {LibFunc::valloc, MallocLike, 1, 0, -1}, + {LibFunc::Znwj, OpNewLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::Znwm, OpNewLike, 1, 0, -1}, // new(unsigned long) + {LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) + {LibFunc::Znaj, OpNewLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long) + {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::calloc, CallocLike, 2, 0, 1}, + {LibFunc::realloc, ReallocLike, 2, 1, -1}, + {LibFunc::reallocf, ReallocLike, 2, 1, -1}, + {LibFunc::strdup, StrDupLike, 1, -1, -1}, + {LibFunc::strndup, StrDupLike, 2, 1, -1} + // TODO: Handle "int posix_memalign(void **, size_t, size_t)" +}; + + +static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { + if (LookThroughBitCast) + V = V->stripPointerCasts(); + + CallSite CS(const_cast<Value*>(V)); + if (!CS.getInstruction()) + return 0; + + if (CS.isNoBuiltin()) + return 0; + + Function *Callee = CS.getCalledFunction(); + if (!Callee || !Callee->isDeclaration()) + return 0; + return Callee; +} + +/// \brief Returns the allocation data for the given value if it is a call to a +/// known allocation function, and NULL otherwise. +static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, + const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false) { + // Skip intrinsics + if (isa<IntrinsicInst>(V)) + return 0; + + Function *Callee = getCalledFunction(V, LookThroughBitCast); + if (!Callee) + return 0; + + // Make sure that the function is available. 
+ StringRef FnName = Callee->getName(); + LibFunc::Func TLIFn; + if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) + return 0; + + unsigned i = 0; + bool found = false; + for ( ; i < array_lengthof(AllocationFnData); ++i) { + if (AllocationFnData[i].Func == TLIFn) { + found = true; + break; + } + } + if (!found) + return 0; + + const AllocFnsTy *FnData = &AllocationFnData[i]; + if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) + return 0; + + // Check function prototype. + int FstParam = FnData->FstParam; + int SndParam = FnData->SndParam; + FunctionType *FTy = Callee->getFunctionType(); + + if (FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && + FTy->getNumParams() == FnData->NumParams && + (FstParam < 0 || + (FTy->getParamType(FstParam)->isIntegerTy(32) || + FTy->getParamType(FstParam)->isIntegerTy(64))) && + (SndParam < 0 || + FTy->getParamType(SndParam)->isIntegerTy(32) || + FTy->getParamType(SndParam)->isIntegerTy(64))) + return FnData; + return 0; +} + +static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { + ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V); + return CS && CS.hasFnAttr(Attribute::NoAlias); +} + + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup +/// like). +bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a function that returns a +/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). 
+bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + // it's safe to consider realloc as noalias since accessing the original + // pointer is undefined behavior + return isAllocationFn(V, TLI, LookThroughBitCast) || + hasNoAliasAttr(V, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates uninitialized memory (such as malloc). +bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, TLI, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates zero-filled memory (such as calloc). +bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, CallocLike, TLI, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory (either malloc, calloc, or strdup like). +bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AllocLike, TLI, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// reallocates memory (such as realloc). +bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory and never returns null (such as operator new). +bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast); +} + +/// extractMallocCall - Returns the corresponding CallInst if the instruction +/// is a malloc call. 
Since CallInst::CreateMalloc() only creates calls, we +/// ignore InvokeInst here. +const CallInst *llvm::extractMallocCall(const Value *I, + const TargetLibraryInfo *TLI) { + return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0; +} + +static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, + const TargetLibraryInfo *TLI, + bool LookThroughSExt = false) { + if (!CI) + return 0; + + // The size of the malloc's result type must be known to determine array size. + Type *T = getMallocAllocatedType(CI, TLI); + if (!T || !T->isSized() || !DL) + return 0; + + unsigned ElementSize = DL->getTypeAllocSize(T); + if (StructType *ST = dyn_cast<StructType>(T)) + ElementSize = DL->getStructLayout(ST)->getSizeInBytes(); + + // If malloc call's arg can be determined to be a multiple of ElementSize, + // return the multiple. Otherwise, return NULL. + Value *MallocArg = CI->getArgOperand(0); + Value *Multiple = 0; + if (ComputeMultiple(MallocArg, ElementSize, Multiple, + LookThroughSExt)) + return Multiple; + + return 0; +} + +/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// is a call to malloc whose array size can be determined and the array size +/// is not constant 1. Otherwise, return NULL. +const CallInst *llvm::isArrayMalloc(const Value *I, + const DataLayout *DL, + const TargetLibraryInfo *TLI) { + const CallInst *CI = extractMallocCall(I, TLI); + Value *ArraySize = computeArraySize(CI, DL, TLI); + + if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize)) + if (ConstSize->isOne()) + return CI; + + // CI is a non-array malloc or we can't figure out that it is an array malloc. + return 0; +} + +/// getMallocType - Returns the PointerType resulting from the malloc call. +/// The PointerType depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. 
+PointerType *llvm::getMallocType(const CallInst *CI, + const TargetLibraryInfo *TLI) { + assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); + + PointerType *MallocType = 0; + unsigned NumOfBitCastUses = 0; + + // Determine if CallInst has a bitcast use. + for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end(); + UI != E; ) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) { + MallocType = cast<PointerType>(BCI->getDestTy()); + NumOfBitCastUses++; + } + + // Malloc call has 1 bitcast use, so type is the bitcast's destination type. + if (NumOfBitCastUses == 1) + return MallocType; + + // Malloc call was not bitcast, so type is the malloc function's return type. + if (NumOfBitCastUses == 0) + return cast<PointerType>(CI->getType()); + + // Type could not be determined. + return 0; +} + +/// getMallocAllocatedType - Returns the Type allocated by malloc call. +/// The Type depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +Type *llvm::getMallocAllocatedType(const CallInst *CI, + const TargetLibraryInfo *TLI) { + PointerType *PT = getMallocType(CI, TLI); + return PT ? PT->getElementType() : 0; +} + +/// getMallocArraySize - Returns the array size of a malloc call. If the +/// argument passed to malloc is a multiple of the size of the malloced type, +/// then return that multiple. For non-array mallocs, the multiple is +/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be +/// determined. 
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL, + const TargetLibraryInfo *TLI, + bool LookThroughSExt) { + assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); + return computeArraySize(CI, DL, TLI, LookThroughSExt); +} + + +/// extractCallocCall - Returns the corresponding CallInst if the instruction +/// is a calloc call. +const CallInst *llvm::extractCallocCall(const Value *I, + const TargetLibraryInfo *TLI) { + return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : 0; +} + + +/// isFreeCall - Returns non-null if the value is a call to the builtin free() +const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { + const CallInst *CI = dyn_cast<CallInst>(I); + if (!CI || isa<IntrinsicInst>(CI)) + return 0; + Function *Callee = CI->getCalledFunction(); + if (Callee == 0 || !Callee->isDeclaration()) + return 0; + + StringRef FnName = Callee->getName(); + LibFunc::Func TLIFn; + if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) + return 0; + + unsigned ExpectedNumParams; + if (TLIFn == LibFunc::free || + TLIFn == LibFunc::ZdlPv || // operator delete(void*) + TLIFn == LibFunc::ZdaPv) // operator delete[](void*) + ExpectedNumParams = 1; + else if (TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) + TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) + ExpectedNumParams = 2; + else + return 0; + + // Check free prototype. + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // attribute will exist. + FunctionType *FTy = Callee->getFunctionType(); + if (!FTy->getReturnType()->isVoidTy()) + return 0; + if (FTy->getNumParams() != ExpectedNumParams) + return 0; + if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) + return 0; + + return CI; +} + + + +//===----------------------------------------------------------------------===// +// Utility functions to compute size of objects. 
+// + + +/// \brief Compute the size of the object pointed by Ptr. Returns true and the +/// object size in Size if successful, and false otherwise. +/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, +/// byval arguments, and global variables. +bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL, + const TargetLibraryInfo *TLI, bool RoundToAlign) { + if (!DL) + return false; + + ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign); + SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); + if (!Visitor.bothKnown(Data)) + return false; + + APInt ObjSize = Data.first, Offset = Data.second; + // check for overflow + if (Offset.slt(0) || ObjSize.ult(Offset)) + Size = 0; + else + Size = (ObjSize - Offset).getZExtValue(); + return true; +} + + +STATISTIC(ObjectVisitorArgument, + "Number of arguments with unsolved size and offset"); +STATISTIC(ObjectVisitorLoad, + "Number of load instructions with unsolved size and offset"); + + +APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { + if (RoundToAlign && Align) + return APInt(IntTyBits, RoundUpToAlignment(Size.getZExtValue(), Align)); + return Size; +} + +ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL, + const TargetLibraryInfo *TLI, + LLVMContext &Context, + bool RoundToAlign) +: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) { + IntegerType *IntTy = DL->getIntPtrType(Context); + IntTyBits = IntTy->getBitWidth(); + Zero = APInt::getNullValue(IntTyBits); +} + +SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { + V = V->stripPointerCasts(); + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If we have already seen this instruction, bail out. Cycles can happen in + // unreachable code after constant propagation. 
+ if (!SeenInsts.insert(I)) + return unknown(); + + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + return visitGEPOperator(*GEP); + return visit(*I); + } + if (Argument *A = dyn_cast<Argument>(V)) + return visitArgument(*A); + if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V)) + return visitConstantPointerNull(*P); + if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) + return visitGlobalAlias(*GA); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return visitGlobalVariable(*GV); + if (UndefValue *UV = dyn_cast<UndefValue>(V)) + return visitUndefValue(*UV); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (CE->getOpcode() == Instruction::IntToPtr) + return unknown(); // clueless + if (CE->getOpcode() == Instruction::GetElementPtr) + return visitGEPOperator(cast<GEPOperator>(*CE)); + } + + DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V + << '\n'); + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { + if (!I.getAllocatedType()->isSized()) + return unknown(); + + APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType())); + if (!I.isArrayAllocation()) + return std::make_pair(align(Size, I.getAlignment()), Zero); + + Value *ArraySize = I.getArraySize(); + if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) { + Size *= C->getValue().zextOrSelf(IntTyBits); + return std::make_pair(align(Size, I.getAlignment()), Zero); + } + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { + // no interprocedural analysis is done at the moment + if (!A.hasByValAttr()) { + ++ObjectVisitorArgument; + return unknown(); + } + PointerType *PT = cast<PointerType>(A.getType()); + APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType())); + return std::make_pair(align(Size, A.getParamAlignment()), Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { + const AllocFnsTy *FnData = 
getAllocationData(CS.getInstruction(), AnyAlloc, + TLI); + if (!FnData) + return unknown(); + + // handle strdup-like functions separately + if (FnData->AllocTy == StrDupLike) { + APInt Size(IntTyBits, GetStringLength(CS.getArgument(0))); + if (!Size) + return unknown(); + + // strndup limits strlen + if (FnData->FstParam > 0) { + ConstantInt *Arg= dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); + if (!Arg) + return unknown(); + + APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits); + if (Size.ugt(MaxSize)) + Size = MaxSize + 1; + } + return std::make_pair(Size, Zero); + } + + ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); + if (!Arg) + return unknown(); + + APInt Size = Arg->getValue().zextOrSelf(IntTyBits); + // size determined by just 1 parameter + if (FnData->SndParam < 0) + return std::make_pair(Size, Zero); + + Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam)); + if (!Arg) + return unknown(); + + Size *= Arg->getValue().zextOrSelf(IntTyBits); + return std::make_pair(Size, Zero); + + // TODO: handle more standard functions (+ wchar cousins): + // - strdup / strndup + // - strcpy / strncpy + // - strcat / strncat + // - memcpy / memmove + // - strcat / strncat + // - memset +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull&) { + return std::make_pair(Zero, Zero); +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitExtractElementInst(ExtractElementInst&) { + return unknown(); +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { + // Easy cases were already folded by previous passes. 
+ return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { + SizeOffsetType PtrData = compute(GEP.getPointerOperand()); + APInt Offset(IntTyBits, 0); + if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset)) + return unknown(); + + return std::make_pair(PtrData.first, PtrData.second + Offset); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) { + if (GA.mayBeOverridden()) + return unknown(); + return compute(GA.getAliasee()); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ + if (!GV.hasDefinitiveInitializer()) + return unknown(); + + APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType())); + return std::make_pair(align(Size, GV.getAlignment()), Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) { + // clueless + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { + ++ObjectVisitorLoad; + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { + // too complex to analyze statically. 
+ return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { + SizeOffsetType TrueSide = compute(I.getTrueValue()); + SizeOffsetType FalseSide = compute(I.getFalseValue()); + if (bothKnown(TrueSide) && bothKnown(FalseSide) && TrueSide == FalseSide) + return TrueSide; + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) { + return std::make_pair(Zero, Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { + DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I << '\n'); + return unknown(); +} + +ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL, + const TargetLibraryInfo *TLI, + LLVMContext &Context, + bool RoundToAlign) +: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)), + RoundToAlign(RoundToAlign) { + IntTy = DL->getIntPtrType(Context); + Zero = ConstantInt::get(IntTy, 0); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { + SizeOffsetEvalType Result = compute_(V); + + if (!bothKnown(Result)) { + // erase everything that was computed in this iteration from the cache, so + // that no dangling references are left behind. We could be a bit smarter if + // we kept a dependency graph. It's probably not worth the complexity. 
+ for (PtrSetTy::iterator I=SeenVals.begin(), E=SeenVals.end(); I != E; ++I) { + CacheMapTy::iterator CacheIt = CacheMap.find(*I); + // non-computable results can be safely cached + if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second)) + CacheMap.erase(CacheIt); + } + } + + SeenVals.clear(); + return Result; +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { + ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign); + SizeOffsetType Const = Visitor.compute(V); + if (Visitor.bothKnown(Const)) + return std::make_pair(ConstantInt::get(Context, Const.first), + ConstantInt::get(Context, Const.second)); + + V = V->stripPointerCasts(); + + // check cache + CacheMapTy::iterator CacheIt = CacheMap.find(V); + if (CacheIt != CacheMap.end()) + return CacheIt->second; + + // always generate code immediately before the instruction being + // processed, so that the generated code dominates the same BBs + Instruction *PrevInsertPoint = Builder.GetInsertPoint(); + if (Instruction *I = dyn_cast<Instruction>(V)) + Builder.SetInsertPoint(I); + + // now compute the size and offset + SizeOffsetEvalType Result; + + // Record the pointers that were handled in this run, so that they can be + // cleaned later if something fails. We also use this set to break cycles that + // can occur in dead code. 
+ if (!SeenVals.insert(V)) { + Result = unknown(); + } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + Result = visitGEPOperator(*GEP); + } else if (Instruction *I = dyn_cast<Instruction>(V)) { + Result = visit(*I); + } else if (isa<Argument>(V) || + (isa<ConstantExpr>(V) && + cast<ConstantExpr>(V)->getOpcode() == Instruction::IntToPtr) || + isa<GlobalAlias>(V) || + isa<GlobalVariable>(V)) { + // ignore values where we cannot do more than what ObjectSizeVisitor can + Result = unknown(); + } else { + DEBUG(dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: " + << *V << '\n'); + Result = unknown(); + } + + if (PrevInsertPoint) + Builder.SetInsertPoint(PrevInsertPoint); + + // Don't reuse CacheIt since it may be invalid at this point. + CacheMap[V] = Result; + return Result; +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { + if (!I.getAllocatedType()->isSized()) + return unknown(); + + // must be a VLA + assert(I.isArrayAllocation()); + Value *ArraySize = I.getArraySize(); + Value *Size = ConstantInt::get(ArraySize->getType(), + DL->getTypeAllocSize(I.getAllocatedType())); + Size = Builder.CreateMul(Size, ArraySize); + return std::make_pair(Size, Zero); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc, + TLI); + if (!FnData) + return unknown(); + + // handle strdup-like functions separately + if (FnData->AllocTy == StrDupLike) { + // TODO + return unknown(); + } + + Value *FirstArg = CS.getArgument(FnData->FstParam); + FirstArg = Builder.CreateZExt(FirstArg, IntTy); + if (FnData->SndParam < 0) + return std::make_pair(FirstArg, Zero); + + Value *SecondArg = CS.getArgument(FnData->SndParam); + SecondArg = Builder.CreateZExt(SecondArg, IntTy); + Value *Size = Builder.CreateMul(FirstArg, SecondArg); + return std::make_pair(Size, Zero); + + // TODO: handle more standard functions (+ wchar cousins): + // - 
strdup / strndup + // - strcpy / strncpy + // - strcat / strncat + // - memcpy / memmove + // - strcat / strncat + // - memset +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitExtractElementInst(ExtractElementInst&) { + return unknown(); +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitExtractValueInst(ExtractValueInst&) { + return unknown(); +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) { + SizeOffsetEvalType PtrData = compute_(GEP.getPointerOperand()); + if (!bothKnown(PtrData)) + return unknown(); + + Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true); + Offset = Builder.CreateAdd(PtrData.second, Offset); + return std::make_pair(PtrData.first, Offset); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) { + // clueless + return unknown(); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) { + return unknown(); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { + // create 2 PHIs: one for size and another for offset + PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); + PHINode *OffsetPHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); + + // insert right away in the cache to handle recursive PHIs + CacheMap[&PHI] = std::make_pair(SizePHI, OffsetPHI); + + // compute offset/size for each PHI incoming pointer + for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) { + Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt()); + SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i)); + + if (!bothKnown(EdgeData)) { + OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy)); + OffsetPHI->eraseFromParent(); + SizePHI->replaceAllUsesWith(UndefValue::get(IntTy)); + SizePHI->eraseFromParent(); + return unknown(); + } + SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i)); + OffsetPHI->addIncoming(EdgeData.second, 
PHI.getIncomingBlock(i)); + } + + Value *Size = SizePHI, *Offset = OffsetPHI, *Tmp; + if ((Tmp = SizePHI->hasConstantValue())) { + Size = Tmp; + SizePHI->replaceAllUsesWith(Size); + SizePHI->eraseFromParent(); + } + if ((Tmp = OffsetPHI->hasConstantValue())) { + Offset = Tmp; + OffsetPHI->replaceAllUsesWith(Offset); + OffsetPHI->eraseFromParent(); + } + return std::make_pair(Size, Offset); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) { + SizeOffsetEvalType TrueSide = compute_(I.getTrueValue()); + SizeOffsetEvalType FalseSide = compute_(I.getFalseValue()); + + if (!bothKnown(TrueSide) || !bothKnown(FalseSide)) + return unknown(); + if (TrueSide == FalseSide) + return TrueSide; + + Value *Size = Builder.CreateSelect(I.getCondition(), TrueSide.first, + FalseSide.first); + Value *Offset = Builder.CreateSelect(I.getCondition(), TrueSide.second, + FalseSide.second); + return std::make_pair(Size, Offset); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) { + DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I <<'\n'); + return unknown(); +} diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp new file mode 100644 index 000000000000..84ff2eed12e4 --- /dev/null +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -0,0 +1,1539 @@ +//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an analysis that determines, for a given memory +// operation, what preceding memory operations it depends on. 
It builds on +// alias analysis information, and tries to provide a lazy, caching interface to +// a common kind of alias information query. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "memdep" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/PredIteratorCache.h" +using namespace llvm; + +STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); +STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses"); +STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses"); + +STATISTIC(NumCacheNonLocalPtr, + "Number of fully cached non-local ptr responses"); +STATISTIC(NumCacheDirtyNonLocalPtr, + "Number of cached, but dirty, non-local ptr responses"); +STATISTIC(NumUncacheNonLocalPtr, + "Number of uncached non-local ptr responses"); +STATISTIC(NumCacheCompleteNonLocalPtr, + "Number of block queries that were completely cached"); + +// Limit for the number of instructions to scan in a block. +static const int BlockScanLimit = 100; + +char MemoryDependenceAnalysis::ID = 0; + +// Register this pass... 
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", + "Memory Dependence Analysis", false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", + "Memory Dependence Analysis", false, true) + +MemoryDependenceAnalysis::MemoryDependenceAnalysis() +: FunctionPass(ID), PredCache(0) { + initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry()); +} +MemoryDependenceAnalysis::~MemoryDependenceAnalysis() { +} + +/// Clean up memory in between runs +void MemoryDependenceAnalysis::releaseMemory() { + LocalDeps.clear(); + NonLocalDeps.clear(); + NonLocalPointerDeps.clear(); + ReverseLocalDeps.clear(); + ReverseNonLocalDeps.clear(); + ReverseNonLocalPtrDeps.clear(); + PredCache->clear(); +} + + + +/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis. +/// +void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<AliasAnalysis>(); +} + +bool MemoryDependenceAnalysis::runOnFunction(Function &) { + AA = &getAnalysis<AliasAnalysis>(); + TD = getAnalysisIfAvailable<DataLayout>(); + DT = getAnalysisIfAvailable<DominatorTree>(); + if (!PredCache) + PredCache.reset(new PredIteratorCache()); + return false; +} + +/// RemoveFromReverseMap - This is a helper function that removes Val from +/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry. 
+template <typename KeyTy> +static void RemoveFromReverseMap(DenseMap<Instruction*, + SmallPtrSet<KeyTy, 4> > &ReverseMap, + Instruction *Inst, KeyTy Val) { + typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator + InstIt = ReverseMap.find(Inst); + assert(InstIt != ReverseMap.end() && "Reverse map out of sync?"); + bool Found = InstIt->second.erase(Val); + assert(Found && "Invalid reverse map!"); (void)Found; + if (InstIt->second.empty()) + ReverseMap.erase(InstIt); +} + +/// GetLocation - If the given instruction references a specific memory +/// location, fill in Loc with the details, otherwise set Loc.Ptr to null. +/// Return a ModRefInfo value describing the general behavior of the +/// instruction. +static +AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, + AliasAnalysis::Location &Loc, + AliasAnalysis *AA) { + if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + if (LI->isUnordered()) { + Loc = AA->getLocation(LI); + return AliasAnalysis::Ref; + } + if (LI->getOrdering() == Monotonic) { + Loc = AA->getLocation(LI); + return AliasAnalysis::ModRef; + } + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; + } + + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->isUnordered()) { + Loc = AA->getLocation(SI); + return AliasAnalysis::Mod; + } + if (SI->getOrdering() == Monotonic) { + Loc = AA->getLocation(SI); + return AliasAnalysis::ModRef; + } + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; + } + + if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { + Loc = AA->getLocation(V); + return AliasAnalysis::ModRef; + } + + if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { + // calls to free() deallocate the entire structure + Loc = AliasAnalysis::Location(CI->getArgOperand(0)); + return AliasAnalysis::Mod; + } + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case 
Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + Loc = AliasAnalysis::Location(II->getArgOperand(1), + cast<ConstantInt>(II->getArgOperand(0)) + ->getZExtValue(), + II->getMetadata(LLVMContext::MD_tbaa)); + // These intrinsics don't really modify the memory, but returning Mod + // will allow them to be handled conservatively. + return AliasAnalysis::Mod; + case Intrinsic::invariant_end: + Loc = AliasAnalysis::Location(II->getArgOperand(2), + cast<ConstantInt>(II->getArgOperand(1)) + ->getZExtValue(), + II->getMetadata(LLVMContext::MD_tbaa)); + // These intrinsics don't really modify the memory, but returning Mod + // will allow them to be handled conservatively. + return AliasAnalysis::Mod; + default: + break; + } + + // Otherwise, just do the coarse-grained thing that always works. + if (Inst->mayWriteToMemory()) + return AliasAnalysis::ModRef; + if (Inst->mayReadFromMemory()) + return AliasAnalysis::Ref; + return AliasAnalysis::NoModRef; +} + +/// getCallSiteDependencyFrom - Private helper for finding the local +/// dependencies of a call site. +MemDepResult MemoryDependenceAnalysis:: +getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, + BasicBlock::iterator ScanIt, BasicBlock *BB) { + unsigned Limit = BlockScanLimit; + + // Walk backwards through the block, looking for dependencies + while (ScanIt != BB->begin()) { + // Limit the amount of scanning we do so we don't end up with quadratic + // running time on extreme testcases. + --Limit; + if (!Limit) + return MemDepResult::getUnknown(); + + Instruction *Inst = --ScanIt; + + // If this inst is a memory op, get the pointer it accessed + AliasAnalysis::Location Loc; + AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); + if (Loc.Ptr) { + // A simple instruction. + if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef) + return MemDepResult::getClobber(Inst); + continue; + } + + if (CallSite InstCS = cast<Value>(Inst)) { + // Debug intrinsics don't cause dependences. 
+ if (isa<DbgInfoIntrinsic>(Inst)) continue; + // If these two calls do not interfere, look past it. + switch (AA->getModRefInfo(CS, InstCS)) { + case AliasAnalysis::NoModRef: + // If the two calls are the same, return InstCS as a Def, so that + // CS can be found redundant and eliminated. + if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) && + CS.getInstruction()->isIdenticalToWhenDefined(Inst)) + return MemDepResult::getDef(Inst); + + // Otherwise if the two calls don't interact (e.g. InstCS is readnone) + // keep scanning. + continue; + default: + return MemDepResult::getClobber(Inst); + } + } + + // If we could not obtain a pointer for the instruction and the instruction + // touches memory then assume that this is a dependency. + if (MR != AliasAnalysis::NoModRef) + return MemDepResult::getClobber(Inst); + } + + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. + if (BB != &BB->getParent()->getEntryBlock()) + return MemDepResult::getNonLocal(); + return MemDepResult::getNonFuncLocal(); +} + +/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that +/// would fully overlap MemLoc if done as a wider legal integer load. +/// +/// MemLocBase, MemLocOffset are lazily computed here the first time the +/// base/offs of memloc is needed. +static bool +isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, + const Value *&MemLocBase, + int64_t &MemLocOffs, + const LoadInst *LI, + const DataLayout *TD) { + // If we have no target data, we can't do this. + if (TD == 0) return false; + + // If we haven't already computed the base/offset of MemLoc, do so now. 
+ if (MemLocBase == 0) + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, + LI, *TD); + return Size != 0; +} + +/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that +/// looks at a memory location for a load (specified by MemLocBase, Offs, +/// and Size) and compares it against a load. If the specified load could +/// be safely widened to a larger integer load that is 1) still efficient, +/// 2) safe for the target, and 3) would provide the specified memory +/// location value, then this function returns the size in bytes of the +/// load width to use. If not, this returns zero. +unsigned MemoryDependenceAnalysis:: +getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, + unsigned MemLocSize, const LoadInst *LI, + const DataLayout &TD) { + // We can only extend simple integer loads. + if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0; + + // Load widening is hostile to ThreadSanitizer: it may cause false positives + // or make the reports more cryptic (access sizes are wrong). + if (LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread)) + return 0; + + // Get the base of this load. + int64_t LIOffs = 0; + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); + + // If the two pointers are not based on the same pointer, we can't tell that + // they are related. + if (LIBase != MemLocBase) return 0; + + // Okay, the two values are based on the same pointer, but returned as + // no-alias. This happens when we have things like two byte loads at "P+1" + // and "P+3". Check to see if increasing the size of the "LI" load up to its + // alignment (or the largest native integer type) will allow us to load all + // the bits required by MemLoc. 
+ + // If MemLoc is before LI, then no widening of LI will help us out. + if (MemLocOffs < LIOffs) return 0; + + // Get the alignment of the load in bytes. We assume that it is safe to load + // any legal integer up to this size without a problem. For example, if we're + // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can + // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it + // to i16. + unsigned LoadAlign = LI->getAlignment(); + + int64_t MemLocEnd = MemLocOffs+MemLocSize; + + // If no amount of rounding up will let MemLoc fit into LI, then bail out. + if (LIOffs+LoadAlign < MemLocEnd) return 0; + + // This is the size of the load to try. Start with the next larger power of + // two. + unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; + NewLoadByteSize = NextPowerOf2(NewLoadByteSize); + + while (1) { + // If this load size is bigger than our known alignment or would not fit + // into a native integer register, then we fail. + if (NewLoadByteSize > LoadAlign || + !TD.fitsInLegalInteger(NewLoadByteSize*8)) + return 0; + + if (LIOffs+NewLoadByteSize > MemLocEnd && + LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress)) + // We will be reading past the location accessed by the original program. + // While this is safe in a regular build, Address Safety analysis tools + // may start reporting false warnings. So, don't do widening. + return 0; + + // If a load of this width would include all of MemLoc, then we succeed. + if (LIOffs+NewLoadByteSize >= MemLocEnd) + return NewLoadByteSize; + + NewLoadByteSize <<= 1; + } +} + +/// getPointerDependencyFrom - Return the instruction on which a memory +/// location depends. If isLoad is true, this routine ignores may-aliases with +/// read-only operations. If isLoad is false, this routine ignores may-aliases +/// with reads from read-only locations. 
If possible, pass the query +/// instruction as well; this function may take advantage of the metadata +/// annotated to the query instruction to refine the result. +MemDepResult MemoryDependenceAnalysis:: +getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, + BasicBlock::iterator ScanIt, BasicBlock *BB, + Instruction *QueryInst) { + + const Value *MemLocBase = 0; + int64_t MemLocOffset = 0; + unsigned Limit = BlockScanLimit; + bool isInvariantLoad = false; + if (isLoad && QueryInst) { + LoadInst *LI = dyn_cast<LoadInst>(QueryInst); + if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) + isInvariantLoad = true; + } + + // Walk backwards through the basic block, looking for dependencies. + while (ScanIt != BB->begin()) { + Instruction *Inst = --ScanIt; + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) + // Debug intrinsics don't (and can't) cause dependencies. + if (isa<DbgInfoIntrinsic>(II)) continue; + + // Limit the amount of scanning we do so we don't end up with quadratic + // running time on extreme testcases. + --Limit; + if (!Limit) + return MemDepResult::getUnknown(); + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + // If we reach a lifetime begin or end marker, then the query ends here + // because the value is undefined. + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + // FIXME: This only considers queries directly on the invariant-tagged + // pointer, not on query pointers that are indexed off of them. It'd + // be nice to handle that at some point (the right approach is to use + // GetPointerBaseWithConstantOffset). + if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)), + MemLoc)) + return MemDepResult::getDef(II); + continue; + } + } + + // Values depend on loads if the pointers are must aliased. This means that + // a load depends on another must aliased load from the same value. 
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + // Atomic loads have complications involved. + // FIXME: This is overly conservative. + if (!LI->isUnordered()) + return MemDepResult::getClobber(LI); + + AliasAnalysis::Location LoadLoc = AA->getLocation(LI); + + // If we found a pointer, check if it could be the same as our pointer. + AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); + + if (isLoad) { + if (R == AliasAnalysis::NoAlias) { + // If this is an over-aligned integer load (for example, + // "load i8* %P, align 4") see if it would obviously overlap with the + // queried location if widened to a larger load (e.g. if the queried + // location is 1 byte at P+1). If so, return it as a load/load + // clobber result, allowing the client to decide to widen the load if + // it wants to. + if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) + if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && + isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, + MemLocOffset, LI, TD)) + return MemDepResult::getClobber(Inst); + + continue; + } + + // Must aliased loads are defs of each other. + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + +#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads + // in terms of clobbering loads, but since it does this by looking + // at the clobbering load directly, it doesn't know about any + // phi translation that may have happened along the way. + + // If we have a partial alias, then return this as a clobber for the + // client to handle. + if (R == AliasAnalysis::PartialAlias) + return MemDepResult::getClobber(Inst); +#endif + + // Random may-alias loads don't depend on each other without a + // dependence. + continue; + } + + // Stores don't depend on other no-aliased accesses. + if (R == AliasAnalysis::NoAlias) + continue; + + // Stores don't alias loads from read-only memory. 
+ if (AA->pointsToConstantMemory(LoadLoc)) + continue; + + // Stores depend on may/must aliased loads. + return MemDepResult::getDef(Inst); + } + + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // Atomic stores have complications involved. + // FIXME: This is overly conservative. + if (!SI->isUnordered()) + return MemDepResult::getClobber(SI); + + // If alias analysis can tell that this store is guaranteed to not modify + // the query pointer, ignore it. Use getModRefInfo to handle cases where + // the query pointer points to constant memory etc. + if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef) + continue; + + // Ok, this store might clobber the query pointer. Check to see if it is + // a must alias: in this case, we want to return this as a def. + AliasAnalysis::Location StoreLoc = AA->getLocation(SI); + + // If we found a pointer, check if it could be the same as our pointer. + AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); + + if (R == AliasAnalysis::NoAlias) + continue; + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + if (isInvariantLoad) + continue; + return MemDepResult::getClobber(Inst); + } + + // If this is an allocation, and if we know that the accessed pointer is to + // the allocation, return Def. This means that there is no dependence and + // the access can be optimized based on that. For example, a load could + // turn into undef. + // Note: Only determine this to be a malloc if Inst is the malloc call, not + // a subsequent bitcast of the malloc call result. There can be stores to + // the malloced memory between the malloc call and its bitcast uses, and we + // need to continue scanning until the malloc call. 
+ const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); + if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { + const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); + + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) + return MemDepResult::getDef(Inst); + // Be conservative if the accessed pointer may alias the allocation. + if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias) + return MemDepResult::getClobber(Inst); + // If the allocation is not aliased and does not read memory (like + // strdup), it is safe to ignore. + if (isa<AllocaInst>(Inst) || + isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI)) + continue; + } + + // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. + AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + // If necessary, perform additional analysis. + if (MR == AliasAnalysis::ModRef) + MR = AA->callCapturesBefore(Inst, MemLoc, DT); + switch (MR) { + case AliasAnalysis::NoModRef: + // If the call has no effect on the queried pointer, just ignore it. + continue; + case AliasAnalysis::Mod: + return MemDepResult::getClobber(Inst); + case AliasAnalysis::Ref: + // If the call is known to never store to the pointer, and if this is a + // load query, we can safely ignore it (scan past it). + if (isLoad) + continue; + default: + // Otherwise, there is a potential dependence. Return a clobber. + return MemDepResult::getClobber(Inst); + } + } + + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. + if (BB != &BB->getParent()->getEntryBlock()) + return MemDepResult::getNonLocal(); + return MemDepResult::getNonFuncLocal(); +} + +/// getDependency - Return the instruction on which a memory operation +/// depends. 
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { + Instruction *ScanPos = QueryInst; + + // Check for a cached result + MemDepResult &LocalCache = LocalDeps[QueryInst]; + + // If the cached entry is non-dirty, just return it. Note that this depends + // on MemDepResult's default constructing to 'dirty'. + if (!LocalCache.isDirty()) + return LocalCache; + + // Otherwise, if we have a dirty entry, we know we can start the scan at that + // instruction, which may save us some work. + if (Instruction *Inst = LocalCache.getInst()) { + ScanPos = Inst; + + RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); + } + + BasicBlock *QueryParent = QueryInst->getParent(); + + // Do the scan. + if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. + if (QueryParent != &QueryParent->getParent()->getEntryBlock()) + LocalCache = MemDepResult::getNonLocal(); + else + LocalCache = MemDepResult::getNonFuncLocal(); + } else { + AliasAnalysis::Location MemLoc; + AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); + if (MemLoc.Ptr) { + // If we can do a pointer scan, make it happen. + bool isLoad = !(MR & AliasAnalysis::Mod); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) + isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; + + LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, + QueryParent, QueryInst); + } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { + CallSite QueryCS(QueryInst); + bool isReadOnly = AA->onlyReadsMemory(QueryCS); + LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, + QueryParent); + } else + // Non-memory instruction. + LocalCache = MemDepResult::getUnknown(); + } + + // Remember the result! 
+ if (Instruction *I = LocalCache.getInst()) + ReverseLocalDeps[I].insert(QueryInst); + + return LocalCache; +} + +#ifndef NDEBUG +/// AssertSorted - This method is used when -debug is specified to verify that +/// cache arrays are properly kept sorted. +static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, + int Count = -1) { + if (Count == -1) Count = Cache.size(); + if (Count == 0) return; + + for (unsigned i = 1; i != unsigned(Count); ++i) + assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!"); +} +#endif + +/// getNonLocalCallDependency - Perform a full dependency query for the +/// specified call, returning the set of blocks that the value is +/// potentially live across. The returned set of results will include a +/// "NonLocal" result for all blocks where the value is live across. +/// +/// This method assumes the instruction returns a "NonLocal" dependency +/// within its own block. +/// +/// This returns a reference to an internal data structure that may be +/// invalidated on the next non-local query or when an instruction is +/// removed. Clients must copy this data if they want it around longer than +/// that. +const MemoryDependenceAnalysis::NonLocalDepInfo & +MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { + assert(getDependency(QueryCS.getInstruction()).isNonLocal() && + "getNonLocalCallDependency should only be used on calls with non-local deps!"); + PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()]; + NonLocalDepInfo &Cache = CacheP.first; + + /// DirtyBlocks - This is the set of blocks that need to be recomputed. In + /// the cached case, this can happen due to instructions being deleted etc. In + /// the uncached case, this starts out as the set of predecessors we care + /// about. + SmallVector<BasicBlock*, 32> DirtyBlocks; + + if (!Cache.empty()) { + // Okay, we have a cache entry. If we know it is not dirty, just return it + // with no computation. 
+ if (!CacheP.second) { + ++NumCacheNonLocal; + return Cache; + } + + // If we already have a partially computed set of results, scan them to + // determine what is dirty, seeding our initial DirtyBlocks worklist. + for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); + I != E; ++I) + if (I->getResult().isDirty()) + DirtyBlocks.push_back(I->getBB()); + + // Sort the cache so that we can do fast binary search lookups below. + std::sort(Cache.begin(), Cache.end()); + + ++NumCacheDirtyNonLocal; + //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " + // << Cache.size() << " cached: " << *QueryInst; + } else { + // Seed DirtyBlocks with each of the preds of QueryInst's block. + BasicBlock *QueryBB = QueryCS.getInstruction()->getParent(); + for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI) + DirtyBlocks.push_back(*PI); + ++NumUncacheNonLocal; + } + + // isReadonlyCall - If this is a read-only call, we can be more aggressive. + bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); + + SmallPtrSet<BasicBlock*, 64> Visited; + + unsigned NumSortedEntries = Cache.size(); + DEBUG(AssertSorted(Cache)); + + // Iterate while we still have blocks to update. + while (!DirtyBlocks.empty()) { + BasicBlock *DirtyBB = DirtyBlocks.back(); + DirtyBlocks.pop_back(); + + // Already processed this block? + if (!Visited.insert(DirtyBB)) + continue; + + // Do a binary search to see if we already have an entry for this block in + // the cache set. If so, find it. + DEBUG(AssertSorted(Cache, NumSortedEntries)); + NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, + NonLocalDepEntry(DirtyBB)); + if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) + --Entry; + + NonLocalDepEntry *ExistingResult = 0; + if (Entry != Cache.begin()+NumSortedEntries && + Entry->getBB() == DirtyBB) { + // If we already have an entry, and if it isn't already dirty, the block + // is done. 
+ if (!Entry->getResult().isDirty()) + continue; + + // Otherwise, remember this slot so we can update the value. + ExistingResult = &*Entry; + } + + // If the dirty entry has a pointer, start scanning from it so we don't have + // to rescan the entire block. + BasicBlock::iterator ScanPos = DirtyBB->end(); + if (ExistingResult) { + if (Instruction *Inst = ExistingResult->getResult().getInst()) { + ScanPos = Inst; + // We're removing QueryInst's use of Inst. + RemoveFromReverseMap(ReverseNonLocalDeps, Inst, + QueryCS.getInstruction()); + } + } + + // Find out if this block has a local dependency for QueryInst. + MemDepResult Dep; + + if (ScanPos != DirtyBB->begin()) { + Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); + } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { + // No dependence found. If this is the entry block of the function, it is + // a clobber, otherwise it is unknown. + Dep = MemDepResult::getNonLocal(); + } else { + Dep = MemDepResult::getNonFuncLocal(); + } + + // If we had a dirty entry for the block, update it. Otherwise, just add + // a new entry. + if (ExistingResult) + ExistingResult->setResult(Dep); + else + Cache.push_back(NonLocalDepEntry(DirtyBB, Dep)); + + // If the block has a dependency (i.e. it isn't completely transparent to + // the value), remember the association! + if (!Dep.isNonLocal()) { + // Keep the ReverseNonLocalDeps map up to date so we can efficiently + // update this when we remove instructions. + if (Instruction *Inst = Dep.getInst()) + ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction()); + } else { + + // If the block *is* completely transparent to the load, we need to check + // the predecessors of this block. Add them to our worklist. 
+ for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) + DirtyBlocks.push_back(*PI); + } + } + + return Cache; +} + +/// getNonLocalPointerDependency - Perform a full dependency query for an +/// access to the specified (non-volatile) memory location, returning the +/// set of instructions that either define or clobber the value. +/// +/// This method assumes the pointer has a "NonLocal" dependency within its +/// own block. +/// +void MemoryDependenceAnalysis:: +getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, + BasicBlock *FromBB, + SmallVectorImpl<NonLocalDepResult> &Result) { + assert(Loc.Ptr->getType()->isPointerTy() && + "Can't get pointer deps of a non-pointer!"); + Result.clear(); + + PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD); + + // This is the set of blocks we've inspected, and the pointer we consider in + // each block. Because of critical edges, we currently bail out if querying + // a block with multiple different pointers. This can happen during PHI + // translation. + DenseMap<BasicBlock*, Value*> Visited; + if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB, + Result, Visited, true)) + return; + Result.clear(); + Result.push_back(NonLocalDepResult(FromBB, + MemDepResult::getUnknown(), + const_cast<Value *>(Loc.Ptr))); +} + +/// GetNonLocalInfoForBlock - Compute the memdep value for BB with +/// Pointer/PointeeSize using either cached information in Cache or by doing a +/// lookup (which may use dirty cache info if available). If we do a lookup, +/// add the result to the cache. +MemDepResult MemoryDependenceAnalysis:: +GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, + bool isLoad, BasicBlock *BB, + NonLocalDepInfo *Cache, unsigned NumSortedEntries) { + + // Do a binary search to see if we already have an entry for this block in + // the cache set. If so, find it. 
+ NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries, + NonLocalDepEntry(BB)); + if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) + --Entry; + + NonLocalDepEntry *ExistingResult = 0; + if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) + ExistingResult = &*Entry; + + // If we have a cached entry, and it is non-dirty, use it as the value for + // this dependency. + if (ExistingResult && !ExistingResult->getResult().isDirty()) { + ++NumCacheNonLocalPtr; + return ExistingResult->getResult(); + } + + // Otherwise, we have to scan for the value. If we have a dirty cache + // entry, start scanning from its position, otherwise we scan from the end + // of the block. + BasicBlock::iterator ScanPos = BB->end(); + if (ExistingResult && ExistingResult->getResult().getInst()) { + assert(ExistingResult->getResult().getInst()->getParent() == BB && + "Instruction invalidated?"); + ++NumCacheDirtyNonLocalPtr; + ScanPos = ExistingResult->getResult().getInst(); + + // Eliminating the dirty entry from 'Cache', so update the reverse info. + ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); + RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); + } else { + ++NumUncacheNonLocalPtr; + } + + // Scan the block for the dependency. + MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); + + // If we had a dirty entry for the block, update it. Otherwise, just add + // a new entry. + if (ExistingResult) + ExistingResult->setResult(Dep); + else + Cache->push_back(NonLocalDepEntry(BB, Dep)); + + // If the block has a dependency (i.e. it isn't completely transparent to + // the value), remember the reverse association because we just added it + // to Cache! + if (!Dep.isDef() && !Dep.isClobber()) + return Dep; + + // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently + // update MemDep when we remove instructions. 
+ Instruction *Inst = Dep.getInst(); + assert(Inst && "Didn't depend on anything?"); + ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); + ReverseNonLocalPtrDeps[Inst].insert(CacheKey); + return Dep; +} + +/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain +/// number of elements in the array that are already properly ordered. This is +/// optimized for the case when only a few entries are added. +static void +SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, + unsigned NumSortedEntries) { + switch (Cache.size() - NumSortedEntries) { + case 0: + // done, no new entries. + break; + case 2: { + // Two new entries, insert the last one into place. + NonLocalDepEntry Val = Cache.back(); + Cache.pop_back(); + MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.end()-1, Val); + Cache.insert(Entry, Val); + // FALL THROUGH. + } + case 1: + // One new entry, Just insert the new value at the appropriate position. + if (Cache.size() != 1) { + NonLocalDepEntry Val = Cache.back(); + Cache.pop_back(); + MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.end(), Val); + Cache.insert(Entry, Val); + } + break; + default: + // Added many values, do a full scale sort. + std::sort(Cache.begin(), Cache.end()); + break; + } +} + +/// getNonLocalPointerDepFromBB - Perform a dependency query based on +/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def +/// results to the results vector and keep track of which blocks are visited in +/// 'Visited'. +/// +/// This has special behavior for the first block queries (when SkipFirstBlock +/// is true). In this special case, it ignores the contents of the specified +/// block and starts returning dependence info for its predecessors. +/// +/// This function returns false on success, or true to indicate that it could +/// not compute dependence information for some reason. 
This should be treated +/// as a clobber dependence on the first instruction in the predecessor block. +bool MemoryDependenceAnalysis:: +getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, + const AliasAnalysis::Location &Loc, + bool isLoad, BasicBlock *StartBB, + SmallVectorImpl<NonLocalDepResult> &Result, + DenseMap<BasicBlock*, Value*> &Visited, + bool SkipFirstBlock) { + // Look up the cached info for Pointer. + ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); + + // Set up a temporary NLPI value. If the map doesn't yet have an entry for + // CacheKey, this value will be inserted as the associated value. Otherwise, + // it'll be ignored, and we'll have to check to see if the cached size and + // tbaa tag are consistent with the current query. + NonLocalPointerInfo InitialNLPI; + InitialNLPI.Size = Loc.Size; + InitialNLPI.TBAATag = Loc.TBAATag; + + // Get the NLPI for CacheKey, inserting one into the map if it doesn't + // already have one. + std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = + NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); + NonLocalPointerInfo *CacheInfo = &Pair.first->second; + + // If we already have a cache entry for this CacheKey, we may need to do some + // work to reconcile the cache entry and the current query. + if (!Pair.second) { + if (CacheInfo->Size < Loc.Size) { + // The query's Size is greater than the cached one. Throw out the + // cached data and proceed with the query at the greater size. + CacheInfo->Pair = BBSkipFirstBlockPair(); + CacheInfo->Size = Loc.Size; + for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), + DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); + CacheInfo->NonLocalDeps.clear(); + } else if (CacheInfo->Size > Loc.Size) { + // This query's Size is less than the cached one. Conservatively restart + // the query using the greater size. 
+ return getNonLocalPointerDepFromBB(Pointer, + Loc.getWithNewSize(CacheInfo->Size), + isLoad, StartBB, Result, Visited, + SkipFirstBlock); + } + + // If the query's TBAATag is inconsistent with the cached one, + // conservatively throw out the cached data and restart the query with + // no tag if needed. + if (CacheInfo->TBAATag != Loc.TBAATag) { + if (CacheInfo->TBAATag) { + CacheInfo->Pair = BBSkipFirstBlockPair(); + CacheInfo->TBAATag = 0; + for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), + DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); + CacheInfo->NonLocalDeps.clear(); + } + if (Loc.TBAATag) + return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(), + isLoad, StartBB, Result, Visited, + SkipFirstBlock); + } + } + + NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps; + + // If we have valid cached information for exactly the block we are + // investigating, just return it with no recomputation. + if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { + // We have a fully cached result for this query then we can just return the + // cached results and populate the visited set. However, we have to verify + // that we don't already have conflicting results for these blocks. Check + // to ensure that if a block in the results set is in the visited set that + // it was for the same pointer query. + if (!Visited.empty()) { + for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); + I != E; ++I) { + DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); + if (VI == Visited.end() || VI->second == Pointer.getAddr()) + continue; + + // We have a pointer mismatch in a block. Just return clobber, saying + // that something was clobbered in this result. We could also do a + // non-fully cached query, but there is little point in doing this. 
+ return true; + } + } + + Value *Addr = Pointer.getAddr(); + for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); + I != E; ++I) { + Visited.insert(std::make_pair(I->getBB(), Addr)); + if (I->getResult().isNonLocal()) { + continue; + } + + if (!DT) { + Result.push_back(NonLocalDepResult(I->getBB(), + MemDepResult::getUnknown(), + Addr)); + } else if (DT->isReachableFromEntry(I->getBB())) { + Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr)); + } + } + ++NumCacheCompleteNonLocalPtr; + return false; + } + + // Otherwise, either this is a new block, a block with an invalid cache + // pointer or one that we're about to invalidate by putting more info into it + // than its valid cache info. If empty, the result will be valid cache info, + // otherwise it isn't. + if (Cache->empty()) + CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); + else + CacheInfo->Pair = BBSkipFirstBlockPair(); + + SmallVector<BasicBlock*, 32> Worklist; + Worklist.push_back(StartBB); + + // PredList used inside loop. + SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; + + // Keep track of the entries that we know are sorted. Previously cached + // entries will all be sorted. The entries we add we only sort on demand (we + // don't insert every element into its sorted position). We know that we + // won't get any reuse from currently inserted values, because we don't + // revisit blocks after we insert info for them. + unsigned NumSortedEntries = Cache->size(); + DEBUG(AssertSorted(*Cache)); + + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + + // Skip the first block if we have it. + if (!SkipFirstBlock) { + // Analyze the dependency of *Pointer in FromBB. See if we already have + // been here. + assert(Visited.count(BB) && "Should check 'visited' before adding to WL"); + + // Get the dependency info for Pointer in BB. If we have cached + // information, we will use it, otherwise we compute it. 
+ DEBUG(AssertSorted(*Cache, NumSortedEntries)); + MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, + NumSortedEntries); + + // If we got a Def or Clobber, add this to the list of results. + if (!Dep.isNonLocal()) { + if (!DT) { + Result.push_back(NonLocalDepResult(BB, + MemDepResult::getUnknown(), + Pointer.getAddr())); + continue; + } else if (DT->isReachableFromEntry(BB)) { + Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); + continue; + } + } + } + + // If 'Pointer' is an instruction defined in this block, then we need to do + // phi translation to change it into a value live in the predecessor block. + // If not, we just add the predecessors to the worklist and scan them with + // the same Pointer. + if (!Pointer.NeedsPHITranslationFromBlock(BB)) { + SkipFirstBlock = false; + SmallVector<BasicBlock*, 16> NewBlocks; + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + // Verify that we haven't looked at this block yet. + std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> + InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); + if (InsertRes.second) { + // First time we've looked at *PI. + NewBlocks.push_back(*PI); + continue; + } + + // If we have seen this block before, but it was with a different + // pointer then we have a phi translation failure and we have to treat + // this as a clobber. + if (InsertRes.first->second != Pointer.getAddr()) { + // Make sure to clean up the Visited map before continuing on to + // PredTranslationFailure. + for (unsigned i = 0; i < NewBlocks.size(); i++) + Visited.erase(NewBlocks[i]); + goto PredTranslationFailure; + } + } + Worklist.append(NewBlocks.begin(), NewBlocks.end()); + continue; + } + + // We do need to do phi translation, if we know ahead of time we can't phi + // translate this value, don't even try. 
+ if (!Pointer.IsPotentiallyPHITranslatable()) + goto PredTranslationFailure; + + // We may have added values to the cache list before this PHI translation. + // If so, we haven't done anything to ensure that the cache remains sorted. + // Sort it now (if needed) so that recursive invocations of + // getNonLocalPointerDepFromBB and other routines that could reuse the cache + // value will only see properly sorted cache arrays. + if (Cache && NumSortedEntries != Cache->size()) { + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + NumSortedEntries = Cache->size(); + } + Cache = 0; + + PredList.clear(); + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + BasicBlock *Pred = *PI; + PredList.push_back(std::make_pair(Pred, Pointer)); + + // Get the PHI translated pointer in this predecessor. This can fail if + // not translatable, in which case the getAddr() returns null. + PHITransAddr &PredPointer = PredList.back().second; + PredPointer.PHITranslateValue(BB, Pred, 0); + + Value *PredPtrVal = PredPointer.getAddr(); + + // Check to see if we have already visited this pred block with another + // pointer. If so, we can't do this lookup. This failure can occur + // with PHI translation when a critical edge exists and the PHI node in + // the successor translates to a pointer value different than the + // pointer the block was first analyzed with. + std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> + InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); + + if (!InsertRes.second) { + // We found the pred; take it off the list of preds to visit. + PredList.pop_back(); + + // If the predecessor was visited with PredPtr, then we already did + // the analysis and can ignore it. + if (InsertRes.first->second == PredPtrVal) + continue; + + // Otherwise, the block was previously analyzed with a different + // pointer. We can't represent the result of this case, so we just + // treat this as a phi translation failure. 
+ + // Make sure to clean up the Visited map before continuing on to + // PredTranslationFailure. + for (unsigned i = 0, n = PredList.size(); i < n; ++i) + Visited.erase(PredList[i].first); + + goto PredTranslationFailure; + } + } + + // Actually process results here; this need to be a separate loop to avoid + // calling getNonLocalPointerDepFromBB for blocks we don't want to return + // any results for. (getNonLocalPointerDepFromBB will modify our + // datastructures in ways the code after the PredTranslationFailure label + // doesn't expect.) + for (unsigned i = 0, n = PredList.size(); i < n; ++i) { + BasicBlock *Pred = PredList[i].first; + PHITransAddr &PredPointer = PredList[i].second; + Value *PredPtrVal = PredPointer.getAddr(); + + bool CanTranslate = true; + // If PHI translation was unable to find an available pointer in this + // predecessor, then we have to assume that the pointer is clobbered in + // that predecessor. We can still do PRE of the load, which would insert + // a computation of the pointer in this predecessor. + if (PredPtrVal == 0) + CanTranslate = false; + + // FIXME: it is entirely possible that PHI translating will end up with + // the same value. Consider PHI translating something like: + // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* + // to recurse here, pedantically speaking. + + // If getNonLocalPointerDepFromBB fails here, that means the cached + // result conflicted with the Visited list; we have to conservatively + // assume it is unknown, but this also does not block PRE of the load. + if (!CanTranslate || + getNonLocalPointerDepFromBB(PredPointer, + Loc.getWithNewPtr(PredPtrVal), + isLoad, Pred, + Result, Visited)) { + // Add the entry to the Result list. 
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal); + Result.push_back(Entry); + + // Since we had a phi translation failure, the cache for CacheKey won't + // include all of the entries that we need to immediately satisfy future + // queries. Mark this in NonLocalPointerDeps by setting the + // BBSkipFirstBlockPair pointer to null. This requires reuse of the + // cached value to do more work but not miss the phi trans failure. + NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey]; + NLPI.Pair = BBSkipFirstBlockPair(); + continue; + } + } + + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->NonLocalDeps; + NumSortedEntries = Cache->size(); + + // Since we did phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set" Clear out the indicator for this. + CacheInfo->Pair = BBSkipFirstBlockPair(); + SkipFirstBlock = false; + continue; + + PredTranslationFailure: + // The following code is "failure"; we can't produce a sane translation + // for the given block. It assumes that we haven't modified any of + // our datastructures while processing the current block. + + if (Cache == 0) { + // Refresh the CacheInfo/Cache pointer if it got invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->NonLocalDeps; + NumSortedEntries = Cache->size(); + } + + // Since we failed phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set". Clear out the indicator for this. + CacheInfo->Pair = BBSkipFirstBlockPair(); + + // If *nothing* works, mark the pointer as unknown. 
+ // + // If this is the magic first block, return this as a clobber of the whole + // incoming value. Since we can't phi translate to one of the predecessors, + // we have to bail out. + if (SkipFirstBlock) + return true; + + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { + assert(I != Cache->rend() && "Didn't find current block??"); + if (I->getBB() != BB) + continue; + + assert(I->getResult().isNonLocal() && + "Should only be here with transparent block"); + I->setResult(MemDepResult::getUnknown()); + Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), + Pointer.getAddr())); + break; + } + } + + // Okay, we're done now. If we added new values to the cache, re-sort it. + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + DEBUG(AssertSorted(*Cache)); + return false; +} + +/// RemoveCachedNonLocalPointerDependencies - If P has an entry in +/// NonLocalPointerDeps, remove it; otherwise do nothing. Before the entry is +/// erased, RemoveFromReverseMap is called on ReverseNonLocalPtrDeps for every +/// cached result that refers to an instruction, so the reverse map is updated +/// together with the cache. +void MemoryDependenceAnalysis:: +RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { + CachedNonLocalPointerInfo::iterator It = + NonLocalPointerDeps.find(P); + if (It == NonLocalPointerDeps.end()) return; + + // Remove all of the entries in the BB->val map. This involves removing + // instructions from the reverse map. + NonLocalDepInfo &PInfo = It->second.NonLocalDeps; + + for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { + Instruction *Target = PInfo[i].getResult().getInst(); + if (Target == 0) continue; // Ignore non-local dep results. + // The cached instruction is expected to be in the block its entry is + // recorded for. + assert(Target->getParent() == PInfo[i].getBB()); + + // This cached entry is about to be discarded, so update the reverse info. + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); + } + + // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). + NonLocalPointerDeps.erase(It); +} + + +/// invalidateCachedPointerInfo - This method is used to invalidate cached +/// information about the specified pointer, because it may be too +/// conservative in memdep.
This is an optional call that can be used when +/// the client detects an equivalence between the pointer and some other +/// value and replaces the other value with ptr. This can make Ptr available +/// in more places, which the cached info does not necessarily reflect. +void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { + // If Ptr isn't really a pointer, just ignore it. + if (!Ptr->getType()->isPointerTy()) return; + // Flush store info for the pointer. + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); + // Flush load info for the pointer. + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); +} + +/// invalidateCachedPredecessors - Clear the PredIteratorCache info. +/// This needs to be done when the CFG changes, e.g., due to splitting +/// critical edges. +void MemoryDependenceAnalysis::invalidateCachedPredecessors() { + PredCache->clear(); +} + +/// removeInstruction - Remove an instruction from the dependence analysis, +/// updating the dependence of instructions that previously depended on it. +/// This method attempts to keep the cache coherent using the reverse map. +/// +/// RemInst's own cached local, non-local and (for pointer-typed values) +/// non-local-pointer queries are dropped. Cached results of other queries +/// that named RemInst are rewritten to a dirty result for the instruction +/// following RemInst (or a default-constructed result when RemInst is a +/// terminator), and the reverse maps are updated to match. +void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { + // Drop RemInst's own cached non-local query, if any: call + // RemoveFromReverseMap on ReverseNonLocalDeps for every instruction its + // cached results refer to, then erase the entry. + NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst); + if (NLDI != NonLocalDeps.end()) { + NonLocalDepInfo &BlockMap = NLDI->second.first; + for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); + DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); + NonLocalDeps.erase(NLDI); + } + + // If we have a cached local dependence query for this instruction, remove it. + // + LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); + if (LocalDepEntry != LocalDeps.end()) { + // Remove us from DepInst's reverse set now that the local dep info is gone.
+ if (Instruction *Inst = LocalDepEntry->second.getInst()) + RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst); + + // Remove this local dependency info. + LocalDeps.erase(LocalDepEntry); + } + + // If we have any cached pointer dependencies on this instruction, remove + // them. If the instruction has non-pointer type, then it can't be a pointer + // base. + + // Remove it from both the load info and the store info. The instruction + // can't be in either of these maps if it is non-pointer. + if (RemInst->getType()->isPointerTy()) { + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); + } + + // Loop over all of the things that depend on the instruction we're removing. + // + SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd; + + // If we find RemInst as a clobber or Def in any of the maps for other values, + // we need to replace its entry with a dirty version of the instruction after + // it. If RemInst is a terminator, we use a null (default-constructed) dirty + // value. + // + // Using a dirty version of the instruction after RemInst saves having to scan + // the entire block to get to this point. + MemDepResult NewDirtyVal; + if (!RemInst->isTerminator()) + NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); + + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseLocalDeps.end()) { + SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second; + // RemInst can't be the terminator if it has local stuff depending on it.
+ assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) && + "Nothing can locally depend on a terminator"); + + for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(), + E = ReverseDeps.end(); I != E; ++I) { + Instruction *InstDependingOnRemInst = *I; + assert(InstDependingOnRemInst != RemInst && + "Already removed our local dep info"); + + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; + + // Make sure to remember that new things depend on the instruction held by + // NewDirtyVal. + assert(NewDirtyVal.getInst() && "There is no way something else can have " + "a local dep on this if it is a terminator!"); + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + InstDependingOnRemInst)); + } + + ReverseLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating the + // 'ReverseDeps' reference. + while (!ReverseDepsToAdd.empty()) { + ReverseLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + // Same treatment for instructions whose cached non-local results name + // RemInst. + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseNonLocalDeps.end()) { + SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second; + for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end(); + I != E; ++I) { + assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); + + PerInstNLInfo &INLD = NonLocalDeps[*I]; + // The information is now dirty! + INLD.second = true; + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + DE = INLD.first.end(); DI != DE; ++DI) { + if (DI->getResult().getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal); + + if (Instruction *NextI = NewDirtyVal.getInst()) + ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); + } + } + + ReverseNonLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating 'Set'. + while (!ReverseDepsToAdd.empty()) { + ReverseNonLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a + // value in the NonLocalPointerDeps info. + ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = + ReverseNonLocalPtrDeps.find(RemInst); + if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { + SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second; + SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd; + + for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(), + E = Set.end(); I != E; ++I) { + ValueIsLoadPair P = *I; + assert(P.getPointer() != RemInst && + "Already removed NonLocalPointerDeps info for RemInst"); + + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; + + // The cache is not valid for any specific block anymore. + NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); + + // Update any entries for RemInst to use the instruction after it. + for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); + DI != DE; ++DI) { + if (DI->getResult().getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->setResult(NewDirtyVal); + + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) + ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); + } + + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its + // subsequent value may invalidate the sortedness.
+ std::sort(NLPDI.begin(), NLPDI.end()); + } + + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); + + while (!ReversePtrDepsToAdd.empty()) { + ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] + .insert(ReversePtrDepsToAdd.back().second); + ReversePtrDepsToAdd.pop_back(); + } + } + + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); + AA->deleteValue(RemInst); + DEBUG(verifyRemoved(RemInst)); +} +/// verifyRemoved - Verify that the specified instruction does not occur +/// in our internal data structures. It walks LocalDeps, NonLocalPointerDeps, +/// NonLocalDeps and the three reverse maps (ReverseLocalDeps, +/// ReverseNonLocalDeps, ReverseNonLocalPtrDeps), checking both keys and cached +/// values. Every check is an assert, so the function has no effect when +/// asserts are compiled out. +void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { + for (LocalDepMapType::const_iterator I = LocalDeps.begin(), + E = LocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + assert(I->second.getInst() != D && + "Inst occurs in data structures"); + } + + for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), + E = NonLocalPointerDeps.end(); I != E; ++I) { + assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); + const NonLocalDepInfo &Val = I->second.NonLocalDeps; + for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end(); + II != E; ++II) + assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); + } + + for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), + E = NonLocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + const PerInstNLInfo &INLD = I->second; + for (NonLocalDepInfo::const_iterator II = INLD.first.begin(), + EE = INLD.first.end(); II != EE; ++II) + assert(II->getResult().getInst() != D && "Inst occurs in data structures"); + } + + for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), + E = ReverseLocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(), + EE = I->second.end(); II != EE; ++II) + assert(*II != D && "Inst occurs in
data structures"); + } + + for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), + E = ReverseNonLocalDeps.end(); + I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(), + EE = I->second.end(); II != EE; ++II) + assert(*II != D && "Inst occurs in data structures"); + } + + for (ReverseNonLocalPtrDepTy::const_iterator + I = ReverseNonLocalPtrDeps.begin(), + E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in rev NLPD map"); + + for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(), + E = I->second.end(); II != E; ++II) + assert(*II != ValueIsLoadPair(D, false) && + *II != ValueIsLoadPair(D, true) && + "Inst occurs in ReverseNonLocalPtrDeps map"); + } + +} diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp new file mode 100644 index 000000000000..03415375263a --- /dev/null +++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -0,0 +1,87 @@ +//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass decodes the debug info metadata in a module and prints it in a +// (sufficiently-prepared-) human-readable form. +// +// For example, run this pass from opt along with the -analyze option, and +// it'll print to standard output.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + /// ModuleDebugInfoPrinter - Module pass that hands the module to a + /// DebugInfoFinder in runOnModule() and writes what the finder holds in + /// print(). + class ModuleDebugInfoPrinter : public ModulePass { + DebugInfoFinder Finder; + public: + static char ID; // Pass identification, replacement for typeid + ModuleDebugInfoPrinter() : ModulePass(ID) { + initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + virtual void print(raw_ostream &O, const Module *M) const; + }; +} + +char ModuleDebugInfoPrinter::ID = 0; +INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo", + "Decodes module-level debug info", false, true) + +/// createModuleDebugInfoPrinterPass - Return a newly allocated +/// ModuleDebugInfoPrinter. +ModulePass *llvm::createModuleDebugInfoPrinterPass() { + return new ModuleDebugInfoPrinter(); +} + +/// runOnModule - Pass M to Finder.processModule() so that print() has +/// something to report. Always returns false. +bool ModuleDebugInfoPrinter::runOnModule(Module &M) { + Finder.processModule(M); + return false; +} + +/// print - Write every compile unit, subprogram, global variable and type held +/// by Finder to O, one per line, each prefixed with a label naming its kind. +/// The Module argument is not used. +void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { + for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(), + E = Finder.compile_unit_end(); I != E; ++I) { + O << "Compile Unit: "; + DICompileUnit(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.subprogram_begin(), + E = Finder.subprogram_end(); I != E; ++I) { + O << "Subprogram: "; + DISubprogram(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.global_variable_begin(), + E = Finder.global_variable_end(); I != E; ++I) { + O << "GlobalVariable: "; + DIGlobalVariable(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.type_begin(), + E = Finder.type_end(); I != E; ++I) { + O << "Type: "; +
DIType(*I).print(O); + O << '\n'; + } +} diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp new file mode 100644 index 000000000000..907e9621baed --- /dev/null +++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp @@ -0,0 +1,88 @@ +//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the default implementation of the Alias Analysis interface +// that simply returns "I don't know" for all queries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + /// NoAA - This class implements the -no-aa pass, which always returns "I + /// don't know" for alias queries. NoAA is unlike other alias analysis + /// implementations, in that it does not chain to a previous analysis. As + /// such it doesn't follow many of the rules that other alias analyses must. + /// + /// Every query below returns a fixed answer (MayAlias, + /// UnknownModRefBehavior, ModRef, or false) and the update hooks + /// (deleteValue, copyValue, addEscapingUse) are empty. + struct NoAA : public ImmutablePass, public AliasAnalysis { + static char ID; // Class identification, replacement for typeinfo + NoAA() : ImmutablePass(ID) { + initializeNoAAPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // Intentionally empty: nothing is added to AU. + } + + virtual void initializePass() { + // Note: NoAA does not call InitializeAliasAnalysis because it's + // special and does not support chaining.
+ TD = getAnalysisIfAvailable<DataLayout>(); + } + + virtual AliasResult alias(const Location &LocA, const Location &LocB) { + return MayAlias; + } + + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + return UnknownModRefBehavior; + } + virtual ModRefBehavior getModRefBehavior(const Function *F) { + return UnknownModRefBehavior; + } + + virtual bool pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + return false; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + return ModRef; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return ModRef; + } + + virtual void deleteValue(Value *V) {} + virtual void copyValue(Value *From, Value *To) {} + virtual void addEscapingUse(Use &U) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. Here, a request for AliasAnalysis::ID gets this + /// object converted to AliasAnalysis*; any other ID gets the unadjusted + /// this pointer. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) + // Hand back the AliasAnalysis base-class view of this object. + return (AliasAnalysis*)this; + return this; + } + }; +} // End of anonymous namespace + +// Register this pass... +char NoAA::ID = 0; +INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", + "No Alias Analysis (always returns 'may' alias)", + true, true, true) + +/// createNoAAPass - Return a newly allocated NoAA pass. +ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp new file mode 100644 index 000000000000..e6af0663feaa --- /dev/null +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -0,0 +1,443 @@ +//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements the PHITransAddr class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// CanPHITrans - Return true if Inst is one of the instruction kinds this file +/// can PHI translate: a PHI node, a getelementptr, a cast for which +/// isSafeToSpeculativelyExecute() holds, or an add whose second operand is a +/// ConstantInt. +static bool CanPHITrans(Instruction *Inst) { + if (isa<PHINode>(Inst) || + isa<GetElementPtrInst>(Inst)) + return true; + + if (isa<CastInst>(Inst) && + isSafeToSpeculativelyExecute(Inst)) + return true; + + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) + return true; + + // NOTE(review): the lines below are disabled debug output; 'Pointer' and + // 'PtrInst' are not names declared in this function. + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + return false; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// dump - Print Addr and every entry of InstInputs to dbgs(). Only compiled +/// when NDEBUG is not defined or LLVM_ENABLE_DUMP is defined. +void PHITransAddr::dump() const { + if (Addr == 0) { + dbgs() << "PHITransAddr: null\n"; + return; + } + dbgs() << "PHITransAddr: " << *Addr << "\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; +} +#endif + + +/// VerifySubExpr - Recursive helper for PHITransAddr::Verify. Each instruction +/// reachable from Expr must either be listed in InstInputs or satisfy +/// CanPHITrans with operands that verify in turn. Entries that are found are +/// erased from InstInputs, so the list passed in is modified. +static bool VerifySubExpr(Value *Expr, + SmallVectorImpl<Instruction*> &InstInputs) { + // If this is a non-instruction value, there is nothing to do. + Instruction *I = dyn_cast<Instruction>(Expr); + if (I == 0) return true; + + // If it's an instruction, it is either in InstInputs or its operands + // recursively are.
+ SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return true; + } + + // If it isn't in the InstInputs list it is a subexpr incorporated into the + // address. Sanity check that it is phi translatable. + if (!CanPHITrans(I)) { + errs() << "Non phi translatable instruction found in PHITransAddr:\n"; + errs() << *I << '\n'; + llvm_unreachable("Either something is missing from InstInputs or " + "CanPHITrans is wrong."); + } + + // Validate the operands of the instruction. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!VerifySubExpr(I->getOperand(i), InstInputs)) + return false; + + return true; +} + +/// Verify - Check internal consistency of this data structure. If the +/// structure is valid, it returns true. If invalid, it prints errors and +/// returns false. +/// +/// NOTE(review): both failure paths visible here (a non-translatable +/// subexpression in VerifySubExpr, and leftover inputs below) print to errs() +/// and then reach llvm_unreachable instead of returning false. +bool PHITransAddr::Verify() const { + if (Addr == 0) return true; + + // Work on a copy: VerifySubExpr erases every input it encounters. + SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); + + if (!VerifySubExpr(Addr, Tmp)) + return false; + + // Anything still in Tmp was listed as an input but is not used by Addr. + if (!Tmp.empty()) { + errs() << "PHITransAddr contains extra instructions:\n"; + // Note: this lists every entry of InstInputs, not only the leftovers in + // Tmp. + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; + llvm_unreachable("This is unexpected."); + } + + // a-ok. + return true; +} + + +/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true +/// if we have some hope of doing it. This should be used as a filter to +/// avoid calling PHITranslateValue in hopeless situations. +bool PHITransAddr::IsPotentiallyPHITranslatable() const { + // If the input value is not an instruction, or if it is not defined in CurBB, + // then we don't need to phi translate it. + // NOTE(review): no block is consulted below; the result only depends on + // whether Addr is a non-instruction or satisfies CanPHITrans.
+ Instruction *Inst = dyn_cast<Instruction>(Addr); + return Inst == 0 || CanPHITrans(Inst); +} + + +static void RemoveInstInputs(Value *V, + SmallVectorImpl<Instruction*> &InstInputs) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return; + + // RemoveInstInputs: drop V from InstInputs if it is listed there; otherwise + // drop whatever inputs its instruction operands contribute. + // + // If the instruction is in the InstInputs list, remove it. + SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return; + } + + assert(!isa<PHINode>(I) && "Error, removing something that isn't an input"); + + // Otherwise, it must have instruction inputs itself. Zap them recursively. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i))) + RemoveInstInputs(Op, InstInputs); + } +} + +/// PHITranslateSubExpr - Translate V, a subexpression of the address, from +/// CurBB into PredBB. Returns V itself when nothing has to change, the +/// translated value when one is found, or null on failure. InstInputs is +/// updated as inputs are consumed, replaced or added. When DT is non-null, an +/// existing instruction is only reused if its block dominates PredBB. +Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, + BasicBlock *PredBB, + const DominatorTree *DT) { + // If this is a non-instruction value, it can't require PHI translation. + Instruction *Inst = dyn_cast<Instruction>(V); + if (Inst == 0) return V; + + // Determine whether 'Inst' is an input to our PHI translatable expression. + bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); + + // Handle input instructions if needed. + if (isInput) { + if (Inst->getParent() != CurBB) { + // If it is an input defined in a different block, then it remains an + // input. + return Inst; + } + + // If 'Inst' is defined in this block and is an input that needs to be phi + // translated, we need to incorporate the value into the expression or fail. + + // In either case, the instruction itself isn't an input any longer. + InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst)); + + // If this is a PHI, go ahead and translate it.
+ if (PHINode *PN = dyn_cast<PHINode>(Inst)) + return AddAsInput(PN->getIncomingValueForBlock(PredBB)); + + // If this is a non-phi value, and it is analyzable, we can incorporate it + // into the expression by making all instruction operands be inputs. + if (!CanPHITrans(Inst)) + return 0; + + // All instruction operands are now inputs (and of course, they may also be + // defined in this block, so they may need to be phi translated themselves). + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i))) + InstInputs.push_back(Op); + } + + // Ok, it must be an intermediate result (either because it started that way + // or because we just incorporated it into the expression). See if its + // operands need to be phi translated, and if so, reconstruct it. + + if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { + if (!isSafeToSpeculativelyExecute(Cast)) return 0; + Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); + if (PHIIn == 0) return 0; + if (PHIIn == Cast->getOperand(0)) + return Cast; + + // Find an available version of this cast. + + // Constants are trivial to find. + if (Constant *C = dyn_cast<Constant>(PHIIn)) + return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(), + C, Cast->getType())); + + // Otherwise we have to see if a casted version of the incoming pointer + // is available. If so, we can use it, otherwise we have to fail. + for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); + UI != E; ++UI) { + if (CastInst *CastI = dyn_cast<CastInst>(*UI)) + if (CastI->getOpcode() == Cast->getOpcode() && + CastI->getType() == Cast->getType() && + (!DT || DT->dominates(CastI->getParent(), PredBB))) + return CastI; + } + return 0; + } + + // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + bool AnyChanged = false; + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); + if (GEPOp == 0) return 0; + + AnyChanged |= GEPOp != GEP->getOperand(i); + GEPOps.push_back(GEPOp); + } + + if (!AnyChanged) + return GEP; + + // Simplify the GEP to handle 'gep x, 0' -> x etc. + if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) { + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + RemoveInstInputs(GEPOps[i], InstInputs); + + return AddAsInput(V); + } + + // Scan to see if we have this GEP available: look through the users of the + // translated first operand for a GEP in the same function with the same + // type and identical operands (and, when DT is given, one whose block + // dominates PredBB). + Value *APHIOp = GEPOps[0]; + for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); + UI != E; ++UI) { + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) + if (GEPI->getType() == GEP->getType() && + GEPI->getNumOperands() == GEPOps.size() && + GEPI->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(GEPI->getParent(), PredBB))) { + bool Mismatch = false; + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + if (GEPI->getOperand(i) != GEPOps[i]) { + Mismatch = true; + break; + } + if (!Mismatch) + return GEPI; + } + } + return 0; + } + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Constant *RHS = cast<ConstantInt>(Inst->getOperand(1)); + bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); + bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); + + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); + if (LHS == 0) return 0; + + // If the PHI translated LHS is an add of a constant, fold the immediates.
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) + if (BOp->getOpcode() == Instruction::Add) + if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + LHS = BOp->getOperand(0); + RHS = ConstantExpr::getAdd(RHS, CI); + isNSW = isNUW = false; + + // If the old 'LHS' was an input, add the new 'LHS' as an input. + if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + RemoveInstInputs(BOp, InstInputs); + AddAsInput(LHS); + } + } + + // See if the add simplifies away. + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) { + // If we simplified the operands, the LHS is no longer an input, but Res + // is. + RemoveInstInputs(LHS, InstInputs); + return AddAsInput(Res); + } + + // If we didn't modify the add, just return it. + if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1)) + return Inst; + + // Otherwise, see if we have this add available somewhere: an add of exactly + // (LHS, RHS) among the users of LHS, in the same function, and (when DT is + // given) in a block that dominates PredBB. + for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); + UI != E; ++UI) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) + if (BO->getOpcode() == Instruction::Add && + BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && + BO->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(BO->getParent(), PredBB))) + return BO; + } + + return 0; + } + + // Otherwise, we failed. + return 0; +} + + +/// PHITranslateValue - PHI translate the current address up the CFG from +/// CurBB to PredBB, updating our state to reflect any needed changes. If the +/// dominator tree DT is non-null, the translated value must dominate +/// PredBB. This returns true on failure and sets Addr to null. +bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT) { + assert(Verify() && "Invalid PHITransAddr!"); + Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT); + assert(Verify() && "Invalid PHITransAddr!"); + + if (DT) { + // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) + if (!DT->dominates(Inst->getParent(), PredBB)) + Addr = 0; + } + + return Addr == 0; +} + +/// PHITranslateWithInsertion - PHI translate this value into the specified +/// predecessor block, inserting a computation of the value if it is +/// unavailable. +/// +/// All newly created instructions are added to the NewInsts list. This +/// returns null on failure. +/// +Value *PHITransAddr:: +PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree &DT, + SmallVectorImpl<Instruction*> &NewInsts) { + unsigned NISize = NewInsts.size(); + + // Attempt to PHI translate with insertion. + Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts); + + // If successful, return the new value. + if (Addr) return Addr; + + // If not, destroy any intermediate instructions inserted. + while (NewInsts.size() != NISize) + NewInsts.pop_back_val()->eraseFromParent(); + return 0; +} + + +/// InsertPHITranslatedPointer - Insert a computation of the PHI translated +/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB +/// block. All newly created instructions are added to the NewInsts list. +/// This returns null on failure. +/// +Value *PHITransAddr:: +InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, + BasicBlock *PredBB, const DominatorTree &DT, + SmallVectorImpl<Instruction*> &NewInsts) { + // See if we have a version of this value already available and dominating + // PredBB. If so, there is no need to insert a new instance of it. + PHITransAddr Tmp(InVal, TD); + if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) + return Tmp.getAddr(); + + // If we don't have an available version of this value, it must be an + // instruction. + Instruction *Inst = cast<Instruction>(InVal); + + // Handle cast of PHI translatable value. 
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { + if (!isSafeToSpeculativelyExecute(Cast)) return 0; + Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + // Otherwise insert a cast at the end of PredBB. + CastInst *New = CastInst::Create(Cast->getOpcode(), + OpVal, InVal->getType(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + NewInsts.push_back(New); + return New; + } + + // Handle getelementptr with at least one PHI operand. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + BasicBlock *CurBB = GEP->getParent(); + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + GEPOps.push_back(OpVal); + } + + GetElementPtrInst *Result = + GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Result->setIsInBounds(GEP->isInBounds()); + NewInsts.push_back(Result); + return Result; + } + +#if 0 + // FIXME: This code works, but it is unclear that we actually want to insert + // a big chain of computation in order to make a value available in a block. + // This needs to be evaluated carefully to consider its cost trade offs. + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. 
+ Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap()); + NewInsts.push_back(Res); + return Res; + } +#endif + + return 0; +} diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp new file mode 100644 index 000000000000..96804a01edc6 --- /dev/null +++ b/contrib/llvm/lib/Analysis/PostDominators.cpp @@ -0,0 +1,51 @@ +//===- PostDominators.cpp - Post-Dominator Calculation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the post-dominator construction algorithms. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postdomtree"
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+                "Post-Dominator Tree Construction", true, true)
+
+/// runOnFunction - Rebuild the wrapped tree for F by delegating to
+/// DT->recalculate().  Always returns false.
+bool PostDominatorTree::runOnFunction(Function &F) {
+  DT->recalculate(F);
+  return false;
+}
+
+/// The pass deletes the tree object held in DT when it is destroyed.
+PostDominatorTree::~PostDominatorTree() {
+  delete DT;
+}
+
+/// print - Forward printing to the wrapped tree; the Module argument is
+/// unused.
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+  DT->print(OS);
+}
+
+
+/// createPostDomTree - Factory returning a newly allocated PostDominatorTree
+/// pass.
+FunctionPass* llvm::createPostDomTree() {
+  return new PostDominatorTree();
+}
+
diff --git a/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
new file mode 100644
index 000000000000..0a342b2167e4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
@@ -0,0 +1,36 @@
+//===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Implementation of the pointer use visitors.
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PtrUseVisitor.h" + +using namespace llvm; + +void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) { + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); + UI != UE; ++UI) { + if (VisitedUses.insert(&UI.getUse())) { + UseToVisit NewU = { + UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown), + Offset + }; + Worklist.push_back(llvm_move(NewU)); + } + } +} + +bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) { + if (!IsOffsetKnown) + return false; + + return GEPI.accumulateConstantOffset(DL, Offset); +} diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp new file mode 100644 index 000000000000..563568876763 --- /dev/null +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -0,0 +1,865 @@ +//===- RegionInfo.cpp - SESE region detection analysis --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Detects single entry single exit regions in the control flow graph. +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "region" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h" +#include <algorithm> +#include <set> + +using namespace llvm; + +// Always verify if expensive checking is enabled. 
+#ifdef XDEBUG +static bool VerifyRegionInfo = true; +#else +static bool VerifyRegionInfo = false; +#endif + +static cl::opt<bool,true> +VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo), + cl::desc("Verify region info (time consuming)")); + +STATISTIC(numRegions, "The # of regions"); +STATISTIC(numSimpleRegions, "The # of simple regions"); + +static cl::opt<enum Region::PrintStyle> printStyle("print-region-style", + cl::Hidden, + cl::desc("style of printing regions"), + cl::values( + clEnumValN(Region::PrintNone, "none", "print no details"), + clEnumValN(Region::PrintBB, "bb", + "print regions in detail with block_iterator"), + clEnumValN(Region::PrintRN, "rn", + "print regions in detail with element_iterator"), + clEnumValEnd)); +//===----------------------------------------------------------------------===// +/// Region Implementation +Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo, + DominatorTree *dt, Region *Parent) + : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {} + +Region::~Region() { + // Free the cached nodes. + for (BBNodeMapT::iterator it = BBNodeMap.begin(), + ie = BBNodeMap.end(); it != ie; ++it) + delete it->second; + + // Only clean the cache for this Region. Caches of child Regions will be + // cleaned when the child Regions are deleted. 
+ BBNodeMap.clear(); + + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; +} + +void Region::replaceEntry(BasicBlock *BB) { + entry.setPointer(BB); +} + +void Region::replaceExit(BasicBlock *BB) { + assert(exit && "No exit to replace!"); + exit = BB; +} + +void Region::replaceEntryRecursive(BasicBlock *NewEntry) { + std::vector<Region *> RegionQueue; + BasicBlock *OldEntry = getEntry(); + + RegionQueue.push_back(this); + while (!RegionQueue.empty()) { + Region *R = RegionQueue.back(); + RegionQueue.pop_back(); + + R->replaceEntry(NewEntry); + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + if ((*RI)->getEntry() == OldEntry) + RegionQueue.push_back(*RI); + } +} + +void Region::replaceExitRecursive(BasicBlock *NewExit) { + std::vector<Region *> RegionQueue; + BasicBlock *OldExit = getExit(); + + RegionQueue.push_back(this); + while (!RegionQueue.empty()) { + Region *R = RegionQueue.back(); + RegionQueue.pop_back(); + + R->replaceExit(NewExit); + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + if ((*RI)->getExit() == OldExit) + RegionQueue.push_back(*RI); + } +} + +bool Region::contains(const BasicBlock *B) const { + BasicBlock *BB = const_cast<BasicBlock*>(B); + + if (!DT->getNode(BB)) + return false; + + BasicBlock *entry = getEntry(), *exit = getExit(); + + // Toplevel region. + if (!exit) + return true; + + return (DT->dominates(entry, BB) + && !(DT->dominates(exit, BB) && DT->dominates(entry, exit))); +} + +bool Region::contains(const Loop *L) const { + // BBs that are not part of any loop are element of the Loop + // described by the NULL pointer. This loop is not part of any region, + // except if the region describes the whole function. 
+ if (L == 0) + return getExit() == 0; + + if (!contains(L->getHeader())) + return false; + + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(), + BE = ExitingBlocks.end(); BI != BE; ++BI) + if (!contains(*BI)) + return false; + + return true; +} + +Loop *Region::outermostLoopInRegion(Loop *L) const { + if (!contains(L)) + return 0; + + while (L && contains(L->getParentLoop())) { + L = L->getParentLoop(); + } + + return L; +} + +Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { + assert(LI && BB && "LI and BB cannot be null!"); + Loop *L = LI->getLoopFor(BB); + return outermostLoopInRegion(L); +} + +BasicBlock *Region::getEnteringBlock() const { + BasicBlock *entry = getEntry(); + BasicBlock *Pred; + BasicBlock *enteringBlock = 0; + + for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; + ++PI) { + Pred = *PI; + if (DT->getNode(Pred) && !contains(Pred)) { + if (enteringBlock) + return 0; + + enteringBlock = Pred; + } + } + + return enteringBlock; +} + +BasicBlock *Region::getExitingBlock() const { + BasicBlock *exit = getExit(); + BasicBlock *Pred; + BasicBlock *exitingBlock = 0; + + if (!exit) + return 0; + + for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; + ++PI) { + Pred = *PI; + if (contains(Pred)) { + if (exitingBlock) + return 0; + + exitingBlock = Pred; + } + } + + return exitingBlock; +} + +bool Region::isSimple() const { + return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock(); +} + +std::string Region::getNameStr() const { + std::string exitName; + std::string entryName; + + if (getEntry()->getName().empty()) { + raw_string_ostream OS(entryName); + + WriteAsOperand(OS, getEntry(), false); + } else + entryName = getEntry()->getName(); + + if (getExit()) { + if (getExit()->getName().empty()) { + raw_string_ostream OS(exitName); + + WriteAsOperand(OS, getExit(), 
false); + } else + exitName = getExit()->getName(); + } else + exitName = "<Function Return>"; + + return entryName + " => " + exitName; +} + +void Region::verifyBBInRegion(BasicBlock *BB) const { + if (!contains(BB)) + llvm_unreachable("Broken region found!"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (!contains(*SI) && exit != *SI) + llvm_unreachable("Broken region found!"); + + if (entry != BB) + for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) + if (!contains(*SI)) + llvm_unreachable("Broken region found!"); +} + +void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const { + BasicBlock *exit = getExit(); + + visited->insert(BB); + + verifyBBInRegion(BB); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (*SI != exit && visited->find(*SI) == visited->end()) + verifyWalk(*SI, visited); +} + +void Region::verifyRegion() const { + // Only do verification when user wants to, otherwise this expensive + // check will be invoked by PassManager. 
+ if (!VerifyRegionInfo) return; + + std::set<BasicBlock*> visited; + verifyWalk(getEntry(), &visited); +} + +void Region::verifyRegionNest() const { + for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->verifyRegionNest(); + + verifyRegion(); +} + +Region::element_iterator Region::element_begin() { + return GraphTraits<Region*>::nodes_begin(this); +} + +Region::element_iterator Region::element_end() { + return GraphTraits<Region*>::nodes_end(this); +} + +Region::const_element_iterator Region::element_begin() const { + return GraphTraits<const Region*>::nodes_begin(this); +} + +Region::const_element_iterator Region::element_end() const { + return GraphTraits<const Region*>::nodes_end(this); +} + +Region* Region::getSubRegionNode(BasicBlock *BB) const { + Region *R = RI->getRegionFor(BB); + + if (!R || R == this) + return 0; + + // If we pass the BB out of this region, that means our code is broken. + assert(contains(R) && "BB not in current region!"); + + while (contains(R->getParent()) && R->getParent() != this) + R = R->getParent(); + + if (R->getEntry() != BB) + return 0; + + return R; +} + +RegionNode* Region::getBBNode(BasicBlock *BB) const { + assert(contains(BB) && "Can get BB node out of this region!"); + + BBNodeMapT::const_iterator at = BBNodeMap.find(BB); + + if (at != BBNodeMap.end()) + return at->second; + + RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB); + BBNodeMap.insert(std::make_pair(BB, NewNode)); + return NewNode; +} + +RegionNode* Region::getNode(BasicBlock *BB) const { + assert(contains(BB) && "Can get BB node out of this region!"); + if (Region* Child = getSubRegionNode(BB)) + return Child->getNode(); + + return getBBNode(BB); +} + +void Region::transferChildrenTo(Region *To) { + for (iterator I = begin(), E = end(); I != E; ++I) { + (*I)->parent = To; + To->children.push_back(*I); + } + children.clear(); +} + +void Region::addSubRegion(Region *SubRegion, bool moveChildren) { + 
assert(SubRegion->parent == 0 && "SubRegion already has a parent!"); + assert(std::find(begin(), end(), SubRegion) == children.end() + && "Subregion already exists!"); + + SubRegion->parent = this; + children.push_back(SubRegion); + + if (!moveChildren) + return; + + assert(SubRegion->children.size() == 0 + && "SubRegions that contain children are not supported"); + + for (element_iterator I = element_begin(), E = element_end(); I != E; ++I) + if (!(*I)->isSubRegion()) { + BasicBlock *BB = (*I)->getNodeAs<BasicBlock>(); + + if (SubRegion->contains(BB)) + RI->setRegionFor(BB, SubRegion); + } + + std::vector<Region*> Keep; + for (iterator I = begin(), E = end(); I != E; ++I) + if (SubRegion->contains(*I) && *I != SubRegion) { + SubRegion->children.push_back(*I); + (*I)->parent = SubRegion; + } else + Keep.push_back(*I); + + children.clear(); + children.insert(children.begin(), Keep.begin(), Keep.end()); +} + + +Region *Region::removeSubRegion(Region *Child) { + assert(Child->parent == this && "Child is not a child of this region!"); + Child->parent = 0; + RegionSet::iterator I = std::find(children.begin(), children.end(), Child); + assert(I != children.end() && "Region does not exit. 
Unable to remove."); + children.erase(children.begin()+(I-begin())); + return Child; +} + +unsigned Region::getDepth() const { + unsigned Depth = 0; + + for (Region *R = parent; R != 0; R = R->parent) + ++Depth; + + return Depth; +} + +Region *Region::getExpandedRegion() const { + unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors(); + + if (NumSuccessors == 0) + return NULL; + + for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); + PI != PE; ++PI) + if (!DT->dominates(getEntry(), *PI)) + return NULL; + + Region *R = RI->getRegionFor(exit); + + if (R->getEntry() != exit) { + if (exit->getTerminator()->getNumSuccessors() == 1) + return new Region(getEntry(), *succ_begin(exit), RI, DT); + else + return NULL; + } + + while (R->getParent() && R->getParent()->getEntry() == exit) + R = R->getParent(); + + if (!DT->dominates(getEntry(), R->getExit())) + for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); + PI != PE; ++PI) + if (!DT->dominates(R->getExit(), *PI)) + return NULL; + + return new Region(getEntry(), R->getExit(), RI, DT); +} + +void Region::print(raw_ostream &OS, bool print_tree, unsigned level, + enum PrintStyle Style) const { + if (print_tree) + OS.indent(level*2) << "[" << level << "] " << getNameStr(); + else + OS.indent(level*2) << getNameStr(); + + OS << "\n"; + + + if (Style != PrintNone) { + OS.indent(level*2) << "{\n"; + OS.indent(level*2 + 2); + + if (Style == PrintBB) { + for (const_block_iterator I = block_begin(), E = block_end(); I != E; ++I) + OS << (*I)->getName() << ", "; // TODO: remove the last "," + } else if (Style == PrintRN) { + for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) + OS << **I << ", "; // TODO: remove the last ", + } + + OS << "\n"; + } + + if (print_tree) + for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->print(OS, print_tree, level+1, Style); + + if (Style != PrintNone) + OS.indent(level*2) << "} \n"; +} + +#if 
!defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void Region::dump() const { + print(dbgs(), true, getDepth(), printStyle.getValue()); +} +#endif + +void Region::clearNodeCache() { + // Free the cached nodes. + for (BBNodeMapT::iterator I = BBNodeMap.begin(), + IE = BBNodeMap.end(); I != IE; ++I) + delete I->second; + + BBNodeMap.clear(); + for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->clearNodeCache(); +} + +//===----------------------------------------------------------------------===// +// RegionInfo implementation +// + +bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry, + BasicBlock *exit) const { + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(entry, P) && !DT->dominates(exit, P)) + return false; + } + return true; +} + +bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + typedef DominanceFrontier::DomSetType DST; + + DST *entrySuccs = &DF->find(entry)->second; + + // Exit is the header of a loop that contains the entry. In this case, + // the dominance frontier must only contain the exit. + if (!DT->dominates(entry, exit)) { + for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) + if (*SI != exit && *SI != entry) + return false; + + return true; + } + + DST *exitSuccs = &DF->find(exit)->second; + + // Do not allow edges leaving the region. + for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) { + if (*SI == exit || *SI == entry) + continue; + if (exitSuccs->find(*SI) == exitSuccs->end()) + return false; + if (!isCommonDomFrontier(*SI, entry, exit)) + return false; + } + + // Do not allow edges pointing into the region. 
+ for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end(); + SI != SE; ++SI) + if (DT->properlyDominates(entry, *SI) && *SI != exit) + return false; + + + return true; +} + +void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit, + BBtoBBMap *ShortCut) const { + assert(entry && exit && "entry and exit must not be null!"); + + BBtoBBMap::iterator e = ShortCut->find(exit); + + if (e == ShortCut->end()) + // No further region at exit available. + (*ShortCut)[entry] = exit; + else { + // We found a region e that starts at exit. Therefore (entry, e->second) + // is also a region, that is larger than (entry, exit). Insert the + // larger one. + BasicBlock *BB = e->second; + (*ShortCut)[entry] = BB; + } +} + +DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N, + BBtoBBMap *ShortCut) const { + BBtoBBMap::iterator e = ShortCut->find(N->getBlock()); + + if (e == ShortCut->end()) + return N->getIDom(); + + return PDT->getNode(e->second)->getIDom(); +} + +bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + + unsigned num_successors = succ_end(entry) - succ_begin(entry); + + if (num_successors <= 1 && exit == *(succ_begin(entry))) + return true; + + return false; +} + +void RegionInfo::updateStatistics(Region *R) { + ++numRegions; + + // TODO: Slow. Should only be enabled if -stats is used. 
+ if (R->isSimple()) ++numSimpleRegions; +} + +Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { + assert(entry && exit && "entry and exit must not be null!"); + + if (isTrivialRegion(entry, exit)) + return 0; + + Region *region = new Region(entry, exit, this, DT); + BBtoRegion.insert(std::make_pair(entry, region)); + + #ifdef XDEBUG + region->verifyRegion(); + #else + DEBUG(region->verifyRegion()); + #endif + + updateStatistics(region); + return region; +} + +void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { + assert(entry); + + DomTreeNode *N = PDT->getNode(entry); + + if (!N) + return; + + Region *lastRegion= 0; + BasicBlock *lastExit = entry; + + // As only a BasicBlock that postdominates entry can finish a region, walk the + // post dominance tree upwards. + while ((N = getNextPostDom(N, ShortCut))) { + BasicBlock *exit = N->getBlock(); + + if (!exit) + break; + + if (isRegion(entry, exit)) { + Region *newRegion = createRegion(entry, exit); + + if (lastRegion) + newRegion->addSubRegion(lastRegion); + + lastRegion = newRegion; + lastExit = exit; + } + + // This can never be a region, so stop the search. + if (!DT->dominates(entry, exit)) + break; + } + + // Tried to create regions from entry to lastExit. Next time take a + // shortcut from entry to lastExit. + if (lastExit != entry) + insertShortCut(entry, lastExit, ShortCut); +} + +void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) { + BasicBlock *entry = &(F.getEntryBlock()); + DomTreeNode *N = DT->getNode(entry); + + // Iterate over the dominance tree in post order to start with the small + // regions from the bottom of the dominance tree. If the small regions are + // detected first, detection of bigger regions is faster, as we can jump + // over the small regions. 
+ for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE; + ++FI) { + findRegionsWithEntry(FI->getBlock(), ShortCut); + } +} + +Region *RegionInfo::getTopMostParent(Region *region) { + while (region->parent) + region = region->getParent(); + + return region; +} + +void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) { + BasicBlock *BB = N->getBlock(); + + // Passed region exit + while (BB == region->getExit()) + region = region->getParent(); + + BBtoRegionMap::iterator it = BBtoRegion.find(BB); + + // This basic block is a start block of a region. It is already in the + // BBtoRegion relation. Only the child basic blocks have to be updated. + if (it != BBtoRegion.end()) { + Region *newRegion = it->second; + region->addSubRegion(getTopMostParent(newRegion)); + region = newRegion; + } else { + BBtoRegion[BB] = region; + } + + for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI) + buildRegionsTree(*CI, region); +} + +void RegionInfo::releaseMemory() { + BBtoRegion.clear(); + if (TopLevelRegion) + delete TopLevelRegion; + TopLevelRegion = 0; +} + +RegionInfo::RegionInfo() : FunctionPass(ID) { + initializeRegionInfoPass(*PassRegistry::getPassRegistry()); + TopLevelRegion = 0; +} + +RegionInfo::~RegionInfo() { + releaseMemory(); +} + +void RegionInfo::Calculate(Function &F) { + // ShortCut a function where for every BB the exit of the largest region + // starting with BB is stored. These regions can be threated as single BBS. + // This improves performance on linear CFGs. 
+ BBtoBBMap ShortCut; + + scanForRegions(F, &ShortCut); + BasicBlock *BB = &F.getEntryBlock(); + buildRegionsTree(DT->getNode(BB), TopLevelRegion); +} + +bool RegionInfo::runOnFunction(Function &F) { + releaseMemory(); + + DT = &getAnalysis<DominatorTree>(); + PDT = &getAnalysis<PostDominatorTree>(); + DF = &getAnalysis<DominanceFrontier>(); + + TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0); + updateStatistics(TopLevelRegion); + + Calculate(F); + + return false; +} + +void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTree>(); + AU.addRequired<PostDominatorTree>(); + AU.addRequired<DominanceFrontier>(); +} + +void RegionInfo::print(raw_ostream &OS, const Module *) const { + OS << "Region tree:\n"; + TopLevelRegion->print(OS, true, 0, printStyle.getValue()); + OS << "End region tree\n"; +} + +void RegionInfo::verifyAnalysis() const { + // Only do verification when user wants to, otherwise this expensive check + // will be invoked by PMDataManager::verifyPreservedAnalysis when + // a regionpass (marked PreservedAll) finish. + if (!VerifyRegionInfo) return; + + TopLevelRegion->verifyRegionNest(); +} + +// Region pass manager support. +Region *RegionInfo::getRegionFor(BasicBlock *BB) const { + BBtoRegionMap::const_iterator I= + BBtoRegion.find(BB); + return I != BBtoRegion.end() ? I->second : 0; +} + +void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) { + BBtoRegion[BB] = R; +} + +Region *RegionInfo::operator[](BasicBlock *BB) const { + return getRegionFor(BB); +} + +BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const { + BasicBlock *Exit = NULL; + + while (true) { + // Get largest region that starts at BB. + Region *R = getRegionFor(BB); + while (R && R->getParent() && R->getParent()->getEntry() == BB) + R = R->getParent(); + + // Get the single exit of BB. 
+ if (R && R->getEntry() == BB) + Exit = R->getExit(); + else if (++succ_begin(BB) == succ_end(BB)) + Exit = *succ_begin(BB); + else // No single exit exists. + return Exit; + + // Get largest region that starts at Exit. + Region *ExitR = getRegionFor(Exit); + while (ExitR && ExitR->getParent() + && ExitR->getParent()->getEntry() == Exit) + ExitR = ExitR->getParent(); + + for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE; + ++PI) + if (!R->contains(*PI) && !ExitR->contains(*PI)) + break; + + // This stops infinite cycles. + if (DT->dominates(Exit, BB)) + break; + + BB = Exit; + } + + return Exit; +} + +Region* +RegionInfo::getCommonRegion(Region *A, Region *B) const { + assert (A && B && "One of the Regions is NULL"); + + if (A->contains(B)) return A; + + while (!B->contains(A)) + B = B->getParent(); + + return B; +} + +Region* +RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const { + Region* ret = Regions.back(); + Regions.pop_back(); + + for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(), + E = Regions.end(); I != E; ++I) + ret = getCommonRegion(ret, *I); + + return ret; +} + +Region* +RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const { + Region* ret = getRegionFor(BBs.back()); + BBs.pop_back(); + + for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(), + E = BBs.end(); I != E; ++I) + ret = getCommonRegion(ret, getRegionFor(*I)); + + return ret; +} + +void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB) +{ + Region *R = getRegionFor(OldBB); + + setRegionFor(NewBB, R); + + while (R->getEntry() == OldBB && !R->isTopLevelRegion()) { + R->replaceEntry(NewBB); + R = R->getParent(); + } + + setRegionFor(OldBB, R); +} + +char RegionInfo::ID = 0; +INITIALIZE_PASS_BEGIN(RegionInfo, "regions", + "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) 
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier) +INITIALIZE_PASS_END(RegionInfo, "regions", + "Detect single entry single exit regions", true, true) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +namespace llvm { + FunctionPass *createRegionInfoPass() { + return new RegionInfo(); + } +} + diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp new file mode 100644 index 000000000000..9208fa21d7ec --- /dev/null +++ b/contrib/llvm/lib/Analysis/RegionPass.cpp @@ -0,0 +1,275 @@ +//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements RegionPass and RGPassManager. All region optimization +// and transformation passes are derived from RegionPass. RGPassManager is +// responsible for managing RegionPasses. +// most of these codes are COPY from LoopPass.cpp +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/RegionPass.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Support/Timer.h" + +#define DEBUG_TYPE "regionpassmgr" +#include "llvm/Support/Debug.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// RGPassManager +// + +char RGPassManager::ID = 0; + +RGPassManager::RGPassManager() + : FunctionPass(ID), PMDataManager() { + skipThisRegion = false; + redoThisRegion = false; + RI = NULL; + CurrentRegion = NULL; +} + +// Recurse through all subregions and all regions into RQ. 
+static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) { + RQ.push_back(R); + for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I) + addRegionIntoQueue(*I, RQ); +} + +/// Pass Manager itself does not invalidate any analysis info. +void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const { + Info.addRequired<RegionInfo>(); + Info.setPreservesAll(); +} + +/// run - Execute all of the passes scheduled for execution. Keep track of +/// whether any of the passes modifies the function, and if so, return true. +bool RGPassManager::runOnFunction(Function &F) { + RI = &getAnalysis<RegionInfo>(); + bool Changed = false; + + // Collect inherited analysis from Module level pass manager. + populateInheritedAnalysis(TPM->activeStack); + + addRegionIntoQueue(RI->getTopLevelRegion(), RQ); + + if (RQ.empty()) // No regions, skip calling finalizers + return false; + + // Initialization + for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end(); + I != E; ++I) { + Region *R = *I; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + RegionPass *RP = (RegionPass *)getContainedPass(Index); + Changed |= RP->doInitialization(R, *this); + } + } + + // Walk Regions + while (!RQ.empty()) { + + CurrentRegion = RQ.back(); + skipThisRegion = false; + redoThisRegion = false; + + // Run all passes on the current Region. + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + RegionPass *P = (RegionPass*)getContainedPass(Index); + + dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG, + CurrentRegion->getNameStr()); + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + + { + PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry()); + + TimeRegion PassTimer(getPassTimer(P)); + Changed |= P->runOnRegion(CurrentRegion, *this); + } + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG, + skipThisRegion ? 
"<deleted>" : + CurrentRegion->getNameStr()); + dumpPreservedSet(P); + + if (!skipThisRegion) { + // Manually check that this region is still healthy. This is done + // instead of relying on RegionInfo::verifyRegion since RegionInfo + // is a function pass and it's really expensive to verify every + // Region in the function every time. That level of checking can be + // enabled with the -verify-region-info option. + { + TimeRegion PassTimer(getPassTimer(P)); + CurrentRegion->verifyRegion(); + } + + // Then call the regular verifyAnalysis functions. + verifyPreservedAnalysis(P); + } + + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, + skipThisRegion ? "<deleted>" : + CurrentRegion->getNameStr(), + ON_REGION_MSG); + + if (skipThisRegion) + // Do not run other passes on this region. + break; + } + + // If the region was deleted, release all the region passes. This frees up + // some memory, and avoids trouble with the pass manager trying to call + // verifyAnalysis on them. + if (skipThisRegion) + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + freePass(P, "<deleted>", ON_REGION_MSG); + } + + // Pop the region from queue after running all passes. + RQ.pop_back(); + + if (redoThisRegion) + RQ.push_back(CurrentRegion); + + // Free all region nodes created in region passes. + RI->clearNodeCache(); + } + + // Finalization + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + RegionPass *P = (RegionPass*)getContainedPass(Index); + Changed |= P->doFinalization(); + } + + // Print the region tree after all pass. 
+ DEBUG( + dbgs() << "\nRegion tree of function " << F.getName() + << " after all region Pass:\n"; + RI->dump(); + dbgs() << "\n"; + ); + + return Changed; +} + +/// Print passes managed by this manager +void RGPassManager::dumpPassStructure(unsigned Offset) { + errs().indent(Offset*2) << "Region Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } +} + +namespace { +//===----------------------------------------------------------------------===// +// PrintRegionPass +class PrintRegionPass : public RegionPass { +private: + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + +public: + static char ID; + PrintRegionPass() : RegionPass(ID), Out(dbgs()) {} + PrintRegionPass(const std::string &B, raw_ostream &o) + : RegionPass(ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual bool runOnRegion(Region *R, RGPassManager &RGM) { + Out << Banner; + for (Region::block_iterator I = R->block_begin(), E = R->block_end(); + I != E; ++I) + (*I)->print(Out); + + return false; + } +}; + +char PrintRegionPass::ID = 0; +} //end anonymous namespace + +//===----------------------------------------------------------------------===// +// RegionPass + +// Check if this pass is suitable for the current RGPassManager, if +// available. This pass P is not suitable for a RGPassManager if P +// is not preserving higher level analysis info used by other +// RGPassManager passes. In such case, pop RGPassManager from the +// stack. This will force assignPassManager() to create new +// LPPassManger as expected. 
+void RegionPass::preparePassManager(PMStack &PMS) { + + // Find RGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_RegionPassManager) + PMS.pop(); + + + // If this pass is destroying high level information that is used + // by other passes that are managed by LPM then do not insert + // this pass in current LPM. Use new RGPassManager. + if (PMS.top()->getPassManagerType() == PMT_RegionPassManager && + !PMS.top()->preserveHigherLevelAnalysis(this)) + PMS.pop(); +} + +/// Assign pass manager to manage this pass. +void RegionPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find RGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_RegionPassManager) + PMS.pop(); + + RGPassManager *RGPM; + + // Create new Region Pass Manager if it does not exist. + if (PMS.top()->getPassManagerType() == PMT_RegionPassManager) + RGPM = (RGPassManager*)PMS.top(); + else { + + assert (!PMS.empty() && "Unable to create Region Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Region Pass Manager + RGPM = new RGPassManager(); + RGPM->populateInheritedAnalysis(PMS); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(RGPM); + + // [3] Assign manager to manage this new manager. 
This may create + // and push new managers into PMS + TPM->schedulePass(RGPM); + + // [4] Push new manager into PMS + PMS.push(RGPM); + } + + RGPM->add(this); +} + +/// Get the printer pass +Pass *RegionPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintRegionPass(Banner, O); +} diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp new file mode 100644 index 000000000000..c5f1b925921b --- /dev/null +++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp @@ -0,0 +1,218 @@ +//===- RegionPrinter.cpp - Print regions tree pass ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Print out the region tree of a function using dotty/graphviz. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/RegionPrinter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +/// onlySimpleRegion - Show only the simple regions in the RegionViewer. 
+static cl::opt<bool> +onlySimpleRegions("only-simple-regions", + cl::desc("Show only simple regions in the graphviz viewer"), + cl::Hidden, + cl::init(false)); + +namespace llvm { +template<> +struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) { + + if (!Node->isSubRegion()) { + BasicBlock *BB = Node->getNodeAs<BasicBlock>(); + + if (isSimple()) + return DOTGraphTraits<const Function*> + ::getSimpleNodeLabel(BB, BB->getParent()); + else + return DOTGraphTraits<const Function*> + ::getCompleteNodeLabel(BB, BB->getParent()); + } + + return "Not implemented"; + } +}; + +template<> +struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits<RegionNode*>(isSimple) {} + + static std::string getGraphName(RegionInfo *DT) { + return "Region Graph"; + } + + std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + return DOTGraphTraits<RegionNode*>::getNodeLabel(Node, + G->getTopLevelRegion()); + } + + std::string getEdgeAttributes(RegionNode *srcNode, + GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) { + + RegionNode *destNode = *CI; + + if (srcNode->isSubRegion() || destNode->isSubRegion()) + return ""; + + // In case of a backedge, do not use it to define the layout of the nodes. + BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>(); + BasicBlock *destBB = destNode->getNodeAs<BasicBlock>(); + + Region *R = RI->getRegionFor(destBB); + + while (R && R->getParent()) + if (R->getParent()->getEntry() == destBB) + R = R->getParent(); + else + break; + + if (R->getEntry() == destBB && R->contains(srcBB)) + return "constraint=false"; + + return ""; + } + + // Print the cluster of the subregions. This groups the single basic blocks + // and adds a different background color for each group. 
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW, + unsigned depth = 0) { + raw_ostream &O = GW.getOStream(); + O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R) + << " {\n"; + O.indent(2 * (depth + 1)) << "label = \"\";\n"; + + if (!onlySimpleRegions || R->isSimple()) { + O.indent(2 * (depth + 1)) << "style = filled;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 1) << "\n"; + + } else { + O.indent(2 * (depth + 1)) << "style = solid;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 2) << "\n"; + } + + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + printRegionCluster(*RI, GW, depth + 1); + + RegionInfo *RI = R->getRegionInfo(); + + for (Region::const_block_iterator BI = R->block_begin(), + BE = R->block_end(); BI != BE; ++BI) + if (RI->getRegionFor(*BI) == R) + O.indent(2 * (depth + 1)) << "Node" + << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(*BI)) + << ";\n"; + + O.indent(2 * depth) << "}\n"; + } + + static void addCustomGraphFeatures(const RegionInfo* RI, + GraphWriter<RegionInfo*> &GW) { + raw_ostream &O = GW.getOStream(); + O << "\tcolorscheme = \"paired12\"\n"; + printRegionCluster(RI->getTopLevelRegion(), GW, 4); + } +}; +} //end namespace llvm + +namespace { + +struct RegionViewer + : public DOTGraphTraitsViewer<RegionInfo, false> { + static char ID; + RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){ + initializeRegionViewerPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionViewer::ID = 0; + +struct RegionOnlyViewer + : public DOTGraphTraitsViewer<RegionInfo, true> { + static char ID; + RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) { + initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionOnlyViewer::ID = 0; + +struct RegionPrinter + : public DOTGraphTraitsPrinter<RegionInfo, false> { + static char ID; + 
RegionPrinter() : + DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) { + initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionPrinter::ID = 0; +} //end anonymous namespace + +INITIALIZE_PASS(RegionPrinter, "dot-regions", + "Print regions of function to 'dot' file", true, true) + +INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", + true, true) + +INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", + "View regions of function (with no function bodies)", + true, true) + +namespace { + +struct RegionOnlyPrinter + : public DOTGraphTraitsPrinter<RegionInfo, true> { + static char ID; + RegionOnlyPrinter() : + DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) { + initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +} + +char RegionOnlyPrinter::ID = 0; +INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", + "Print regions of function to 'dot' file " + "(with no function bodies)", + true, true) + +FunctionPass* llvm::createRegionViewerPass() { + return new RegionViewer(); +} + +FunctionPass* llvm::createRegionOnlyViewerPass() { + return new RegionOnlyViewer(); +} + +FunctionPass* llvm::createRegionPrinterPass() { + return new RegionPrinter(); +} + +FunctionPass* llvm::createRegionOnlyPrinterPass() { + return new RegionOnlyPrinter(); +} + diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp new file mode 100644 index 000000000000..0a02f4e9d747 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -0,0 +1,7720 @@ +//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the scalar evolution analysis +// engine, which is used primarily to analyze expressions involving induction +// variables in loops. +// +// There are several aspects to this library. First is the representation of +// scalar expressions, which are represented as subclasses of the SCEV class. +// These classes are used to represent certain types of subexpressions that we +// can handle. We only create one SCEV of a particular shape, so +// pointer-comparisons for equality are legal. +// +// One important aspect of the SCEV objects is that they are never cyclic, even +// if there is a cycle in the dataflow for an expression (ie, a PHI node). If +// the PHI node is one of the idioms that we can represent (e.g., a polynomial +// recurrence) then we represent it directly as a recurrence node, otherwise we +// represent it as a SCEVUnknown node. +// +// In addition to being able to represent expressions of various types, we also +// have folders that are used to build the *canonical* representation for a +// particular expression. These folders are capable of using a variety of +// rewrite rules to simplify the expressions. +// +// Once the folders are defined, we can implement the more interesting +// higher-level code, such as the code that recognizes PHI nodes of various +// types, computes the execution count of a loop, etc. +// +// TODO: We should use these routines and value representations to implement +// dependence analysis! +// +//===----------------------------------------------------------------------===// +// +// There are several good references for the techniques used in this analysis. +// +// Chains of recurrences -- a method to expedite the evaluation +// of closed-form functions +// Olaf Bachmann, Paul S. Wang, Eugene V. Zima +// +// On computational properties of chains of recurrences +// Eugene V. 
Zima +// +// Symbolic Evaluation of Chains of Recurrences for Loop Optimization +// Robert A. van Engelen +// +// Efficient Symbolic Analysis for Optimizing Compilers +// Robert A. van Engelen +// +// Using the chains of recurrences algebra for data dependence testing and +// induction variable substitution +// MS Thesis, Johnie Birch +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scalar-evolution" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumArrayLenItCounts, + "Number of trip counts computed with array length"); +STATISTIC(NumTripCountsComputed, + "Number of loops with predictable loop counts"); +STATISTIC(NumTripCountsNotComputed, + "Number of loops without predictable loop counts"); +STATISTIC(NumBruteForceTripCountsComputed, + "Number of loops with trip counts computed by force"); + +static cl::opt<unsigned> 
+MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::desc("Maximum number of iterations SCEV will " + "symbolically execute a constant " + "derived loop"), + cl::init(100)); + +// FIXME: Enable this with XDEBUG when the test suite is clean. +static cl::opt<bool> +VerifySCEV("verify-scev", + cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); + +INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +char ScalarEvolution::ID = 0; + +//===----------------------------------------------------------------------===// +// SCEV class definitions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Implementation of the SCEV class. 
+// + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void SCEV::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + +void SCEV::print(raw_ostream &OS) const { + switch (getSCEVType()) { + case scConstant: + WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false); + return; + case scTruncate: { + const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); + const SCEV *Op = Trunc->getOperand(); + OS << "(trunc " << *Op->getType() << " " << *Op << " to " + << *Trunc->getType() << ")"; + return; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this); + const SCEV *Op = ZExt->getOperand(); + OS << "(zext " << *Op->getType() << " " << *Op << " to " + << *ZExt->getType() << ")"; + return; + } + case scSignExtend: { + const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this); + const SCEV *Op = SExt->getOperand(); + OS << "(sext " << *Op->getType() << " " << *Op << " to " + << *SExt->getType() << ")"; + return; + } + case scAddRecExpr: { + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this); + OS << "{" << *AR->getOperand(0); + for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) + OS << ",+," << *AR->getOperand(i); + OS << "}<"; + if (AR->getNoWrapFlags(FlagNUW)) + OS << "nuw><"; + if (AR->getNoWrapFlags(FlagNSW)) + OS << "nsw><"; + if (AR->getNoWrapFlags(FlagNW) && + !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) + OS << "nw><"; + WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false); + OS << ">"; + return; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); + const char *OpStr = 0; + switch (NAry->getSCEVType()) { + case scAddExpr: OpStr = " + "; break; + case scMulExpr: OpStr = " * "; break; + case scUMaxExpr: OpStr = " umax "; break; + case scSMaxExpr: OpStr = " smax "; break; + } + OS << "("; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + 
OS << **I; + if (llvm::next(I) != E) + OS << OpStr; + } + OS << ")"; + switch (NAry->getSCEVType()) { + case scAddExpr: + case scMulExpr: + if (NAry->getNoWrapFlags(FlagNUW)) + OS << "<nuw>"; + if (NAry->getNoWrapFlags(FlagNSW)) + OS << "<nsw>"; + } + return; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this); + OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; + return; + } + case scUnknown: { + const SCEVUnknown *U = cast<SCEVUnknown>(this); + Type *AllocTy; + if (U->isSizeOf(AllocTy)) { + OS << "sizeof(" << *AllocTy << ")"; + return; + } + if (U->isAlignOf(AllocTy)) { + OS << "alignof(" << *AllocTy << ")"; + return; + } + + Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo)) { + OS << "offsetof(" << *CTy << ", "; + WriteAsOperand(OS, FieldNo, false); + OS << ")"; + return; + } + + // Otherwise just print it normally. + WriteAsOperand(OS, U->getValue(), false); + return; + } + case scCouldNotCompute: + OS << "***COULDNOTCOMPUTE***"; + return; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); +} + +Type *SCEV::getType() const { + switch (getSCEVType()) { + case scConstant: + return cast<SCEVConstant>(this)->getType(); + case scTruncate: + case scZeroExtend: + case scSignExtend: + return cast<SCEVCastExpr>(this)->getType(); + case scAddRecExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + return cast<SCEVNAryExpr>(this)->getType(); + case scAddExpr: + return cast<SCEVAddExpr>(this)->getType(); + case scUDivExpr: + return cast<SCEVUDivExpr>(this)->getType(); + case scUnknown: + return cast<SCEVUnknown>(this)->getType(); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + default: + llvm_unreachable("Unknown SCEV kind!"); + } +} + +bool SCEV::isZero() const { + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isZero(); + return false; +} + +bool SCEV::isOne() const { + if (const SCEVConstant *SC = 
dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isOne(); + return false; +} + +bool SCEV::isAllOnesValue() const { + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isAllOnesValue(); + return false; +} + +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +bool SCEV::isNonConstantNegative() const { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). + return SC->getValue()->getValue().isNegative(); +} + +SCEVCouldNotCompute::SCEVCouldNotCompute() : + SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} + +bool SCEVCouldNotCompute::classof(const SCEV *S) { + return S->getSCEVType() == scCouldNotCompute; +} + +const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { + FoldingSetNodeID ID; + ID.AddInteger(scConstant); + ID.AddPointer(V); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getConstant(const APInt& Val) { + return getConstant(ConstantInt::get(getContext(), Val)); +} + +const SCEV * +ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { + IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); + return getConstant(ConstantInt::get(ITy, V, isSigned)); +} + +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, + unsigned SCEVTy, const SCEV *op, Type *ty) + : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} + +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, Type *ty) + : SCEVCastExpr(ID, scTruncate, op, ty) { + assert((Op->getType()->isIntegerTy() || 
Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate non-integer value!"); +} + +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, Type *ty) + : SCEVCastExpr(ID, scZeroExtend, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot zero extend non-integer value!"); +} + +SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, Type *ty) + : SCEVCastExpr(ID, scSignExtend, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot sign extend non-integer value!"); +} + +void SCEVUnknown::deleted() { + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); + + // Remove this SCEVUnknown from the uniquing map. + SE->UniqueSCEVs.RemoveNode(this); + + // Release the value. + setValPtr(0); +} + +void SCEVUnknown::allUsesReplacedWith(Value *New) { + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); + + // Remove this SCEVUnknown from the uniquing map. + SE->UniqueSCEVs.RemoveNode(this); + + // Update this SCEVUnknown to point to the new value. This is needed + // because there may still be outstanding SCEVs which still point to + // this SCEVUnknown. 
+ setValPtr(New); +} + +bool SCEVUnknown::isSizeOf(Type *&AllocTy) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue() && + CE->getNumOperands() == 2) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1))) + if (CI->isOne()) { + AllocTy = cast<PointerType>(CE->getOperand(0)->getType()) + ->getElementType(); + return true; + } + + return false; +} + +bool SCEVUnknown::isAlignOf(Type *&AllocTy) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue()) { + Type *Ty = + cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); + if (StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked() && + CE->getNumOperands() == 3 && + CE->getOperand(1)->isNullValue()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) + if (CI->isOne() && + STy->getNumElements() == 2 && + STy->getElementType(0)->isIntegerTy(1)) { + AllocTy = STy->getElementType(1); + return true; + } + } + } + + return false; +} + +bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getNumOperands() == 3 && + CE->getOperand(0)->isNullValue() && + CE->getOperand(1)->isNullValue()) { + Type *Ty = + cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); + // Ignore vector types here so that ScalarEvolutionExpander doesn't + // emit getelementptrs that index into 
vectors. + if (Ty->isStructTy() || Ty->isArrayTy()) { + CTy = Ty; + FieldNo = CE->getOperand(2); + return true; + } + } + + return false; +} + +//===----------------------------------------------------------------------===// +// SCEV Utilities +//===----------------------------------------------------------------------===// + +namespace { + /// SCEVComplexityCompare - Return true if the complexity of the LHS is less + /// than the complexity of the RHS. This comparator is used to canonicalize + /// expressions. + class SCEVComplexityCompare { + const LoopInfo *const LI; + public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} + + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } + + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. + int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; + + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; + + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (LType) { + case scUnknown: { + const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); + const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); + + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); + + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. 
+ bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; + + // Compare getValueID values. + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; + + // Sort arguments by their position. + if (const Argument *LA = dyn_cast<Argument>(LV)) { + const Argument *RA = cast<Argument>(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; + } + + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { + const Instruction *RInst = cast<Instruction>(RV); + + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + return (int)LNumOps - (int)RNumOps; + } + + return 0; + } + + case scConstant: { + const SCEVConstant *LC = cast<SCEVConstant>(LHS); + const SCEVConstant *RC = cast<SCEVConstant>(RHS); + + // Compare constant values. + const APInt &LA = LC->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); + const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + + // Compare addrec loop depths. 
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); + if (X != 0) + return X; + } + + return 0; + } + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); + const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); + + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); + if (X != 0) + return X; + } + return (int)LNumOps - (int)RNumOps; + } + + case scUDivExpr: { + const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); + const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); + + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } + + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); + const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); + + // Compare cast expressions by operand. + return compare(LC->getOperand(), RC->getOperand()); + } + + default: + llvm_unreachable("Unknown SCEV kind!"); + } + } + }; +} + +/// GroupByComplexity - Given a list of SCEV objects, order them by their +/// complexity, and group objects of the same complexity together by value. 
+/// When this routine is finished, we know that any duplicates in the vector are +/// consecutive and that complexity is monotonically increasing. +/// +/// Note that we go take special precautions to ensure that we get deterministic +/// results from this routine. In other words, we don't want the results of +/// this to depend on where the addresses of various SCEV objects happened to +/// land in memory. +/// +static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, + LoopInfo *LI) { + if (Ops.size() < 2) return; // Noop + if (Ops.size() == 2) { + // This is the common case, which also happens to be trivially simple. + // Special case it. + const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; + if (SCEVComplexityCompare(LI)(RHS, LHS)) + std::swap(LHS, RHS); + return; + } + + // Do the rough sort by complexity. + std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI)); + + // Now that we are sorted by complexity, group elements of the same + // complexity. Note that this is, at worst, N^2, but the vector is likely to + // be extremely short in practice. Note that we take this approach because we + // do not want to depend on the addresses of the objects we are grouping. + for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { + const SCEV *S = Ops[i]; + unsigned Complexity = S->getSCEVType(); + + // If there are any objects of the same complexity and same value as this + // one, group them. + for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { + if (Ops[j] == S) { // Found a duplicate. + // Move it to immediately after i'th element. + std::swap(Ops[i+1], Ops[j]); + ++i; // no need to rescan it. + if (i == e-2) return; // Done! + } + } + } +} + + + +//===----------------------------------------------------------------------===// +// Simple SCEV method implementations +//===----------------------------------------------------------------------===// + +/// BinomialCoefficient - Compute BC(It, K). The result has width W. 
/// Assume, K > 0.
/// Returns SCEVCouldNotCompute when K is larger than 1000.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                       ScalarEvolution &SE,
                                       Type *ResultTy) {
  // Handle the simplest case efficiently.
  if (K == 1)
    return SE.getTruncateOrZeroExtend(It, ResultTy);

  // We are using the following formula for BC(It, K):
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  //
  // Suppose, W is the bitwidth of the return value.  We must be prepared for
  // overflow.  Hence, we must assure that the result of our computation is
  // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
  // safe in modular arithmetic.
  //
  // However, this code doesn't use exactly that formula; the formula it uses
  // is something like the following, where T is the number of factors of 2 in
  // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  // exponentiation:
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  //
  // This formula is trivially equivalent to the previous formula.  However,
  // this formula can be implemented much more efficiently.  The trick is that
  // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  // arithmetic.  To do exact division in modular arithmetic, all we have
  // to do is multiply by the inverse.  Therefore, this step can be done at
  // width W.
  //
  // The next issue is how to safely do the division by 2^T.  The way this
  // is done is by doing the multiplication step at a width of at least W + T
  // bits.  This way, the bottom W+T bits of the product are accurate. Then,
  // when we perform the division by 2^T (which is equivalent to a right shift
  // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
  // truncated out after the division by 2^T.
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula requires fewer than W + K bits. Also, the first formula
  // requires a division step, whereas this formula only requires multiplies
  // and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway.  We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // register width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  //
  // K is at least 2 here (K == 1 returned above, K > 0 is assumed), so the
  // factor 2 of K! is accounted for up front: it contributes one power of two
  // (T starts at 1) and no odd part (the loop starts at 3).
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    Mult = Mult.lshr(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.
  // The modulus 2^W does not fit in W bits, so the inverse is computed at
  // width W+1 and truncated back to W afterwards.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product, at width T+W
  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                      CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    // The subtraction (It - i) is done in It's own type and only then
    // converted to the calculation width; see the discussion above.
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T.

  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}

/// evaluateAtIteration - Return the value of this chain of recurrences at
/// the specified iteration number.  We can evaluate this recurrence by
/// multiplying each element in the chain by the binomial coefficient
/// corresponding to it.  In other words, we can evaluate {A,+,B,+,C,+,D} as:
///
///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
///
/// If any of the binomial coefficients cannot be computed, that
/// SCEVCouldNotCompute value is returned instead.
///
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
                                                ScalarEvolution &SE) const {
  // BC(It, 0) is 1, so the start value is taken as-is.
  const SCEV *Result = getStart();
  for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
    // The computation is correct in the face of overflow provided that the
    // multiplication is performed _after_ the evaluation of the binomial
    // coefficient.
    const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
    if (isa<SCEVCouldNotCompute>(Coeff))
      return Coeff;

    Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
  }
  return Result;
}

//===----------------------------------------------------------------------===//
//                    SCEV Expression folder implementations
//===----------------------------------------------------------------------===//

/// getTruncateExpr - Return a SCEV for Op truncated to Ty.  Op's type must be
/// strictly wider than Ty and Ty must be SCEVable (both asserted).  The
/// truncate is folded into the operand for constants, casts, addrecs, and for
/// adds/muls whose operands all fold; otherwise an explicit, uniqued
/// SCEVTruncateExpr node is returned.
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
                                             Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
         "This is not a truncating conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Look for an existing node for (truncate, Op, Ty); IP receives the insert
  // position to use if there is none.
  FoldingSetNodeID ID;
  ID.AddInteger(scTruncate);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));

  // trunc(trunc(x)) --> trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
    return getTruncateExpr(ST->getOperand(), Ty);

  // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getTruncateOrSignExtend(SS->getOperand(), Ty);

  // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getTruncateOrZeroExtend(SZ->getOperand(), Ty);

  // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
  // eliminate all the truncates.
  if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    bool hasTrunc = false;
    // Stop at the first operand whose truncate did not fold away.
    for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
      const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
      hasTrunc = isa<SCEVTruncateExpr>(S);
      Operands.push_back(S);
    }
    if (!hasTrunc)
      return getAddExpr(Operands);
    // The recursive calls above may have created new nodes, so refresh the
    // insert position before falling through.
    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
  }

  // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
  // eliminate all the truncates.
  if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    bool hasTrunc = false;
    // Stop at the first operand whose truncate did not fold away.
    for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
      const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
      hasTrunc = isa<SCEVTruncateExpr>(S);
      Operands.push_back(S);
    }
    if (!hasTrunc)
      return getMulExpr(Operands);
    // As above: refresh the insert position after the recursive calls.
    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
  }

  // If the input value is a chrec scev, truncate the chrec's operands.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
      Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
    return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
  }

  // The cast wasn't folded; create an explicit cast node. We can reuse
  // the existing insert position since if we get here, we won't have
  // made any changes which would invalidate it (the add/mul cases above
  // recompute IP after their recursive calls).
  SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
                                                 Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

/// getZeroExtendExpr - Return a SCEV for Op zero-extended to Ty.  Op's type
/// must be strictly narrower than Ty and Ty must be SCEVable (both asserted).
/// The extension is folded into the operand where possible -- constants,
/// nested zexts, truncates whose dropped bits are known zero, and affine
/// addrecs that can be shown not to wrap -- otherwise an explicit, uniqued
/// SCEVZeroExtendExpr node is returned.
///
/// As a side effect, no-wrap flags proven for an addrec operand are cached on
/// that addrec.
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                               Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));

  // zext(zext(x)) --> zext(x)
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getZeroExtendExpr(SZ->getOperand(), Ty);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scZeroExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // zext(trunc(x)) --> zext(x) or x or trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    // It's possible the bits taken off by the truncate were all zero bits. If
    // so, we should be able to simplify this further.
    const SCEV *X = ST->getOperand();
    ConstantRange CR = getUnsignedRange(X);
    unsigned TruncBits = getTypeSizeInBits(ST->getType());
    unsigned NewBits = getTypeSizeInBits(Ty);
    if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
            CR.zextOrTrunc(NewBits)))
      return getTruncateOrZeroExtend(X, Ty);
  }

  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can zero extend all of the
  // operands (often constants).  This allows analysis of something like
  // this:  for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->getNoWrapFlags(SCEV::FlagNUW))
        return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                             getZeroExtendExpr(Step, Ty),
                             L, AR->getNoWrapFlags());

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly casted to
        // the addrec's type. The count is always unsigned.
        const SCEV *CastedMaxBECount =
          getTruncateOrZeroExtend(MaxBECount, Start->getType());
        const SCEV *RecastedMaxBECount =
          getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
        if (MaxBECount == RecastedMaxBECount) {
          // Redo the computation at twice the width and compare it with the
          // extended narrow result; identical SCEVs mean nothing was lost.
          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no unsigned overflow.
          const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
          const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
          const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
          const SCEV *WideMaxBECount =
            getZeroExtendExpr(CastedMaxBECount, WideTy);
          const SCEV *OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getZeroExtendExpr(Step, WideTy)));
          if (ZAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NUW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                 getZeroExtendExpr(Step, Ty),
                                 L, AR->getNoWrapFlags());
          }
          // Similar to above, only this time treat the step value as signed.
          // This covers loops that count down.
          OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getSignExtendExpr(Step, WideTy)));
          if (ZAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NW, which is propagated to this AddRec.
            // Negative step causes unsigned wrap, but it still can't self-wrap.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L, AR->getNoWrapFlags());
          }
        }

        // If the backedge is guarded by a comparison with the pre-inc value
        // the addrec is safe. Also, if the entry is guarded by a comparison
        // with the start value and the backedge is guarded by a comparison
        // with the post-inc value, the addrec is safe.
        if (isKnownPositive(Step)) {
          // APInt::getMinValue is zero, so in BitWidth-bit arithmetic N is
          // 2^BitWidth minus the largest possible step.
          const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
                                      getUnsignedRange(Step).getUnsignedMax());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
                                           AR->getPostIncExpr(*this), N))) {
            // Cache knowledge of AR NUW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                 getZeroExtendExpr(Step, Ty),
                                 L, AR->getNoWrapFlags());
          }
        } else if (isKnownNegative(Step)) {
          const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
                                      getSignedRange(Step).getSignedMin());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
                                           AR->getPostIncExpr(*this), N))) {
            // Cache knowledge of AR NW, which is propagated to this AddRec.
            // Negative step causes unsigned wrap, but it still can't self-wrap.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L, AR->getNoWrapFlags());
          }
        }
      }
    }

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

// Get the limit of a recurrence such that incrementing by Step cannot cause
// signed overflow as long as the value of the recurrence within the loop does
// not exceed this limit before incrementing.
//
// On success *Pred is set to the signed predicate to test against the
// returned limit (SLT for a known-positive step, SGT for a known-negative
// one).  If the sign of Step is not known, null is returned and *Pred is left
// unmodified.
static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                           ICmpInst::Predicate *Pred,
                                           ScalarEvolution *SE) {
  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  if (SE->isKnownPositive(Step)) {
    *Pred = ICmpInst::ICMP_SLT;
    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
                           SE->getSignedRange(Step).getSignedMax());
  }
  if (SE->isKnownNegative(Step)) {
    *Pred = ICmpInst::ICMP_SGT;
    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
                           SE->getSignedRange(Step).getSignedMin());
  }
  return 0;
}

// The recurrence AR has been shown to have no signed wrap. Typically, if we can
// prove NSW for AR, then we can just as easily prove NSW for its preincrement
// or postincrement sibling. This allows normalizing a sign extended AddRec as
// such: {sext(Step + Start),+,Step} => {(Step + sext(Start)),+,Step} As a
// result, the expression "Step + sext(PreIncAR)" is congruent with
// "sext(PostIncAR)"
//
// Returns the "PreStart" expression (AR's start with one occurrence set of
// Step removed from its add operands) when the pre-increment recurrence can be
// shown not to sign-wrap, and null otherwise.  The Ty parameter is not used by
// this function.
static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
                                            Type *Ty,
                                            ScalarEvolution *SE) {
  const Loop *L = AR->getLoop();
  const SCEV *Start = AR->getStart();
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Check for a simple looking step prior to loop entry.
  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  if (!SA)
    return 0;

  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  // subtraction is expensive. For this purpose, perform a quick and dirty
  // difference, by checking for Step in the operand list.
  SmallVector<const SCEV *, 4> DiffOps;
  for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
       I != E; ++I) {
    if (*I != Step)
      DiffOps.push_back(*I);
  }
  // Nothing was dropped, i.e. Step is not one of Start's add operands.
  if (DiffOps.size() == SA->getNumOperands())
    return 0;

  // This is a postinc AR. Check for overflow on the preinc recurrence using the
  // same three conditions that getSignExtendExpr checks.

  // 1. NSW flags on the step increment.
  const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
    SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));

  if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
    return PreStart;

  // 2. Direct overflow check on the step operation's expression.
  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  const SCEV *OperandExtendedStart =
    SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
                   SE->getSignExtendExpr(Step, WideTy));
  if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
    // Cache knowledge of PreAR NSW.
    if (PreAR)
      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
    // FIXME: this optimization needs a unit test
    DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
    return PreStart;
  }

  // 3. Loop precondition.
  ICmpInst::Predicate Pred;
  const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);

  // Pred is only read when OverflowLimit is non-null, which is exactly when
  // getOverflowLimitForStep has assigned it.
  if (OverflowLimit &&
      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
    return PreStart;
  }
  return 0;
}

// Get the normalized sign-extended expression for this AddRec's Start.
// When a PreStart is available the result is sext(Step) + sext(PreStart);
// otherwise it is simply sext(Start).
static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
                                            Type *Ty,
                                            ScalarEvolution *SE) {
  const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
  if (!PreStart)
    return SE->getSignExtendExpr(AR->getStart(), Ty);

  return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
                        SE->getSignExtendExpr(PreStart, Ty));
}

/// getSignExtendExpr - Return a SCEV for Op sign-extended to Ty.  Op's type
/// must be strictly narrower than Ty and Ty must be SCEVable (both asserted).
/// The extension is folded into the operand where possible -- constants,
/// nested extensions, known non-negative values (which become a zext),
/// truncates whose dropped bits are known sign bits, and affine addrecs that
/// can be shown not to sign-wrap -- otherwise an explicit, uniqued
/// SCEVSignExtendExpr node is returned.
///
/// As a side effect, NSW proven for an addrec operand is cached on that
/// addrec.
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
                                               Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));

  // sext(sext(x)) --> sext(x)
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getSignExtendExpr(SS->getOperand(), Ty);

  // sext(zext(x)) --> zext(x)
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getZeroExtendExpr(SZ->getOperand(), Ty);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scSignExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // If the input value is provably positive, build a zext instead.
  if (isKnownNonNegative(Op))
    return getZeroExtendExpr(Op, Ty);

  // sext(trunc(x)) --> sext(x) or x or trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    // It's possible the bits taken off by the truncate were all sign bits. If
    // so, we should be able to simplify this further.
    const SCEV *X = ST->getOperand();
    ConstantRange CR = getSignedRange(X);
    unsigned TruncBits = getTypeSizeInBits(ST->getType());
    unsigned NewBits = getTypeSizeInBits(Ty);
    if (CR.truncate(TruncBits).signExtend(NewBits).contains(
            CR.sextOrTrunc(NewBits)))
      return getTruncateOrSignExtend(X, Ty);
  }

  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can sign extend all of the
  // operands (often constants).  This allows analysis of something like
  // this:  for (signed char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->getNoWrapFlags(SCEV::FlagNSW))
        return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                             getSignExtendExpr(Step, Ty),
                             L, SCEV::FlagNSW);

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly casted to
        // the addrec's type. The count is always unsigned.
        const SCEV *CastedMaxBECount =
          getTruncateOrZeroExtend(MaxBECount, Start->getType());
        const SCEV *RecastedMaxBECount =
          getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
        if (MaxBECount == RecastedMaxBECount) {
          // Redo the computation at twice the width and compare it with the
          // extended narrow result; identical SCEVs mean nothing was lost.
          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no signed overflow.
          const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
          const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
          const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
          const SCEV *WideMaxBECount =
            getZeroExtendExpr(CastedMaxBECount, WideTy);
          const SCEV *OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getSignExtendExpr(Step, WideTy)));
          if (SAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NSW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                                 getSignExtendExpr(Step, Ty),
                                 L, AR->getNoWrapFlags());
          }
          // Similar to above, only this time treat the step value as unsigned.
          // This covers loops that count up with an unsigned step.
          OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getZeroExtendExpr(Step, WideTy)));
          if (SAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NSW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                                 getZeroExtendExpr(Step, Ty),
                                 L, AR->getNoWrapFlags());
          }
        }

        // If the backedge is guarded by a comparison with the pre-inc value
        // the addrec is safe. Also, if the entry is guarded by a comparison
        // with the start value and the backedge is guarded by a comparison
        // with the post-inc value, the addrec is safe.
        ICmpInst::Predicate Pred;
        const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
        // Pred is only read when OverflowLimit is non-null.
        if (OverflowLimit &&
            (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
             (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
              isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
                                          OverflowLimit)))) {
          // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
          return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                               getSignExtendExpr(Step, Ty),
                               L, AR->getNoWrapFlags());
        }
      }
    }

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
///
/// The implementation picks whichever of sext/zext folds away, preferring
/// zext when neither (or both) do, except that negative constants and smax
/// expressions use sext.
///
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
                                              Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Sign-extend negative constants.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    if (SC->getValue()->getValue().isNegative())
      return getSignExtendExpr(Op, Ty);

  // Peel off a truncate cast.
  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
    const SCEV *NewOp = T->getOperand();
    // The truncate's input is still narrower than Ty: keep extending it.
    // Otherwise it is at least as wide as Ty: truncate (or pass through).
    if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
      return getAnyExtendExpr(NewOp, Ty);
    return getTruncateOrNoop(NewOp, Ty);
  }

  // Next try a zext cast. If the cast is folded, use it.
  const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
  if (!isa<SCEVZeroExtendExpr>(ZExt))
    return ZExt;

  // Next try a sext cast. If the cast is folded, use it.
  const SCEV *SExt = getSignExtendExpr(Op, Ty);
  if (!isa<SCEVSignExtendExpr>(SExt))
    return SExt;

  // Force the cast to be folded into the operands of an addrec.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Ops;
    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
         I != E; ++I)
      Ops.push_back(getAnyExtendExpr(*I, Ty));
    return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
  }

  // If the expression is obviously signed, use the sext cast value.
  if (isa<SCEVSMaxExpr>(Op))
    return SExt;

  // Absent any other information, use the zext cast value.
  return ZExt;
}

/// CollectAddOperandsWithScales - Process the given Ops list, which is
/// a list of operands to be added under the given scale, update the given
/// map. This is a helper function for getAddRecExpr. As an example of
/// what it does, given a sequence of operands that would form an add
/// expression like this:
///
///    m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
///
/// where A and B are constants, update the map with these values:
///
///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
///
/// and add 13 + A*B*29 to AccumulatedConstant.
/// This will allow getAddRecExpr to produce this:
///
///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
///
/// This form often exposes folding opportunities that are hidden in
/// the original operand list.
///
/// Return true iff it appears that any interesting folding opportunities
/// may be exposed. This helps getAddRecExpr short-circuit extra work in
/// the common case where no interesting opportunities are present, and
/// is also used as a check to avoid infinite recursion.
+/// +static bool +CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, + SmallVectorImpl<const SCEV *> &NewOps, + APInt &AccumulatedConstant, + const SCEV *const *Ops, size_t NumOperands, + const APInt &Scale, + ScalarEvolution &SE) { + bool Interesting = false; + + // Iterate over the add operands. They are sorted, with constants first. + unsigned i = 0; + while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + ++i; + // Pull a buried constant out to the outside. + if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) + Interesting = true; + AccumulatedConstant += Scale * C->getValue()->getValue(); + } + + // Next comes everything else. We're especially interested in multiplies + // here, but they're in the middle, so just visit the rest with one loop. + for (; i != NumOperands; ++i) { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); + if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { + APInt NewScale = + Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue(); + if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) { + // A multiplication of a constant with another add; recurse. + const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1)); + Interesting |= + CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Add->op_begin(), Add->getNumOperands(), + NewScale, SE); + } else { + // A multiplication of a constant with some other value. Update + // the map. + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); + const SCEV *Key = SE.getMulExpr(MulOps); + std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Key, NewScale)); + if (Pair.second) { + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += NewScale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } else { + // An ordinary operand. Update the map. 
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Ops[i], Scale)); + if (Pair.second) { + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += Scale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } + + return Interesting; +} + +namespace { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; +} + +/// getAddExpr - Get a canonical add expression, or something simpler if +/// possible. +const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags) { + assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && + "only nuw or nsw allowed"); + assert(!Ops.empty() && "Cannot get empty add!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVAddExpr operand types don't match!"); +#endif + + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { + bool All = true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), + E = Ops.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); + } + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. 
+ unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + Ops[0] = getConstant(LHSC->getValue()->getValue() + + RHSC->getValue()->getValue()); + if (Ops.size() == 2) return Ops[0]; + Ops.erase(Ops.begin()+1); // Erase the folded element + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant zero being added, strip it off. + if (LHSC->getValue()->isZero()) { + Ops.erase(Ops.begin()); + --Idx; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Okay, check to see if the same value occurs in the operand list more than + // once. If so, merge them together into an multiply expression. Since we + // sorted the list, these values are required to be adjacent. + Type *Ty = Ops[0]->getType(); + bool FoundMatch = false; + for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) + if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 + // Scan ahead to count how many equal operands there are. + unsigned Count = 2; + while (i+Count != e && Ops[i+Count] == Ops[i]) + ++Count; + // Merge the values into a multiply. + const SCEV *Scale = getConstant(Ty, Count); + const SCEV *Mul = getMulExpr(Scale, Ops[i]); + if (Ops.size() == Count) + return Mul; + Ops[i] = Mul; + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); + --i; e -= Count - 1; + FoundMatch = true; + } + if (FoundMatch) + return getAddExpr(Ops, Flags); + + // Check for truncates. If all the operands are truncated from the same + // type, see if factoring out the truncate would permit the result to be + // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n) + // if the contents of the resulting outer trunc fold to something simple. 
+ for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) { + const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); + Type *DstType = Trunc->getType(); + Type *SrcType = Trunc->getOperand()->getType(); + SmallVector<const SCEV *, 8> LargeOps; + bool Ok = true; + // Check all the operands to see if they can be represented in the + // source type of the truncate. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + LargeOps.push_back(getAnyExtendExpr(C, SrcType)); + } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { + SmallVector<const SCEV *, 8> LargeMulOps; + for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { + if (const SCEVTruncateExpr *T = + dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeMulOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = + dyn_cast<SCEVConstant>(M->getOperand(j))) { + LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); + } else { + Ok = false; + break; + } + } + if (Ok) + LargeOps.push_back(getMulExpr(LargeMulOps)); + } else { + Ok = false; + break; + } + } + if (Ok) { + // Evaluate the expression in the larger type. + const SCEV *Fold = getAddExpr(LargeOps, Flags); + // If it folds to something simple, use it. Otherwise, don't. + if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) + return getTruncateExpr(Fold, DstType); + } + } + + // Skip past any other cast SCEVs. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) + ++Idx; + + // If there are add operands they would be next. 
+ if (Idx < Ops.size()) { + bool DeletedAdd = false; + while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) { + // If we have an add, expand the add operands onto the end of the operands + // list. + Ops.erase(Ops.begin()+Idx); + Ops.append(Add->op_begin(), Add->op_end()); + DeletedAdd = true; + } + + // If we deleted at least one add, we added operands to the end of the list, + // and they are not necessarily sorted. Recurse to resort and resimplify + // any operands we just acquired. + if (DeletedAdd) + return getAddExpr(Ops); + } + + // Skip over the add expression until we get to a multiply. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) + ++Idx; + + // Check to see if there are any folding opportunities present with + // operands multiplied by constant values. + if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) { + uint64_t BitWidth = getTypeSizeInBits(Ty); + DenseMap<const SCEV *, APInt> M; + SmallVector<const SCEV *, 8> NewOps; + APInt AccumulatedConstant(BitWidth, 0); + if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Ops.data(), Ops.size(), + APInt(BitWidth, 1), *this)) { + // Some interesting folding opportunity is present, so its worthwhile to + // re-generate the operands list. Group the operands by constant scale, + // to avoid multiplying by the same constant scale multiple times. + std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; + for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(), + E = NewOps.end(); I != E; ++I) + MulOpLists[M.find(*I)->second].push_back(*I); + // Re-generate the operands list. 
+ Ops.clear(); + if (AccumulatedConstant != 0) + Ops.push_back(getConstant(AccumulatedConstant)); + for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator + I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) + if (I->first != 0) + Ops.push_back(getMulExpr(getConstant(I->first), + getAddExpr(I->second))); + if (Ops.empty()) + return getConstant(Ty, 0); + if (Ops.size() == 1) + return Ops[0]; + return getAddExpr(Ops); + } + } + + // If we are adding something to a multiply expression, make sure the + // something is not already an operand of the multiply. If so, merge it into + // the multiply. + for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) { + const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]); + for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { + const SCEV *MulOpSCEV = Mul->getOperand(MulOp); + if (isa<SCEVConstant>(MulOpSCEV)) + continue; + for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) + if (MulOpSCEV == Ops[AddOp]) { + // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) + const SCEV *InnerMul = Mul->getOperand(MulOp == 0); + if (Mul->getNumOperands() != 2) { + // If the multiply has more than two operands, we must get the + // Y*Z term. + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); + InnerMul = getMulExpr(MulOps); + } + const SCEV *One = getConstant(Ty, 1); + const SCEV *AddOne = getAddExpr(One, InnerMul); + const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); + if (Ops.size() == 2) return OuterMul; + if (AddOp < Idx) { + Ops.erase(Ops.begin()+AddOp); + Ops.erase(Ops.begin()+Idx-1); + } else { + Ops.erase(Ops.begin()+Idx); + Ops.erase(Ops.begin()+AddOp-1); + } + Ops.push_back(OuterMul); + return getAddExpr(Ops); + } + + // Check this multiply against other multiplies being added together. 
+ for (unsigned OtherMulIdx = Idx+1; + OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]); + ++OtherMulIdx) { + const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]); + // If MulOp occurs in OtherMul, we can fold the two multiplies + // together. + for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); + OMulOp != e; ++OMulOp) + if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { + // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) + const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); + if (Mul->getNumOperands() != 2) { + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); + InnerMul1 = getMulExpr(MulOps); + } + const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); + if (OtherMul->getNumOperands() != 2) { + SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(), + OtherMul->op_begin()+OMulOp); + MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); + InnerMul2 = getMulExpr(MulOps); + } + const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + if (Ops.size() == 2) return OuterMul; + Ops.erase(Ops.begin()+Idx); + Ops.erase(Ops.begin()+OtherMulIdx-1); + Ops.push_back(OuterMul); + return getAddExpr(Ops); + } + } + } + } + + // If there are any add recurrences in the operands list, see if any other + // added values are loop invariant. If so, we can fold them into the + // recurrence. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) + ++Idx; + + // Scan over all recurrences, trying to fold loop invariants into them. + for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { + // Scan all of the other operands to this add and add them to the vector if + // they are loop invariant w.r.t. the recurrence. 
+ SmallVector<const SCEV *, 8> LIOps; + const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (isLoopInvariant(Ops[i], AddRecLoop)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } + + // If we found some loop invariants, fold them into the recurrence. + if (!LIOps.empty()) { + // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} + LIOps.push_back(AddRec->getStart()); + + SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + AddRecOps[0] = getAddExpr(LIOps); + + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer add and the inner addrec are guaranteed to have no overflow. + // Always propagate NW. + Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); + + // If all of the other operands were loop invariant, we are done. + if (Ops.size() == 1) return NewRec; + + // Otherwise, add the folded AddRec by the non-invariant parts. + for (unsigned i = 0;; ++i) + if (Ops[i] == AddRec) { + Ops[i] = NewRec; + break; + } + return getAddExpr(Ops); + } + + // Okay, if there weren't any loop invariants to be folded, check to see if + // there are multiple AddRec's with the same loop induction variable being + // added together. If so, we can fold them. 
+ for (unsigned OtherIdx = Idx+1; + OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { + // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L> + SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + for (unsigned i = 0, e = OtherAddRec->getNumOperands(); + i != e; ++i) { + if (i >= AddRecOps.size()) { + AddRecOps.append(OtherAddRec->op_begin()+i, + OtherAddRec->op_end()); + break; + } + AddRecOps[i] = getAddExpr(AddRecOps[i], + OtherAddRec->getOperand(i)); + } + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + // Step size has changed, so we cannot guarantee no self-wraparound. + Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); + return getAddExpr(Ops); + } + + // Otherwise couldn't fold anything into this recurrence. Move onto the + // next one. + } + + // Okay, it looks like we really DO need an add expr. Check to see if we + // already have one, otherwise create a new one. 
+ FoldingSetNodeID ID; + ID.AddInteger(scAddExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + SCEVAddExpr *S = + static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + S->setNoWrapFlags(Flags); + return S; +} + +static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { + uint64_t k = i*j; + if (j > 1 && k / j != i) Overflow = true; + return k; +} + +/// Compute the result of "n choose k", the binomial coefficient. If an +/// intermediate computation overflows, Overflow will be set and the return will +/// be garbage. Overflow is not cleared on absence of overflow. +static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { + // We use the multiplicative formula: + // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . + // At each iteration, we take the n-th term of the numeral and divide by the + // (k-n)th term of the denominator. This division will always produce an + // integral result, and helps reduce the chance of overflow in the + // intermediate computations. However, we can still overflow even when the + // final result would fit. + + if (n == 0 || n == k) return 1; + if (k > n) return 0; + + if (k > n/2) + k = n-k; + + uint64_t r = 1; + for (uint64_t i = 1; i <= k; ++i) { + r = umul_ov(r, n-(i-1), Overflow); + r /= i; + } + return r; +} + +/// getMulExpr - Get a canonical multiply expression, or something simpler if +/// possible. 
///
/// Ops is sorted and rewritten in place while simplifying, so callers must not
/// rely on its contents afterwards. Flags may only contain NUW and/or NSW
/// (asserted below). Several simplifications restart by recursing on the
/// rewritten operand list without forwarding Flags.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                        SCEV::NoWrapFlags Flags) {
  assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
         "only nuw or nsw allowed");
  assert(!Ops.empty() && "Cannot get empty mul!");
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
           "SCEVMulExpr operand types don't match!");
#endif

  // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
  // And vice-versa.
  int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
  SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
  if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
    bool All = true;
    for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
         E = Ops.end(); I != E; ++I)
      if (!isKnownNonNegative(*I)) {
        All = false;
        break;
      }
    if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
  }

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, LI);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {

    // C1*(C2+V) -> C1*C2 + C1*V
    if (Ops.size() == 2)
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
        if (Add->getNumOperands() == 2 &&
            isa<SCEVConstant>(Add->getOperand(0)))
          return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
                            getMulExpr(LHSC, Add->getOperand(1)));

    ++Idx;
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold = ConstantInt::get(getContext(),
                                           LHSC->getValue()->getValue() *
                                           RHSC->getValue()->getValue());
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant one being multiplied, strip it off.
    if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
      Ops.erase(Ops.begin());
      --Idx;
    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
      // If we have a multiply of zero, it will always be zero.
      return Ops[0];
    } else if (Ops[0]->isAllOnesValue()) {
      // If we have a mul by -1 of an add, try distributing the -1 among the
      // add operands.
      if (Ops.size() == 2) {
        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
          SmallVector<const SCEV *, 4> NewOps;
          bool AnyFolded = false;
          for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
                 E = Add->op_end(); I != E; ++I) {
            const SCEV *Mul = getMulExpr(Ops[0], *I);
            // Distributing only pays off if at least one product simplified
            // to something other than a multiply.
            if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
            NewOps.push_back(Mul);
          }
          if (AnyFolded)
            return getAddExpr(NewOps);
        }
        else if (const SCEVAddRecExpr *
                 AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
          // Negation preserves a recurrence's no self-wrap property.
          SmallVector<const SCEV *, 4> Operands;
          for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
                 E = AddRec->op_end(); I != E; ++I) {
            Operands.push_back(getMulExpr(Ops[0], *I));
          }
          return getAddRecExpr(Operands, AddRec->getLoop(),
                               AddRec->getNoWrapFlags(SCEV::FlagNW));
        }
      }
    }

    if (Ops.size() == 1)
      return Ops[0];
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // If there are mul operands inline them all into this expression.
  if (Idx < Ops.size()) {
    bool DeletedMul = false;
    while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
      // If we have a mul, expand the mul operands onto the end of the operands
      // list.
      Ops.erase(Ops.begin()+Idx);
      Ops.append(Mul->op_begin(), Mul->op_end());
      DeletedMul = true;
    }

    // If we deleted at least one mul, we added operands to the end of the list,
    // and they are not necessarily sorted.  Recurse to resort and resimplify
    // any operands we just acquired.
    if (DeletedMul)
      return getMulExpr(Ops);
  }

  // If there are any add recurrences in the operands list, see if any other
  // multiplied values are loop invariant.  If so, we can fold them into the
  // recurrence.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
    ++Idx;

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this mul and add them to the vector if
    // they are loop invariant w.r.t. the recurrence.
    SmallVector<const SCEV *, 8> LIOps;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
    const Loop *AddRecLoop = AddRec->getLoop();
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (isLoopInvariant(Ops[i], AddRecLoop)) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;
      }

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.reserve(AddRec->getNumOperands());
      const SCEV *Scale = getMulExpr(LIOps);
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
        NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));

      // Build the new addrec. Propagate the NUW and NSW flags if both the
      // outer mul and the inner addrec are guaranteed to have no overflow.
      //
      // No self-wrap cannot be guaranteed after changing the step size, but
      // will be inferred if either NUW or NSW is true.
      Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
      const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1) return NewRec;

      // Otherwise, multiply the folded AddRec by the non-invariant parts.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {
          Ops[i] = NewRec;
          break;
        }
      return getMulExpr(Ops);
    }

    // Okay, if there weren't any loop invariants to be folded, check to see if
    // there are multiple AddRec's with the same loop induction variable being
    // multiplied together.  If so, we can fold them.
    for (unsigned OtherIdx = Idx+1;
         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
         ++OtherIdx) {
      if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
        continue;

      // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
      // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
      //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
      //   ]]],+,...up to x=2n}.
      // Note that the arguments to choose() are always integers with values
      // known at compile time, never SCEV objects.
      //
      // The implementation avoids pointless extra computations when the two
      // addrec's are of different length (mathematically, it's equivalent to
      // an infinite stream of zeros on the right).
      bool OpsModified = false;
      for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
           ++OtherIdx) {
        const SCEVAddRecExpr *OtherAddRec =
          dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
        if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
          continue;

        // Overflow is sticky: once any coefficient computation sets it, every
        // loop below stops and this pair of recurrences is left unfolded.
        bool Overflow = false;
        Type *Ty = AddRec->getType();
        bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
        SmallVector<const SCEV*, 7> AddRecOps;
        for (int x = 0, xe = AddRec->getNumOperands() +
               OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
          const SCEV *Term = getConstant(Ty, 0);
          for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
            uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
            for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
                   ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
                 z < ze && !Overflow; ++z) {
              uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
              uint64_t Coeff;
              // The 64-bit product is only checked for overflow when the
              // expression type is wider than 64 bits; otherwise the plain
              // (wrapping) 64-bit product is used.
              if (LargerThan64Bits)
                Coeff = umul_ov(Coeff1, Coeff2, Overflow);
              else
                Coeff = Coeff1*Coeff2;
              const SCEV *CoeffTerm = getConstant(Ty, Coeff);
              const SCEV *Term1 = AddRec->getOperand(y-z);
              const SCEV *Term2 = OtherAddRec->getOperand(z);
              Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
            }
          }
          AddRecOps.push_back(Term);
        }
        if (!Overflow) {
          const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
                                                SCEV::FlagAnyWrap);
          if (Ops.size() == 2) return NewAddRec;
          Ops[Idx] = NewAddRec;
          Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
          OpsModified = true;
          // getAddRecExpr may have simplified the product to something that
          // is no longer a recurrence; stop folding into it in that case.
          AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
          if (!AddRec)
            break;
        }
      }
      if (OpsModified)
        return getMulExpr(Ops);
    }

    // Otherwise couldn't fold anything into this recurrence.  Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need a mul expr.  Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scMulExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = 0;
  SCEVMulExpr *S =
    static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
                                        O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
  }
  S->setNoWrapFlags(Flags);
  return S;
}

/// getUDivExpr - Get a canonical unsigned division expression, or something
/// simpler if possible.
///
/// Folding is only attempted when RHS is a non-zero constant. Each rewrite
/// below is guarded by a check in a wider type (ExtTy) that zero-extending the
/// whole expression equals the expression built from zero-extended operands.
/// If nothing folds, a uniqued SCEVUDivExpr node is returned.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  assert(getEffectiveSCEVType(LHS->getType()) ==
         getEffectiveSCEVType(RHS->getType()) &&
         "SCEVUDivExpr operand types don't match!");

  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
    if (RHSC->getValue()->equalsInt(1))
      return LHS;                               // X udiv 1 --> x
    // If the denominator is zero, the result of the udiv is undefined. Don't
    // try to analyze it, because the resolution chosen here may differ from
    // the resolution chosen in other parts of the compiler.
    if (!RHSC->getValue()->isZero()) {
      // Determine if the division can be folded into the operands of
      // its operands.
      // TODO: Generalize this to non-constants by using known-bits information.
      Type *Ty = LHS->getType();
      unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
      unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
      // For non-power-of-two values, effectively round the value up to the
      // nearest power of two.
      if (!RHSC->getValue()->getValue().isPowerOf2())
        ++MaxShiftAmt;
      // ExtTy is Ty widened by MaxShiftAmt bits; it is the type in which the
      // zero-extension checks below are performed.
      IntegerType *ExtTy =
        IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
        if (const SCEVConstant *Step =
            dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
          // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
          const APInt &StepInt = Step->getValue()->getValue();
          const APInt &DivInt = RHSC->getValue()->getValue();
          if (!StepInt.urem(DivInt) &&
              getZeroExtendExpr(AR, ExtTy) ==
              getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                            getZeroExtendExpr(Step, ExtTy),
                            AR->getLoop(), SCEV::FlagAnyWrap)) {
            SmallVector<const SCEV *, 4> Operands;
            for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
              Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
            return getAddRecExpr(Operands, AR->getLoop(),
                                 SCEV::FlagNW);
          }
          /// Get a canonical UDivExpr for a recurrence.
          /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
          // We can currently only fold X%N if X is constant.
          const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
          if (StartC && !DivInt.urem(StepInt) &&
              getZeroExtendExpr(AR, ExtTy) ==
              getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                            getZeroExtendExpr(Step, ExtTy),
                            AR->getLoop(), SCEV::FlagAnyWrap)) {
            const APInt &StartInt = StartC->getValue()->getValue();
            const APInt &StartRem = StartInt.urem(StepInt);
            // This only canonicalizes LHS; the division itself is still
            // created (or folded) by the code further down.
            if (StartRem != 0)
              LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
                                  AR->getLoop(), SCEV::FlagNW);
          }
        }
      // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
      if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
        SmallVector<const SCEV *, 4> Operands;
        for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
          Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
        if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
          // Find an operand that's safely divisible.
          for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
            const SCEV *Op = M->getOperand(i);
            const SCEV *Div = getUDivExpr(Op, RHSC);
            // Accept only a division that folded away and is exact, i.e.
            // multiplying the quotient back yields the original operand.
            if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
              Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
                                                      M->op_end());
              Operands[i] = Div;
              return getMulExpr(Operands);
            }
          }
      }
      // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
      if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
        SmallVector<const SCEV *, 4> Operands;
        for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
          Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
        if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
          Operands.clear();
          for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
            const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
            // Every operand must divide exactly; give up on the first one
            // that does not.
            if (isa<SCEVUDivExpr>(Op) ||
                getMulExpr(Op, RHS) != A->getOperand(i))
              break;
            Operands.push_back(Op);
          }
          if (Operands.size() == A->getNumOperands())
            return getAddExpr(Operands);
        }
      }

      // Fold if both operands are constant.
      if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
        Constant *LHSCV = LHSC->getValue();
        Constant *RHSCV = RHSC->getValue();
        return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
                                                                   RHSCV)));
      }
    }
  }

  // Nothing folded: look up or create the uniqued udiv node.
  FoldingSetNodeID ID;
  ID.AddInteger(scUDivExpr);
  ID.AddPointer(LHS);
  ID.AddPointer(RHS);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
                                             LHS, RHS);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}


/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, + const Loop *L, + SCEV::NoWrapFlags Flags) { + SmallVector<const SCEV *, 4> Operands; + Operands.push_back(Start); + if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) + if (StepChrec->getLoop() == L) { + Operands.append(StepChrec->op_begin(), StepChrec->op_end()); + return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); + } + + Operands.push_back(Step); + return getAddRecExpr(Operands, L, Flags); +} + +/// getAddRecExpr - Get an add recurrence expression for the specified loop. +/// Simplify the expression as much as possible. +const SCEV * +ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, + const Loop *L, SCEV::NoWrapFlags Flags) { + if (Operands.size() == 1) return Operands[0]; +#ifndef NDEBUG + Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); + for (unsigned i = 1, e = Operands.size(); i != e; ++i) + assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && + "SCEVAddRecExpr operand types don't match!"); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + assert(isLoopInvariant(Operands[i], L) && + "SCEVAddRecExpr operand is not loop-invariant!"); +#endif + + if (Operands.back()->isZero()) { + Operands.pop_back(); + return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X + } + + // It's tempting to want to call getMaxBackedgeTakenCount count here and + // use that information to infer NUW and NSW flags. However, computing a + // BE count requires calling getAddRecExpr, so we may not yet have a + // meaningful BE count at this point (and if we don't, we'd be stuck + // with a SCEVCouldNotCompute as the cached BE count). + + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. 
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { + bool All = true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(), + E = Operands.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); + } + + // Canonicalize nested AddRecs in by nesting them in order of loop depth. + if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { + const Loop *NestedLoop = NestedAR->getLoop(); + if (L->contains(NestedLoop) ? + (L->getLoopDepth() < NestedLoop->getLoopDepth()) : + (!NestedLoop->contains(L) && + DT->dominates(L->getHeader(), NestedLoop->getHeader()))) { + SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(), + NestedAR->op_end()); + Operands[0] = NestedAR->getStart(); + // AddRecs require their operands be loop-invariant with respect to their + // loops. Don't perform this transformation if it would break this + // requirement. + bool AllInvariant = true; + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + if (!isLoopInvariant(Operands[i], L)) { + AllInvariant = false; + break; + } + if (AllInvariant) { + // Create a recurrence for the outer loop with the same step size. + // + // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the + // inner recurrence has the same property. + SCEV::NoWrapFlags OuterFlags = + maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); + + NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); + AllInvariant = true; + for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) + if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { + AllInvariant = false; + break; + } + if (AllInvariant) { + // Ok, both add recurrences are valid after the transformation. 
+ // + // The inner recurrence keeps its NW flag but only keeps NUW/NSW if + // the outer recurrence has the same property. + SCEV::NoWrapFlags InnerFlags = + maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags); + return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags); + } + } + // Reset Operands to its original state. + Operands[0] = NestedAR; + } + } + + // Okay, it looks like we really DO need an addrec expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + ID.AddPointer(Operands[i]); + ID.AddPointer(L); + void *IP = 0; + SCEVAddRecExpr *S = + static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size()); + std::uninitialized_copy(Operands.begin(), Operands.end(), O); + S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), + O, Operands.size(), L); + UniqueSCEVs.InsertNode(S, IP); + } + S->setNoWrapFlags(Flags); + return S; +} + +const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector<const SCEV *, 2> Ops; + Ops.push_back(LHS); + Ops.push_back(RHS); + return getSMaxExpr(Ops); +} + +const SCEV * +ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "Cannot get empty smax!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVSMaxExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. 
+ unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + APIntOps::smax(LHSC->getValue()->getValue(), + RHSC->getValue()->getValue())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant minimum-int, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) { + // If we have an smax with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Find the first SMax + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) + ++Idx; + + // Check to see if one of the operands is an SMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { + bool DeletedSMax = false; + while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) { + Ops.erase(Ops.begin()+Idx); + Ops.append(SMax->op_begin(), SMax->op_end()); + DeletedSMax = true; + } + + if (DeletedSMax) + return getSMaxExpr(Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. 
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + // X smax Y smax Y --> X smax Y + // X smax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + + if (Ops.size() == 1) return Ops[0]; + + assert(!Ops.empty() && "Reduced smax down to nothing!"); + + // Okay, it looks like we really DO need an smax expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scSMaxExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector<const SCEV *, 2> Ops; + Ops.push_back(LHS); + Ops.push_back(RHS); + return getUMaxExpr(Ops); +} + +const SCEV * +ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "Cannot get empty umax!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVUMaxExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. 
+ unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + APIntOps::umax(LHSC->getValue()->getValue(), + RHSC->getValue()->getValue())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant minimum-int, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) { + // If we have an umax with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Find the first UMax + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) + ++Idx; + + // Check to see if one of the operands is a UMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { + bool DeletedUMax = false; + while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) { + Ops.erase(Ops.begin()+Idx); + Ops.append(UMax->op_begin(), UMax->op_end()); + DeletedUMax = true; + } + + if (DeletedUMax) + return getUMaxExpr(Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. 
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + // X umax Y umax Y --> X umax Y + // X umax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + + if (Ops.size() == 1) return Ops[0]; + + assert(!Ops.empty() && "Reduced umax down to nothing!"); + + // Okay, it looks like we really DO need a umax expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scUMaxExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, + const SCEV *RHS) { + // ~smax(~x, ~y) == smin(x, y). + return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + +const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, + const SCEV *RHS) { + // ~umax(~x, ~y) == umin(x, y) + return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + +const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { + // If we have DataLayout, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. 
+ if (TD) + return getConstant(IntTy, TD->getTypeAllocSize(AllocTy)); + + Constant *C = ConstantExpr::getSizeOf(AllocTy); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) + C = Folded; + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + assert(Ty == IntTy && "Effective SCEV type doesn't match"); + return getTruncateOrZeroExtend(getSCEV(C), Ty); +} + +const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, + StructType *STy, + unsigned FieldNo) { + // If we have DataLayout, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. + if (TD) { + return getConstant(IntTy, + TD->getStructLayout(STy)->getElementOffset(FieldNo)); + } + + Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) + C = Folded; + + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); + return getTruncateOrZeroExtend(getSCEV(C), Ty); +} + +const SCEV *ScalarEvolution::getUnknown(Value *V) { + // Don't attempt to do anything other than create a SCEVUnknown object + // here. createSCEV only calls getUnknown after checking for all other + // interesting possibilities, and any other code that calls getUnknown + // is doing so in order to hide a value from SCEV canonicalization. 
+ + FoldingSetNodeID ID; + ID.AddInteger(scUnknown); + ID.AddPointer(V); + void *IP = 0; + if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { + assert(cast<SCEVUnknown>(S)->getValue() == V && + "Stale SCEVUnknown in uniquing map!"); + return S; + } + SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this, + FirstUnknown); + FirstUnknown = cast<SCEVUnknown>(S); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +//===----------------------------------------------------------------------===// +// Basic SCEV Analysis and PHI Idiom Recognition Code +// + +/// isSCEVable - Test if values of the given type are analyzable within +/// the SCEV framework. This primarily includes integer types, and it +/// can optionally include pointer types if the ScalarEvolution class +/// has access to target-specific information. +bool ScalarEvolution::isSCEVable(Type *Ty) const { + // Integers and pointers are always SCEVable. + return Ty->isIntegerTy() || Ty->isPointerTy(); +} + +/// getTypeSizeInBits - Return the size in bits of the specified type, +/// for which isSCEVable must return true. +uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { + assert(isSCEVable(Ty) && "Type is not SCEVable!"); + + // If we have a DataLayout, use it! + if (TD) + return TD->getTypeSizeInBits(Ty); + + // Integer types have fixed sizes. + if (Ty->isIntegerTy()) + return Ty->getPrimitiveSizeInBits(); + + // The only other support type is pointer. Without DataLayout, conservatively + // assume pointers are 64-bit. + assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!"); + return 64; +} + +/// getEffectiveSCEVType - Return a type with the same bitwidth as +/// the given type and which represents how SCEV will treat the given +/// type, for which isSCEVable must return true. For pointer types, +/// this is the pointer-sized integer type. 
+Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { + assert(isSCEVable(Ty) && "Type is not SCEVable!"); + + if (Ty->isIntegerTy()) { + return Ty; + } + + // The only other support type is pointer. + assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); + + if (TD) + return TD->getIntPtrType(Ty); + + // Without DataLayout, conservatively assume pointers are 64-bit. + return Type::getInt64Ty(getContext()); +} + +const SCEV *ScalarEvolution::getCouldNotCompute() { + return &CouldNotCompute; +} + +namespace { + // Helper class working with SCEVTraversal to figure out if a SCEV contains + // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne + // is set iff if find such SCEVUnknown. + // + struct FindInvalidSCEVUnknown { + bool FindOne; + FindInvalidSCEVUnknown() { FindOne = false; } + bool follow(const SCEV *S) { + switch (S->getSCEVType()) { + case scConstant: + return false; + case scUnknown: + if (!cast<SCEVUnknown>(S)->getValue()) + FindOne = true; + return false; + default: + return true; + } + } + bool isDone() const { return FindOne; } + }; +} + +bool ScalarEvolution::checkValidity(const SCEV *S) const { + FindInvalidSCEVUnknown F; + SCEVTraversal<FindInvalidSCEVUnknown> ST(F); + ST.visitAll(S); + + return !F.FindOne; +} + +/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the +/// expression and create a new one. +const SCEV *ScalarEvolution::getSCEV(Value *V) { + assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + + ValueExprMapType::iterator I = ValueExprMap.find_as(V); + if (I != ValueExprMap.end()) { + const SCEV *S = I->second; + if (checkValidity(S)) + return S; + else + ValueExprMap.erase(I); + } + const SCEV *S = createSCEV(V); + + // The process of creating a SCEV for V may have caused other SCEVs + // to have been created, so it's necessary to insert the new entry + // from scratch, rather than trying to remember the insert position + // above. 
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + return S; +} + +/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V +/// +const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { + if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); + + Type *Ty = V->getType(); + Ty = getEffectiveSCEVType(Ty); + return getMulExpr(V, + getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)))); +} + +/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V +const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { + if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); + + Type *Ty = V->getType(); + Ty = getEffectiveSCEVType(Ty); + const SCEV *AllOnes = + getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); + return getMinusSCEV(AllOnes, V); +} + +/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. +const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, + SCEV::NoWrapFlags Flags) { + assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW"); + + // Fast path: X - X --> 0. + if (LHS == RHS) + return getConstant(LHS->getType(), 0); + + // X - Y --> X + -Y + return getAddExpr(LHS, getNegativeSCEV(RHS), Flags); +} + +/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is zero +/// extended. 
+const SCEV * +ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate or zero extend with non-integer arguments!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) + return getTruncateExpr(V, Ty); + return getZeroExtendExpr(V, Ty); +} + +/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is sign +/// extended. +const SCEV * +ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, + Type *Ty) { + Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate or zero extend with non-integer arguments!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) + return getTruncateExpr(V, Ty); + return getSignExtendExpr(V, Ty); +} + +/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is zero +/// extended. The conversion must not be narrowing. 
+const SCEV * +ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or zero extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrZeroExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getZeroExtendExpr(V, Ty); +} + +/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is sign +/// extended. The conversion must not be narrowing. +const SCEV * +ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or sign extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrSignExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getSignExtendExpr(V, Ty); +} + +/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of +/// the input value to the specified type. If the type must be extended, +/// it is extended with unspecified bits. The conversion must not be +/// narrowing. 
+const SCEV * +ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or any extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrAnyExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getAnyExtendExpr(V, Ty); +} + +/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. The conversion must not be widening. +const SCEV * +ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate or noop with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && + "getTruncateOrNoop cannot extend!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getTruncateExpr(V, Ty); +} + +/// getUMaxFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umax operation +/// with them. +const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, + const SCEV *RHS) { + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMaxExpr(PromotedLHS, PromotedRHS); +} + +/// getUMinFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umin operation +/// with them. 
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, + const SCEV *RHS) { + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMinExpr(PromotedLHS, PromotedRHS); +} + +/// getPointerBase - Transitively follow the chain of pointer-type operands +/// until reaching a SCEV that does not have a single pointer operand. This +/// returns a SCEVUnknown pointer for well-formed pointer-type expressions, +/// but corner cases do exist. +const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { + // A pointer operand may evaluate to a nonpointer expression, such as null. + if (!V->getType()->isPointerTy()) + return V; + + if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) { + return getPointerBase(Cast->getOperand()); + } + else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { + const SCEV *PtrOp = 0; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + if ((*I)->getType()->isPointerTy()) { + // Cannot find the base of an expression with multiple pointer operands. + if (PtrOp) + return V; + PtrOp = *I; + } + } + if (!PtrOp) + return V; + return getPointerBase(PtrOp); + } + return V; +} + +/// PushDefUseChildren - Push users of the given Instruction +/// onto the given Worklist. +static void +PushDefUseChildren(Instruction *I, + SmallVectorImpl<Instruction *> &Worklist) { + // Push the def-use children onto the Worklist stack. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Worklist.push_back(cast<Instruction>(*UI)); +} + +/// ForgetSymbolicValue - This looks up computed SCEV values for all +/// instructions that depend on the given instruction and removes them from +/// the ValueExprMapType map if they reference SymName. 
This is used during PHI +/// resolution. +void +ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { + SmallVector<Instruction *, 16> Worklist; + PushDefUseChildren(PN, Worklist); + + SmallPtrSet<Instruction *, 8> Visited; + Visited.insert(PN); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + const SCEV *Old = It->second; + + // Short-circuit the def-use traversal if the symbolic name + // ceases to appear in expressions. + if (Old != SymName && !hasOperand(Old, SymName)) + continue; + + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, it's a PHI that's in the progress of being computed + // by createNodeForPHI, or it's a single-value PHI. In the first case, + // additional loop trip count information isn't going to change anything. + // In the second case, createNodeForPHI will perform the necessary + // updates on its own when it gets to that point. In the third, we do + // want to forget the SCEVUnknown. + if (!isa<PHINode>(I) || + !isa<SCEVUnknown>(Old) || + (I != PN && Old == SymName)) { + forgetMemoizedResults(Old); + ValueExprMap.erase(It); + } + } + + PushDefUseChildren(I, Worklist); + } +} + +/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in +/// a loop header, making it a potential recurrence, or it doesn't. +/// +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { + if (const Loop *L = LI->getLoopFor(PN->getParent())) + if (L->getHeader() == PN->getParent()) { + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. 
+ Value *BEValueV = 0, *StartValueV = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = 0; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = 0; + break; + } + } + if (BEValueV && StartValueV) { + // While we are analyzing this PHI node, handle its value symbolically. + const SCEV *SymbolicName = getUnknown(PN); + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && + "PHI node already processed?"); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); + + // Using this symbolic name for the PHI, analyze the value coming around + // the back-edge. + const SCEV *BEValue = getSCEV(BEValueV); + + // NOTE: If BEValue is loop invariant, we know that the PHI node just + // has a special value for the first iteration of the loop. + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, then we found a simple induction variable! + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { + // If there is a single occurrence of the symbolic value, replace it + // with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicName) + if (FoundIndex == e) { + FoundIndex = i; + break; + } + + if (FoundIndex != Add->getNumOperands()) { + // Create an add with everything but the specified operand. + SmallVector<const SCEV *, 8> Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // This is not a valid addrec if the step amount is varying each + // loop iteration, but is not itself an addrec in this loop. 
+ if (isLoopInvariant(Accum, L) || + (isa<SCEVAddRecExpr>(Accum) && + cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + + // If the increment doesn't overflow, then neither the addrec nor + // the post-increment will overflow. + if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); + } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds()) { + Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); + } + } else if (const SubOperator *OBO = + dyn_cast<SubOperator>(BEValueV)) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); + } + + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + + // Since the no-wrap flags are on the increment, they apply to the + // post-incremented value as well. + if (isLoopInvariant(Accum, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), + Accum, L, Flags); + + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. 
+ ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; + } + } + } else if (const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(BEValue)) { + // Otherwise, this could be a loop like this: + // i = 0; for (j = 1; ..; ++j) { .... i = j; } + // In this case, j = {1,+,1} and BEValue is j. + // Because the other in-value of i (0) fits the evolution of BEValue + // i really is an addrec evolution. + if (AddRec->getLoop() == L && AddRec->isAffine()) { + const SCEV *StartVal = getSCEV(StartValueV); + + // If StartVal = j.start - j.stride, we can use StartVal as the + // initial step of the addrec evolution. + if (StartVal == getMinusSCEV(AddRec->getOperand(0), + AddRec->getOperand(1))) { + // FIXME: For constant StartVal, we should be able to infer + // no-wrap flags. + const SCEV *PHISCEV = + getAddRecExpr(StartVal, AddRec->getOperand(1), L, + SCEV::FlagAnyWrap); + + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; + } + } + } + } + } + + // If the PHI has a single incoming value, follow that value, unless the + // PHI's incoming blocks are in a different loop, in which case doing so + // risks breaking LCSSA form. Instcombine would normally zap these, but + // it doesn't have DominatorTree information, so it may miss cases. + if (Value *V = SimplifyInstruction(PN, TD, TLI, DT)) + if (LI->replacementPreservesLCSSAForm(PN, V)) + return getSCEV(V); + + // If it's not a loop phi, we can't handle it yet. + return getUnknown(PN); +} + +/// createNodeForGEP - Expand GEP instructions into add and multiply +/// operations. This allows them to be analyzed by regular SCEV code. 
+/// +const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { + Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); + Value *Base = GEP->getOperand(0); + // Don't attempt to analyze GEPs over unsized objects. + if (!Base->getType()->getPointerElementType()->isSized()) + return getUnknown(GEP); + + // Don't blindly transfer the inbounds flag from the GEP instruction to the + // Add expression, because the Instruction may be guarded by control flow + // and the no-overflow bits may not be valid for the expression in any + // context. + SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; + + const SCEV *TotalOffset = getConstant(IntPtrTy, 0); + gep_type_iterator GTI = gep_type_begin(GEP); + for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()), + E = GEP->op_end(); + I != E; ++I) { + Value *Index = *I; + // Compute the (potentially symbolic) offset in bytes for this index. + if (StructType *STy = dyn_cast<StructType>(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); + const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); + + // Add the field offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, FieldOffset); + } else { + // For an array, add the element offset, explicitly scaled. + const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI); + const SCEV *IndexS = getSCEV(Index); + // Getelementptr indices are signed. + IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); + + // Multiply the index by the element size to compute the element offset. + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap); + + // Add the element offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, LocalOffset); + } + } + + // Get the SCEV for the GEP base. + const SCEV *BaseS = getSCEV(Base); + + // Add the total offset from all the GEP indices to the base. 
+ return getAddExpr(BaseS, TotalOffset, Wrap); +} + +/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is +/// guaranteed to end in (at every loop iteration). It is, at the same time, +/// the minimum number of times S is divisible by 2. For example, given {4,+,8} +/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S. +uint32_t +ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return C->getValue()->getValue().countTrailingZeros(); + + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) + return std::min(GetMinTrailingZeros(T->getOperand()), + (uint32_t)getTypeSizeInBits(T->getType())); + + if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) { + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; + } + + if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) { + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; + } + + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); + for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); + return MinOpRes; + } + + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { + // The result is the sum of all operands results. 
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0)); + uint32_t BitWidth = getTypeSizeInBits(M->getType()); + for (unsigned i = 1, e = M->getNumOperands(); + SumOpRes != BitWidth && i != e; ++i) + SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), + BitWidth); + return SumOpRes; + } + + if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); + for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); + return MinOpRes; + } + + if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); + for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); + return MinOpRes; + } + + if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); + for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); + return MinOpRes; + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + unsigned BitWidth = getTypeSizeInBits(U->getType()); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Zeros, Ones); + return Zeros.countTrailingOnes(); + } + + // SCEVUDivExpr + return 0; +} + +/// getUnsignedRange - Determine the unsigned range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getUnsignedRange(const SCEV *S) { + // See if we've computed this range already. 
+ DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S); + if (I != UnsignedRanges.end()) + return I->second; + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return setUnsignedRange(C, ConstantRange(C->getValue()->getValue())); + + unsigned BitWidth = getTypeSizeInBits(S->getType()); + ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); + + // If the value has known zeros, the maximum unsigned value will have those + // known zeros as well. + uint32_t TZ = GetMinTrailingZeros(S); + if (TZ != 0) + ConservativeResult = + ConstantRange(APInt::getMinValue(BitWidth), + APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1); + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + ConstantRange X = getUnsignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getUnsignedRange(Add->getOperand(i))); + return setUnsignedRange(Add, ConservativeResult.intersectWith(X)); + } + + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + ConstantRange X = getUnsignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getUnsignedRange(Mul->getOperand(i))); + return setUnsignedRange(Mul, ConservativeResult.intersectWith(X)); + } + + if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { + ConstantRange X = getUnsignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getUnsignedRange(SMax->getOperand(i))); + return setUnsignedRange(SMax, ConservativeResult.intersectWith(X)); + } + + if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { + ConstantRange X = getUnsignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getUnsignedRange(UMax->getOperand(i))); + return setUnsignedRange(UMax, ConservativeResult.intersectWith(X)); + } + + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + ConstantRange X = 
getUnsignedRange(UDiv->getLHS()); + ConstantRange Y = getUnsignedRange(UDiv->getRHS()); + return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y))); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { + ConstantRange X = getUnsignedRange(ZExt->getOperand()); + return setUnsignedRange(ZExt, + ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { + ConstantRange X = getUnsignedRange(SExt->getOperand()); + return setUnsignedRange(SExt, + ConservativeResult.intersectWith(X.signExtend(BitWidth))); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { + ConstantRange X = getUnsignedRange(Trunc->getOperand()); + return setUnsignedRange(Trunc, + ConservativeResult.intersectWith(X.truncate(BitWidth))); + } + + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + // If there's no unsigned wrap, the value will never be less than its + // initial value. + if (AddRec->getNoWrapFlags(SCEV::FlagNUW)) + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) + if (!C->getValue()->isZero()) + ConservativeResult = + ConservativeResult.intersectWith( + ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(MaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + ConstantRange StartRange = getUnsignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. 
This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. + ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) + return setUnsignedRange(AddRec, ConservativeResult); + + APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), + EndRange.getUnsignedMin()); + APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), + EndRange.getUnsignedMax()); + if (Min.isMinValue() && Max.isMaxValue()) + return setUnsignedRange(AddRec, ConservativeResult); + return setUnsignedRange(AddRec, + ConservativeResult.intersectWith(ConstantRange(Min, Max+1))); + } + } + + return setUnsignedRange(AddRec, ConservativeResult); + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Zeros, Ones, TD); + if (Ones == ~Zeros + 1) + return setUnsignedRange(U, ConservativeResult); + return setUnsignedRange(U, + ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1))); + } + + return setUnsignedRange(S, ConservativeResult); +} + +/// getSignedRange - Determine the signed range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getSignedRange(const SCEV *S) { + // See if we've computed this range already. 
+ DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S); + if (I != SignedRanges.end()) + return I->second; + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return setSignedRange(C, ConstantRange(C->getValue()->getValue())); + + unsigned BitWidth = getTypeSizeInBits(S->getType()); + ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); + + // If the value has known zeros, the maximum signed value will have those + // known zeros as well. + uint32_t TZ = GetMinTrailingZeros(S); + if (TZ != 0) + ConservativeResult = + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + ConstantRange X = getSignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getSignedRange(Add->getOperand(i))); + return setSignedRange(Add, ConservativeResult.intersectWith(X)); + } + + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + ConstantRange X = getSignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getSignedRange(Mul->getOperand(i))); + return setSignedRange(Mul, ConservativeResult.intersectWith(X)); + } + + if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { + ConstantRange X = getSignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getSignedRange(SMax->getOperand(i))); + return setSignedRange(SMax, ConservativeResult.intersectWith(X)); + } + + if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { + ConstantRange X = getSignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getSignedRange(UMax->getOperand(i))); + return setSignedRange(UMax, ConservativeResult.intersectWith(X)); + } + + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + ConstantRange X = 
getSignedRange(UDiv->getLHS()); + ConstantRange Y = getSignedRange(UDiv->getRHS()); + return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y))); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { + ConstantRange X = getSignedRange(ZExt->getOperand()); + return setSignedRange(ZExt, + ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { + ConstantRange X = getSignedRange(SExt->getOperand()); + return setSignedRange(SExt, + ConservativeResult.intersectWith(X.signExtend(BitWidth))); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { + ConstantRange X = getSignedRange(Trunc->getOperand()); + return setSignedRange(Trunc, + ConservativeResult.intersectWith(X.truncate(BitWidth))); + } + + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + // If there's no signed wrap, and all the operands have the same sign or + // zero, the value won't ever change sign. 
+ if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) { + bool AllNonNeg = true; + bool AllNonPos = true; + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { + if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false; + if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false; + } + if (AllNonNeg) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt(BitWidth, 0), + APInt::getSignedMinValue(BitWidth))); + else if (AllNonPos) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt(BitWidth, 1))); + } + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(MaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + ConstantRange StartRange = getSignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. 
+ ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) + return setSignedRange(AddRec, ConservativeResult); + + APInt Min = APIntOps::smin(StartRange.getSignedMin(), + EndRange.getSignedMin()); + APInt Max = APIntOps::smax(StartRange.getSignedMax(), + EndRange.getSignedMax()); + if (Min.isMinSignedValue() && Max.isMaxSignedValue()) + return setSignedRange(AddRec, ConservativeResult); + return setSignedRange(AddRec, + ConservativeResult.intersectWith(ConstantRange(Min, Max+1))); + } + } + + return setSignedRange(AddRec, ConservativeResult); + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + if (!U->getValue()->getType()->isIntegerTy() && !TD) + return setSignedRange(U, ConservativeResult); + unsigned NS = ComputeNumSignBits(U->getValue(), TD); + if (NS <= 1) + return setSignedRange(U, ConservativeResult); + return setSignedRange(U, ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1))); + } + + return setSignedRange(S, ConservativeResult); +} + +/// createSCEV - We know that there is no SCEV for the specified value. +/// Analyze the expression. +/// +const SCEV *ScalarEvolution::createSCEV(Value *V) { + if (!isSCEVable(V->getType())) + return getUnknown(V); + + unsigned Opcode = Instruction::UserOp1; + if (Instruction *I = dyn_cast<Instruction>(V)) { + Opcode = I->getOpcode(); + + // Don't attempt to analyze instructions in blocks that aren't + // reachable. 
Such instructions don't matter, and they aren't required + // to obey basic rules for definitions dominating uses which this + // analysis depends on. + if (!DT->isReachableFromEntry(I->getParent())) + return getUnknown(V); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + Opcode = CE->getOpcode(); + else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return getConstant(CI); + else if (isa<ConstantPointerNull>(V)) + return getConstant(V->getType(), 0); + else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) + return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); + else + return getUnknown(V); + + Operator *U = cast<Operator>(V); + switch (Opcode) { + case Instruction::Add: { + // The simple thing to do would be to just call getSCEV on both operands + // and call getAddExpr with the result. However if we're looking at a + // bunch of things all added together, this can be quite inefficient, + // because it leads to N-1 getAddExpr calls for N ultimate operands. + // Instead, gather up all the operands and make a single getAddExpr call. + // LLVM IR canonical form means we need only traverse the left operands. + // + // Don't apply this instruction's NSW or NUW flags to the new + // expression. The instruction may be guarded by control flow that the + // no-wrap behavior depends on. Non-control-equivalent instructions can be + // mapped to the same SCEV expression, and it would be incorrect to transfer + // NSW/NUW semantics to those operations. 
+ SmallVector<const SCEV *, 4> AddOps; + AddOps.push_back(getSCEV(U->getOperand(1))); + for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { + unsigned Opcode = Op->getValueID() - Value::InstructionVal; + if (Opcode != Instruction::Add && Opcode != Instruction::Sub) + break; + U = cast<Operator>(Op); + const SCEV *Op1 = getSCEV(U->getOperand(1)); + if (Opcode == Instruction::Sub) + AddOps.push_back(getNegativeSCEV(Op1)); + else + AddOps.push_back(Op1); + } + AddOps.push_back(getSCEV(U->getOperand(0))); + return getAddExpr(AddOps); + } + case Instruction::Mul: { + // Don't transfer NSW/NUW for the same reason as AddExpr. + SmallVector<const SCEV *, 4> MulOps; + MulOps.push_back(getSCEV(U->getOperand(1))); + for (Value *Op = U->getOperand(0); + Op->getValueID() == Instruction::Mul + Value::InstructionVal; + Op = U->getOperand(0)) { + U = cast<Operator>(Op); + MulOps.push_back(getSCEV(U->getOperand(1))); + } + MulOps.push_back(getSCEV(U->getOperand(0))); + return getMulExpr(MulOps); + } + case Instruction::UDiv: + return getUDivExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + case Instruction::Sub: + return getMinusSCEV(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + case Instruction::And: + // For an expression like x&255 that merely masks off the high bits, + // use zext(trunc(x)) as the SCEV expression. + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + if (CI->isNullValue()) + return getSCEV(U->getOperand(1)); + if (CI->isAllOnesValue()) + return getSCEV(U->getOperand(0)); + const APInt &A = CI->getValue(); + + // Instcombine's ShrinkDemandedConstant may strip bits out of + // constants, obscuring what would otherwise be a low-bits mask. + // Use ComputeMaskedBits to compute what ShrinkDemandedConstant + // knew about to reconstruct a low-bits mask value. 
+ unsigned LZ = A.countLeadingZeros(); + unsigned BitWidth = A.getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD); + + APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); + + if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) + return + getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), + IntegerType::get(getContext(), BitWidth - LZ)), + U->getType()); + } + break; + + case Instruction::Or: + // If the RHS of the Or is a constant, we may have something like: + // X*4+1 which got turned into X*4|1. Handle this as an Add so loop + // optimizations will transparently handle this case. + // + // In order for this transformation to be safe, the LHS must be of the + // form X*(2^n) and the Or constant must be less than 2^n. + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + const SCEV *LHS = getSCEV(U->getOperand(0)); + const APInt &CIVal = CI->getValue(); + if (GetMinTrailingZeros(LHS) >= + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); + const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( + OldAR->getNoWrapFlags()); + } + return S; + } + } + break; + case Instruction::Xor: + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + // If the RHS of the xor is a signbit, then this is just an add. + // Instcombine turns add of signbit into xor as a strength reduction step. + if (CI->getValue().isSignBit()) + return getAddExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + + // If the RHS of xor is -1, then this is a not operation. 
+ if (CI->isAllOnesValue()) + return getNotSCEV(getSCEV(U->getOperand(0))); + + // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. + // This is a variant of the check for xor with -1, and it handles + // the case where instcombine has trimmed non-demanded bits out + // of an xor with -1. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) + if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO->getOpcode() == Instruction::And && + LCI->getValue() == CI->getValue()) + if (const SCEVZeroExtendExpr *Z = + dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { + Type *UTy = U->getType(); + const SCEV *Z0 = Z->getOperand(); + Type *Z0Ty = Z0->getType(); + unsigned Z0TySize = getTypeSizeInBits(Z0Ty); + + // If C is a low-bits mask, the zero extend is serving to + // mask off the high bits. Complement the operand and + // re-apply the zext. + if (APIntOps::isMask(Z0TySize, CI->getValue())) + return getZeroExtendExpr(getNotSCEV(Z0), UTy); + + // If C is a single bit, it may be in the sign-bit position + // before the zero-extend. In this case, represent the xor + // using an add, which is equivalent, and re-apply the zext. + APInt Trunc = CI->getValue().trunc(Z0TySize); + if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && + Trunc.isSignBit()) + return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), + UTy); + } + } + break; + + case Instruction::Shl: + // Turn shift left of a constant amount into a multiply. + if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { + uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. 
+ if (SA->getValue().uge(BitWidth)) + break; + + Constant *X = ConstantInt::get(getContext(), + APInt::getOneBitSet(BitWidth, SA->getZExtValue())); + return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); + } + break; + + case Instruction::LShr: + // Turn logical shift right of a constant into a unsigned divide. + if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { + uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (SA->getValue().uge(BitWidth)) + break; + + Constant *X = ConstantInt::get(getContext(), + APInt::getOneBitSet(BitWidth, SA->getZExtValue())); + return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); + } + break; + + case Instruction::AShr: + // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression. + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) + if (Operator *L = dyn_cast<Operator>(U->getOperand(0))) + if (L->getOpcode() == Instruction::Shl && + L->getOperand(1) == U->getOperand(1)) { + uint64_t BitWidth = getTypeSizeInBits(U->getType()); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. 
+ if (CI->getValue().uge(BitWidth)) + break; + + uint64_t Amt = BitWidth - CI->getZExtValue(); + if (Amt == BitWidth) + return getSCEV(L->getOperand(0)); // shift by zero --> noop + return + getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)), + IntegerType::get(getContext(), + Amt)), + U->getType()); + } + break; + + case Instruction::Trunc: + return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); + + case Instruction::ZExt: + return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); + + case Instruction::SExt: + return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); + + case Instruction::BitCast: + // BitCasts are no-op casts so we just eliminate the cast. + if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) + return getSCEV(U->getOperand(0)); + break; + + // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can + // lead to pointer expressions which cannot safely be expanded to GEPs, + // because ScalarEvolution doesn't respect the GEP aliasing rules when + // simplifying integer expressions. + + case Instruction::GetElementPtr: + return createNodeForGEP(cast<GEPOperator>(U)); + + case Instruction::PHI: + return createNodeForPHI(cast<PHINode>(U)); + + case Instruction::Select: + // This could be a smax or umax that was lowered earlier. + // Try to recover it. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) { + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + switch (ICI->getPredicate()) { + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + // a >s b ? a+x : b+x -> smax(a, b)+x + // a >s b ? 
b+x : a+x -> smin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getSMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getSMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // a >u b ? a+x : b+x -> umax(a, b)+x + // a >u b ? b+x : a+x -> umin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_NE: + // n != 0 ? n+x : 1+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, One); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + case ICmpInst::ICMP_EQ: + // n == 0 ? 
1+x : n+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, One); + const SCEV *RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + default: + break; + } + } + + default: // We cannot analyze this expression. + break; + } + + return getUnknown(V); +} + + + +//===----------------------------------------------------------------------===// +// Iteration Count Computation Code +// + +/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a +/// normal unsigned value. Returns 0 if the trip count is unknown or not +/// constant. Will also return 0 if the maximum trip count is very large (>= +/// 2^32). +/// +/// This "trip count" assumes that control exits via ExitingBlock. More +/// precisely, it is the number of times that control may reach ExitingBlock +/// before taking the branch. For loops with multiple exits, it may not be the +/// number times that the loop header executes because the loop may exit +/// prematurely via another branch. +/// +/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of +/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all +/// loop exits. getExitCount() may return an exact count for this branch +/// assuming no-signed-wrap. The number of well-defined iterations may actually +/// be higher than this trip count if this exit test is skipped and the loop +/// exits via a different branch. Ideally, getExitCount() would know whether it +/// depends on a NSW assumption, and we would only fall back to a conservative +/// trip count in that case. 
+unsigned ScalarEvolution:: +getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) { + const SCEVConstant *ExitCount = + dyn_cast<SCEVConstant>(getBackedgeTakenCount(L)); + if (!ExitCount) + return 0; + + ConstantInt *ExitConst = ExitCount->getValue(); + + // Guard against huge trip counts. + if (ExitConst->getValue().getActiveBits() > 32) + return 0; + + // In case of integer overflow, this returns 0, which is correct. + return ((unsigned)ExitConst->getZExtValue()) + 1; +} + +/// getSmallConstantTripMultiple - Returns the largest constant divisor of the +/// trip count of this loop as a normal unsigned value, if possible. This +/// means that the actual trip count is always a multiple of the returned +/// value (don't forget the trip count could very well be zero as well!). +/// +/// Returns 1 if the trip count is unknown or not guaranteed to be the +/// multiple of a constant (which is also the case if the trip count is simply +/// constant, use getSmallConstantTripCount for that case), Will also return 1 +/// if the trip count is very large (>= 2^32). +/// +/// As explained in the comments for getSmallConstantTripCount, this assumes +/// that control exits the loop via ExitingBlock. +unsigned ScalarEvolution:: +getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) { + const SCEV *ExitCount = getBackedgeTakenCount(L); + if (ExitCount == getCouldNotCompute()) + return 1; + + // Get the trip count from the BE count by adding 1. + const SCEV *TCMul = getAddExpr(ExitCount, + getConstant(ExitCount->getType(), 1)); + // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt + // to factor simple cases. 
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) + TCMul = Mul->getOperand(0); + + const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul); + if (!MulC) + return 1; + + ConstantInt *Result = MulC->getValue(); + + // Guard against huge trip counts (this requires checking + // for zero to handle the case where the trip count == -1 and the + // addition wraps). + if (!Result || Result->getValue().getActiveBits() > 32 || + Result->getValue().getActiveBits() == 0) + return 1; + + return (unsigned)Result->getZExtValue(); +} + +// getExitCount - Get the expression for the number of loop iterations for which +// this loop is guaranteed not to exit via ExitingBlock. Otherwise return +// SCEVCouldNotCompute. +const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { + return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); +} + +/// getBackedgeTakenCount - If the specified loop has a predictable +/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute +/// object. The backedge-taken count is the number of times the loop header +/// will be branched to from within the loop. This is one less than the +/// trip count of the loop, since it doesn't count the first iteration, +/// when the header is branched to from outside the loop. +/// +/// Note that it is not valid to call this method on a loop without a +/// loop-invariant backedge-taken count (see +/// hasLoopInvariantBackedgeTakenCount). +/// +const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { + return getBackedgeTakenInfo(L).getExact(this); +} + +/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except +/// return the least SCEV value that is known never to be less than the +/// actual backedge taken count. +const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { + return getBackedgeTakenInfo(L).getMax(this); +} + +/// PushLoopPHIs - Push PHI nodes in the header of the given loop +/// onto the given Worklist. 
+static void +PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { + BasicBlock *Header = L->getHeader(); + + // Push all Loop-header PHIs onto the Worklist stack. + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + Worklist.push_back(PN); +} + +const ScalarEvolution::BackedgeTakenInfo & +ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { + // Initially insert an invalid entry for this loop. If the insertion + // succeeds, proceed to actually compute a backedge-taken count and + // update the value. The temporary CouldNotCompute value tells SCEV + // code elsewhere that it shouldn't attempt to request a new + // backedge-taken count, which could result in infinite recursion. + std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = + BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo())); + if (!Pair.second) + return Pair.first->second; + + // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it + // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result + // must be cleared in this scope. + BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L); + + if (Result.getExact(this) != getCouldNotCompute()) { + assert(isLoopInvariant(Result.getExact(this), L) && + isLoopInvariant(Result.getMax(this), L) && + "Computed backedge-taken count isn't loop invariant for loop!"); + ++NumTripCountsComputed; + } + else if (Result.getMax(this) == getCouldNotCompute() && + isa<PHINode>(L->getHeader()->begin())) { + // Only count loops that have phi nodes as not being computable. + ++NumTripCountsNotComputed; + } + + // Now that we know more about the trip count for this loop, forget any + // existing SCEV values for PHI nodes in this loop since they are only + // conservative estimates made without the benefit of trip count + // information. This is similar to the code in forgetLoop, except that + // it handles SCEVUnknown PHI nodes specially. 
+ if (Result.hasAnyInfo()) { + SmallVector<Instruction *, 16> Worklist; + PushLoopPHIs(L, Worklist); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + const SCEV *Old = It->second; + + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, or it's a PHI that's in the progress of being computed + // by createNodeForPHI. In the former case, additional loop trip + // count information isn't going to change anything. In the later + // case, createNodeForPHI will perform the necessary updates on its + // own when it gets to that point. + if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) { + forgetMemoizedResults(Old); + ValueExprMap.erase(It); + } + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } + } + + // Re-lookup the insert position, since the call to + // ComputeBackedgeTakenCount above could result in a + // recusive call to getBackedgeTakenInfo (on a different + // loop), which would invalidate the iterator computed + // earlier. + return BackedgeTakenCounts.find(L)->second = Result; +} + +/// forgetLoop - This method should be called by the client when it has +/// changed a loop in a way that may effect ScalarEvolution's ability to +/// compute a trip count, or if the loop is deleted. +void ScalarEvolution::forgetLoop(const Loop *L) { + // Drop any stored trip count value. + DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos = + BackedgeTakenCounts.find(L); + if (BTCPos != BackedgeTakenCounts.end()) { + BTCPos->second.clear(); + BackedgeTakenCounts.erase(BTCPos); + } + + // Drop information about expressions based on loop-header PHIs. 
+ SmallVector<Instruction *, 16> Worklist; + PushLoopPHIs(L, Worklist); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + forgetMemoizedResults(It->second); + ValueExprMap.erase(It); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } + + // Forget all contained loops too, to avoid dangling entries in the + // ValuesAtScopes map. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + forgetLoop(*I); +} + +/// forgetValue - This method should be called by the client when it has +/// changed a value in a way that may effect its value, or which may +/// disconnect it from a def-use chain linking it to a loop. +void ScalarEvolution::forgetValue(Value *V) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return; + + // Drop information about expressions based on loop-header PHIs. + SmallVector<Instruction *, 16> Worklist; + Worklist.push_back(I); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + forgetMemoizedResults(It->second); + ValueExprMap.erase(It); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } +} + +/// getExact - Get the exact loop backedge taken count considering all loop +/// exits. A computable result can only be return for loops with a single exit. +/// Returning the minimum taken count among all exits is incorrect because one +/// of the loop's exit limit's may have been skipped. 
HowFarToZero assumes that +/// the limit of each loop test is never skipped. This is a valid assumption as +/// long as the loop exits via that test. For precise results, it is the +/// caller's responsibility to specify the relevant loop exit using +/// getExact(ExitingBlock, SE). +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { + // If any exits were not computable, the loop is not computable. + if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); + + // We need exactly one computable exit. + if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); + assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); + + const SCEV *BECount = 0; + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); + + if (!BECount) + BECount = ENT->ExactNotTaken; + else if (BECount != ENT->ExactNotTaken) + return SE->getCouldNotCompute(); + } + assert(BECount && "Invalid not taken count for loop exit"); + return BECount; +} + +/// getExact - Get the exact not taken count for this loop exit. +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, + ScalarEvolution *SE) const { + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + if (ENT->ExitingBlock == ExitingBlock) + return ENT->ExactNotTaken; + } + return SE->getCouldNotCompute(); +} + +/// getMax - Get the max backedge taken count for the loop. +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { + return Max ? 
Max : SE->getCouldNotCompute(); +} + +bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, + ScalarEvolution *SE) const { + if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S)) + return true; + + if (!ExitNotTaken.ExitingBlock) + return false; + + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + if (ENT->ExactNotTaken != SE->getCouldNotCompute() + && SE->hasOperand(ENT->ExactNotTaken, S)) { + return true; + } + } + return false; +} + +/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each +/// computable exit into a persistent ExitNotTakenInfo array. +ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( + SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts, + bool Complete, const SCEV *MaxCount) : Max(MaxCount) { + + if (!Complete) + ExitNotTaken.setIncomplete(); + + unsigned NumExits = ExitCounts.size(); + if (NumExits == 0) return; + + ExitNotTaken.ExitingBlock = ExitCounts[0].first; + ExitNotTaken.ExactNotTaken = ExitCounts[0].second; + if (NumExits == 1) return; + + // Handle the rare case of multiple computable exits. + ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1]; + + ExitNotTakenInfo *PrevENT = &ExitNotTaken; + for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) { + PrevENT->setNextExit(ENT); + ENT->ExitingBlock = ExitCounts[i].first; + ENT->ExactNotTaken = ExitCounts[i].second; + } +} + +/// clear - Invalidate this result and free the ExitNotTakenInfo array. +void ScalarEvolution::BackedgeTakenInfo::clear() { + ExitNotTaken.ExitingBlock = 0; + ExitNotTaken.ExactNotTaken = 0; + delete[] ExitNotTaken.getNextExit(); +} + +/// ComputeBackedgeTakenCount - Compute the number of times the backedge +/// of the specified loop will execute. 
+ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Examine all exits and pick the most conservative values. + const SCEV *MaxBECount = getCouldNotCompute(); + bool CouldComputeBECount = true; + SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts; + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); + if (EL.Exact == getCouldNotCompute()) + // We couldn't compute an exact value for this exit, so + // we won't be able to compute an exact value for the loop. + CouldComputeBECount = false; + else + ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact)); + + if (MaxBECount == getCouldNotCompute()) + MaxBECount = EL.Max; + else if (EL.Max != getCouldNotCompute()) { + // We cannot take the "min" MaxBECount, because non-unit stride loops may + // skip some loop tests. Taking the max over the exits is sufficiently + // conservative. TODO: We could do better taking into consideration + // that (1) the loop has unit stride (2) the last loop test is + // less-than/greater-than (3) any loop test is less-than/greater-than AND + // falls-through some constant times less then the other tests. + MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + } + } + + return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); +} + +/// ComputeExitLimit - Compute the number of times the backedge of the specified +/// loop will execute if it exits via the specified block. +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { + + // Okay, we've chosen an exiting block. See what condition causes us to + // exit at this block. 
+ // + // FIXME: we should be able to handle switch instructions (with a single exit) + BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (ExitBr == 0) return getCouldNotCompute(); + assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!"); + + // At this point, we know we have a conditional branch that determines whether + // the loop is exited. However, we don't know if the branch is executed each + // time through the loop. If not, then the execution count of the branch will + // not be equal to the trip count of the loop. + // + // Currently we check for this by checking to see if the Exit branch goes to + // the loop header. If so, we know it will always execute the same number of + // times as the loop. We also handle the case where the exit block *is* the + // loop header. This is common for un-rotated loops. + // + // If both of those tests fail, walk up the unique predecessor chain to the + // header, stopping if there is an edge that doesn't exit the loop. If the + // header is reached, the execution count of the branch will be equal to the + // trip count of the loop. + // + // More extensive analysis could be done to handle more cases here. + // + if (ExitBr->getSuccessor(0) != L->getHeader() && + ExitBr->getSuccessor(1) != L->getHeader() && + ExitBr->getParent() != L->getHeader()) { + // The simple checks failed, try climbing the unique predecessor chain + // up to the header. + bool Ok = false; + for (BasicBlock *BB = ExitBr->getParent(); BB; ) { + BasicBlock *Pred = BB->getUniquePredecessor(); + if (!Pred) + return getCouldNotCompute(); + TerminatorInst *PredTerm = Pred->getTerminator(); + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { + BasicBlock *PredSucc = PredTerm->getSuccessor(i); + if (PredSucc == BB) + continue; + // If the predecessor has a successor that isn't BB and isn't + // outside the loop, assume the worst. 
+ if (L->contains(PredSucc)) + return getCouldNotCompute(); + } + if (Pred == L->getHeader()) { + Ok = true; + break; + } + BB = Pred; + } + if (!Ok) + return getCouldNotCompute(); + } + + // Proceed to the next level to examine the exit condition expression. + return ComputeExitLimitFromCond(L, ExitBr->getCondition(), + ExitBr->getSuccessor(0), + ExitBr->getSuccessor(1), + /*IsSubExpr=*/false); +} + +/// ComputeExitLimitFromCond - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of ExitCond, TBB, and FBB. +/// +/// @param IsSubExpr is true if ExitCond does not directly control the exit +/// branch. In this case, we cannot assume that the loop only exits when the +/// condition is true and cannot infer that failing to meet the condition prior +/// to integer wraparound results in undefined behavior. +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, + Value *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB, + bool IsSubExpr) { + // Check if the controlling expression for this loop is an And or Or. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { + if (BO->getOpcode() == Instruction::And) { + // Recurse on the operands of the and. + bool EitherMayExit = L->contains(TBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + IsSubExpr || EitherMayExit); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + IsSubExpr || EitherMayExit); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); + if (EitherMayExit) { + // Both conditions must be true for the loop to continue executing. + // Choose the less conservative count. 
+ if (EL0.Exact == getCouldNotCompute() || + EL1.Exact == getCouldNotCompute()) + BECount = getCouldNotCompute(); + else + BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); + if (EL0.Max == getCouldNotCompute()) + MaxBECount = EL1.Max; + else if (EL1.Max == getCouldNotCompute()) + MaxBECount = EL0.Max; + else + MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); + } else { + // Both conditions must be true at the same time for the loop to exit. + // For now, be conservative. + assert(L->contains(FBB) && "Loop block has no successor in loop!"); + if (EL0.Max == EL1.Max) + MaxBECount = EL0.Max; + if (EL0.Exact == EL1.Exact) + BECount = EL0.Exact; + } + + return ExitLimit(BECount, MaxBECount); + } + if (BO->getOpcode() == Instruction::Or) { + // Recurse on the operands of the or. + bool EitherMayExit = L->contains(FBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + IsSubExpr || EitherMayExit); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + IsSubExpr || EitherMayExit); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); + if (EitherMayExit) { + // Both conditions must be false for the loop to continue executing. + // Choose the less conservative count. + if (EL0.Exact == getCouldNotCompute() || + EL1.Exact == getCouldNotCompute()) + BECount = getCouldNotCompute(); + else + BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); + if (EL0.Max == getCouldNotCompute()) + MaxBECount = EL1.Max; + else if (EL1.Max == getCouldNotCompute()) + MaxBECount = EL0.Max; + else + MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); + } else { + // Both conditions must be false at the same time for the loop to exit. + // For now, be conservative. 
+ assert(L->contains(TBB) && "Loop block has no successor in loop!"); + if (EL0.Max == EL1.Max) + MaxBECount = EL0.Max; + if (EL0.Exact == EL1.Exact) + BECount = EL0.Exact; + } + + return ExitLimit(BECount, MaxBECount); + } + } + + // With an icmp, it may be feasible to compute an exact backedge-taken count. + // Proceed to the next level to examine the icmp. + if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) + return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr); + + // Check for a constant condition. These are normally stripped out by + // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to + // preserve the CFG and is temporarily leaving constant conditions + // in place. + if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { + if (L->contains(FBB) == !CI->getZExtValue()) + // The backedge is always taken. + return getCouldNotCompute(); + else + // The backedge is never taken. + return getConstant(CI->getType(), 0); + } + + // If it's not an integer or pointer comparison then compute it the hard way. + return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); +} + +/// ComputeExitLimitFromICmp - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. 
+ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, + ICmpInst *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB, + bool IsSubExpr) { + + // If the condition was exit on true, convert the condition to exit on false + ICmpInst::Predicate Cond; + if (!L->contains(FBB)) + Cond = ExitCond->getPredicate(); + else + Cond = ExitCond->getInversePredicate(); + + // Handle common loops like: for (X = "string"; *X; ++X) + if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) + if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { + ExitLimit ItCnt = + ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond); + if (ItCnt.hasAnyInfo()) + return ItCnt; + } + + const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); + const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); + + // Try to evaluate any dependencies out of the loop. + LHS = getSCEVAtScope(LHS, L); + RHS = getSCEVAtScope(RHS, L); + + // At this point, we would like to compute how many iterations of the + // loop the predicate will return true for these inputs. + if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { + // If there is a loop-invariant, force it into the RHS. + std::swap(LHS, RHS); + Cond = ICmpInst::getSwappedPredicate(Cond); + } + + // Simplify the operands before analyzing them. + (void)SimplifyICmpOperands(Cond, LHS, RHS); + + // If we have a comparison of a chrec against a constant, try to use value + // ranges to answer this query. + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS)) + if (AddRec->getLoop() == L) { + // Form the constant range. 
+ ConstantRange CompRange( + ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); + + const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); + if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; + } + + switch (Cond) { + case ICmpInst::ICMP_NE: { // while (X != Y) + // Convert to: while (X-Y != 0) + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr); + if (EL.hasAnyInfo()) return EL; + break; + } + case ICmpInst::ICMP_EQ: { // while (X == Y) + // Convert to: while (X-Y == 0) + ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + if (EL.hasAnyInfo()) return EL; + break; + } + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: { // while (X < Y) + bool IsSigned = Cond == ICmpInst::ICMP_SLT; + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr); + if (EL.hasAnyInfo()) return EL; + break; + } + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: { // while (X > Y) + bool IsSigned = Cond == ICmpInst::ICMP_SGT; + ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr); + if (EL.hasAnyInfo()) return EL; + break; + } + default: +#if 0 + dbgs() << "ComputeBackedgeTakenCount "; + if (ExitCond->getOperand(0)->getType()->isUnsigned()) + dbgs() << "[unsigned] "; + dbgs() << *LHS << " " + << Instruction::getOpcodeName(Instruction::ICmp) + << " " << *RHS << "\n"; +#endif + break; + } + return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); +} + +static ConstantInt * +EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, + ScalarEvolution &SE) { + const SCEV *InVal = SE.getConstant(C); + const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); + assert(isa<SCEVConstant>(Val) && + "Evaluation of SCEV at constant didn't fold correctly?"); + return cast<SCEVConstant>(Val)->getValue(); +} + +/// ComputeLoadConstantCompareExitLimit - Given an exit condition of +/// 'icmp op load X, cst', try to see if we can compute the backedge +/// execution count. 
+ScalarEvolution::ExitLimit +ScalarEvolution::ComputeLoadConstantCompareExitLimit( + LoadInst *LI, + Constant *RHS, + const Loop *L, + ICmpInst::Predicate predicate) { + + if (LI->isVolatile()) return getCouldNotCompute(); + + // Check to see if the loaded pointer is a getelementptr of a global. + // TODO: Use SCEV instead of manually grubbing with GEPs. + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); + if (!GEP) return getCouldNotCompute(); + + // Make sure that it is really a constant global we are gepping, with an + // initializer, and make sure the first IDX is really 0. + GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || + GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) || + !cast<Constant>(GEP->getOperand(1))->isNullValue()) + return getCouldNotCompute(); + + // Okay, we allow one non-constant index into the GEP instruction. + Value *VarIdx = 0; + std::vector<Constant*> Indexes; + unsigned VarIdxNum = 0; + for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { + Indexes.push_back(CI); + } else if (!isa<ConstantInt>(GEP->getOperand(i))) { + if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. + VarIdx = GEP->getOperand(i); + VarIdxNum = i-2; + Indexes.push_back(0); + } + + // Loop-invariant loads may be a byproduct of loop optimization. Skip them. + if (!VarIdx) + return getCouldNotCompute(); + + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. + // Check to see if X is a loop variant variable value now. + const SCEV *Idx = getSCEV(VarIdx); + Idx = getSCEVAtScope(Idx, L); + + // We can only recognize very limited forms of loop index expressions, in + // particular, only affine AddRec's like {C1,+,C2}. 
+ const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx); + if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) || + !isa<SCEVConstant>(IdxExpr->getOperand(0)) || + !isa<SCEVConstant>(IdxExpr->getOperand(1))) + return getCouldNotCompute(); + + unsigned MaxSteps = MaxBruteForceIterations; + for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { + ConstantInt *ItCst = ConstantInt::get( + cast<IntegerType>(IdxExpr->getType()), IterationNum); + ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); + + // Form the GEP offset. + Indexes[VarIdxNum] = Val; + + Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), + Indexes); + if (Result == 0) break; // Cannot compute! + + // Evaluate the condition for this iteration. + Result = ConstantExpr::getICmp(predicate, Result, RHS); + if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure + if (cast<ConstantInt>(Result)->getValue().isMinValue()) { +#if 0 + dbgs() << "\n***\n*** Computed loop count " << *ItCst + << "\n*** From global " << *GV << "*** BB: " << *L->getHeader() + << "***\n"; +#endif + ++NumArrayLenItCounts; + return getConstant(ItCst); // Found terminating iteration! + } + } + return getCouldNotCompute(); +} + + +/// CanConstantFold - Return true if we can constant fold an instruction of the +/// specified type, assuming that all operands were constants. +static bool CanConstantFold(const Instruction *I) { + if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || + isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) || + isa<LoadInst>(I)) + return true; + + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) + return canConstantFoldCallTo(F); + return false; +} + +/// Determine whether this instruction can constant evolve within this loop +/// assuming its operands can all constant evolve. 
+static bool canConstantEvolve(Instruction *I, const Loop *L) { + // An instruction outside of the loop can't be derived from a loop PHI. + if (!L->contains(I)) return false; + + if (isa<PHINode>(I)) { + if (L->getHeader() == I->getParent()) + return true; + else + // We don't currently keep track of the control flow needed to evaluate + // PHIs, so we cannot handle PHIs inside of loops. + return false; + } + + // If we won't be able to constant fold this expression even if the operands + // are constants, bail early. + return CanConstantFold(I); +} + +/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by +/// recursing through each instruction operand until reaching a loop header phi. +static PHINode * +getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, + DenseMap<Instruction *, PHINode *> &PHIMap) { + + // Otherwise, we can evaluate this instruction if all of its operands are + // constant or derived from a PHI node themselves. + PHINode *PHI = 0; + for (Instruction::op_iterator OpI = UseInst->op_begin(), + OpE = UseInst->op_end(); OpI != OpE; ++OpI) { + + if (isa<Constant>(*OpI)) continue; + + Instruction *OpInst = dyn_cast<Instruction>(*OpI); + if (!OpInst || !canConstantEvolve(OpInst, L)) return 0; + + PHINode *P = dyn_cast<PHINode>(OpInst); + if (!P) + // If this operand is already visited, reuse the prior result. + // We may have P != PHI if this is the deepest point at which the + // inconsistent paths meet. + P = PHIMap.lookup(OpInst); + if (!P) { + // Recurse and memoize the results, whether a phi is found or not. + // This recursive call invalidates pointers into PHIMap. + P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap); + PHIMap[OpInst] = P; + } + if (P == 0) return 0; // Not evolving from PHI + if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs. + PHI = P; + } + // This is a expression evolving from a constant PHI! 
+ return PHI; +} + +/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node +/// in the loop that V is derived from. We allow arbitrary operations along the +/// way, but the operands of an operation must either be constants or a value +/// derived from a constant PHI. If this expression does not fit with these +/// constraints, return null. +static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0 || !canConstantEvolve(I, L)) return 0; + + if (PHINode *PN = dyn_cast<PHINode>(I)) { + return PN; + } + + // Record non-constant instructions contained by the loop. + DenseMap<Instruction *, PHINode *> PHIMap; + return getConstantEvolvingPHIOperands(I, L, PHIMap); +} + +/// EvaluateExpression - Given an expression that passes the +/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node +/// in the loop has the value PHIVal. If we can't fold this expression for some +/// reason, return null. +static Constant *EvaluateExpression(Value *V, const Loop *L, + DenseMap<Instruction *, Constant *> &Vals, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + // Convenient constant check, but redundant for recursive calls. + if (Constant *C = dyn_cast<Constant>(V)) return C; + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return 0; + + if (Constant *C = Vals.lookup(I)) return C; + + // An instruction inside the loop depends on a value outside the loop that we + // weren't given a mapping for, or a value such as a call inside the loop. + if (!canConstantEvolve(I, L)) return 0; + + // An unmapped PHI can be due to a branch or another loop inside this loop, + // or due to this not being the initial iteration through a loop where we + // couldn't compute the evolution of this particular PHI last time. 
+ if (isa<PHINode>(I)) return 0; + + std::vector<Constant*> Operands(I->getNumOperands()); + + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i)); + if (!Operand) { + Operands[i] = dyn_cast<Constant>(I->getOperand(i)); + if (!Operands[i]) return 0; + continue; + } + Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI); + Vals[Operand] = C; + if (!C) return 0; + Operands[i] = C; + } + + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], + Operands[1], TD, TLI); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(Operands[0], TD); + } + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD, + TLI); +} + +/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is +/// in the header of its containing loop, we know the loop executes a +/// constant number of times, and the PHI node is just a recurrence +/// involving constants, fold it. +Constant * +ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, + const APInt &BEs, + const Loop *L) { + DenseMap<PHINode*, Constant*>::const_iterator I = + ConstantEvolutionLoopExitValue.find(PN); + if (I != ConstantEvolutionLoopExitValue.end()) + return I->second; + + if (BEs.ugt(MaxBruteForceIterations)) + return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it. + + Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; + + DenseMap<Instruction *, Constant *> CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); + + // Since the loop is canonicalized, the PHI node must have two entries. One + // entry must be a constant (coming in from outside of the loop), and the + // second must be derived from the same PHI. 
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); + PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast<PHINode>(I)); ++I) { + Constant *StartCST = + dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return RetVal = 0; + + Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + + // Execute the loop symbolically to determine the exit value. + if (BEs.getActiveBits() >= 32) + return RetVal = 0; // More than 2^32-1 iterations?? Not doing it! + + unsigned NumIterations = BEs.getZExtValue(); // must be in range + unsigned IterationNum = 0; + for (; ; ++IterationNum) { + if (IterationNum == NumIterations) + return RetVal = CurrentIterVals[PN]; // Got exit value! + + // Compute the value of the PHIs for the next iteration. + // EvaluateExpression adds non-phi values to the CurrentIterVals map. + DenseMap<Instruction *, Constant *> NextIterVals; + Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, + TLI); + if (NextPHI == 0) + return 0; // Couldn't evaluate! + NextIterVals[PN] = NextPHI; + + bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; + + // Also evaluate the other PHI nodes. However, we don't get to stop if we + // cease to be able to evaluate one of them or if they stop evolving, + // because that doesn't necessarily prevent us from computing PN. + SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute; + for (DenseMap<Instruction *, Constant *>::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast<PHINode>(I->first); + if (!PHI || PHI == PN || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(std::make_pair(PHI, I->second)); + } + // We use two distinct loops because EvaluateExpression may invalidate any + // iterators into CurrentIterVals. 
+ for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator + I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = I->first; + Constant *&NextPHI = NextIterVals[PHI]; + if (!NextPHI) { // Not already computed. + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + } + if (NextPHI != I->second) + StoppedEvolving = false; + } + + // If all entries in CurrentIterVals == NextIterVals then we can stop + // iterating, the loop can't continue to change. + if (StoppedEvolving) + return RetVal = CurrentIterVals[PN]; + + CurrentIterVals.swap(NextIterVals); + } +} + +/// ComputeExitCountExhaustively - If the loop is known to execute a +/// constant number of times (the condition evolves only from constants), +/// try to evaluate a few iterations of the loop until the exit +/// condition gets a value of ExitWhen (true or false). If we cannot +/// evaluate the trip count of the loop, return getCouldNotCompute(). +const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, + Value *Cond, + bool ExitWhen) { + PHINode *PN = getConstantEvolvingPHI(Cond, L); + if (PN == 0) return getCouldNotCompute(); + + // If the loop is canonicalized, the PHI will have exactly two entries. + // That's the only form we support here. + if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + + DenseMap<Instruction *, Constant *> CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); + + // One entry must be a constant (coming in from outside of the loop), and the + // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); + PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast<PHINode>(I)); ++I) { + Constant *StartCST = + dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return getCouldNotCompute(); + + // Okay, we find a PHI node that defines the trip count of this loop. Execute + // the loop symbolically to determine when the condition gets a value of + // "ExitWhen". + + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ + ConstantInt *CondVal = + dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals, + TD, TLI)); + + // Couldn't symbolically evaluate. + if (!CondVal) return getCouldNotCompute(); + + if (CondVal->getValue() == uint64_t(ExitWhen)) { + ++NumBruteForceTripCountsComputed; + return getConstant(Type::getInt32Ty(getContext()), IterationNum); + } + + // Update all the PHI nodes for the next iteration. + DenseMap<Instruction *, Constant *> NextIterVals; + + // Create a list of which PHIs we need to compute. We want to do this before + // calling EvaluateExpression on them because that may invalidate iterators + // into CurrentIterVals. + SmallVector<PHINode *, 8> PHIsToCompute; + for (DenseMap<Instruction *, Constant *>::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast<PHINode>(I->first); + if (!PHI || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(PHI); + } + for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(), + E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = *I; + Constant *&NextPHI = NextIterVals[PHI]; + if (NextPHI) continue; // Already computed! 
+ + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + } + CurrentIterVals.swap(NextIterVals); + } + + // Too many iterations were needed to evaluate. + return getCouldNotCompute(); +} + +/// getSCEVAtScope - Return a SCEV expression for the specified value +/// at the specified scope in the program. The L value specifies a loop +/// nest to evaluate the expression at, where null is the top-level or a +/// specified loop is immediately inside of the loop. +/// +/// This method can be used to compute the exit value for a variable defined +/// in a loop by querying what the value will hold in the parent loop. +/// +/// In the case that a relevant loop exit value cannot be computed, the +/// original value V is returned. +const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { + // Check to see if we've folded this expression at this loop before. + SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == L) + return Values[u].second ? Values[u].second : V; + } + Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0))); + // Otherwise compute it. + const SCEV *C = computeSCEVAtScope(V, L); + SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == L) { + Values2[u - 1].second = C; + break; + } + } + return C; +} + +/// This builds up a Constant using the ConstantExpr interface. That way, we +/// will return Constants for objects which aren't represented by a +/// SCEVConstant, because SCEVConstant is restricted to ConstantInt. +/// Returns NULL if the SCEV isn't representable as a Constant. +static Constant *BuildConstantFromSCEV(const SCEV *V) { + switch (V->getSCEVType()) { + default: // TODO: smax, umax. 
+ case scCouldNotCompute: + case scAddRecExpr: + break; + case scConstant: + return cast<SCEVConstant>(V)->getValue(); + case scUnknown: + return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue()); + case scSignExtend: { + const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand())) + return ConstantExpr::getSExt(CastOp, SS->getType()); + break; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand())) + return ConstantExpr::getZExt(CastOp, SZ->getType()); + break; + } + case scTruncate: { + const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand())) + return ConstantExpr::getTrunc(CastOp, ST->getType()); + break; + } + case scAddExpr: { + const SCEVAddExpr *SA = cast<SCEVAddExpr>(V); + if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { + if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { + unsigned AS = PTy->getAddressSpace(); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); + C = ConstantExpr::getBitCast(C, DestPtrTy); + } + for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); + if (!C2) return 0; + + // First pointer! + if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { + unsigned AS = C2->getType()->getPointerAddressSpace(); + std::swap(C, C2); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); + // The offsets have been converted to bytes. We can add bytes to an + // i8* by GEP with the byte count in the first index. + C = ConstantExpr::getBitCast(C, DestPtrTy); + } + + // Don't bother trying to sum two pointers. We probably can't + // statically compute a load that results from it anyway. 
+ if (C2->getType()->isPointerTy()) + return 0; + + if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { + if (PTy->getElementType()->isStructTy()) + C2 = ConstantExpr::getIntegerCast( + C2, Type::getInt32Ty(C->getContext()), true); + C = ConstantExpr::getGetElementPtr(C, C2); + } else + C = ConstantExpr::getAdd(C, C2); + } + return C; + } + break; + } + case scMulExpr: { + const SCEVMulExpr *SM = cast<SCEVMulExpr>(V); + if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { + // Don't bother with pointers at all. + if (C->getType()->isPointerTy()) return 0; + for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); + if (!C2 || C2->getType()->isPointerTy()) return 0; + C = ConstantExpr::getMul(C, C2); + } + return C; + } + break; + } + case scUDivExpr: { + const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V); + if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS())) + if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS())) + if (LHS->getType() == RHS->getType()) + return ConstantExpr::getUDiv(LHS, RHS); + break; + } + } + return 0; +} + +const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { + if (isa<SCEVConstant>(V)) return V; + + // If this instruction is evolved from a constant-evolving PHI, compute the + // exit value from the loop without using SCEVs. + if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) { + if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) { + const Loop *LI = (*this->LI)[I->getParent()]; + if (LI && LI->getParentLoop() == L) // Looking for loop exit value. + if (PHINode *PN = dyn_cast<PHINode>(I)) + if (PN->getParent() == LI->getHeader()) { + // Okay, there is no closed form solution for the PHI node. Check + // to see if the loop that contains it has a known backedge-taken + // count. If so, we may be able to force computation of the exit + // value. 
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); + if (const SCEVConstant *BTCC = + dyn_cast<SCEVConstant>(BackedgeTakenCount)) { + // Okay, we know how many times the containing loop executes. If + // this is a constant evolving PHI node, get the final value at + // the specified iteration number. + Constant *RV = getConstantEvolutionLoopExitValue(PN, + BTCC->getValue()->getValue(), + LI); + if (RV) return getSCEV(RV); + } + } + + // Okay, this is an expression that we cannot symbolically evaluate + // into a SCEV. Check to see if it's possible to symbolically evaluate + // the arguments into constants, and if so, try to constant propagate the + // result. This is particularly useful for computing loop exit values. + if (CanConstantFold(I)) { + SmallVector<Constant *, 4> Operands; + bool MadeImprovement = false; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *Op = I->getOperand(i); + if (Constant *C = dyn_cast<Constant>(Op)) { + Operands.push_back(C); + continue; + } + + // If any of the operands is non-constant and if they are + // non-integer and non-pointer, don't even try to analyze them + // with scev techniques. + if (!isSCEVable(Op->getType())) + return V; + + const SCEV *OrigV = getSCEV(Op); + const SCEV *OpV = getSCEVAtScope(OrigV, L); + MadeImprovement |= OrigV != OpV; + + Constant *C = BuildConstantFromSCEV(OpV); + if (!C) return V; + if (C->getType() != Op->getType()) + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + Op->getType(), + false), + C, Op->getType()); + Operands.push_back(C); + } + + // Check to see if getSCEVAtScope actually made an improvement. 
+ if (MadeImprovement) { + Constant *C = 0; + if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + C = ConstantFoldCompareInstOperands(CI->getPredicate(), + Operands[0], Operands[1], TD, + TLI); + else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isVolatile()) + C = ConstantFoldLoadFromConstPtr(Operands[0], TD); + } else + C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), + Operands, TD, TLI); + if (!C) return V; + return getSCEV(C); + } + } + } + + // This is some other type of SCEVUnknown, just return it. + return V; + } + + if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) { + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. + for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) { + const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + if (OpAtScope != Comm->getOperand(i)) { + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded commutative expression. + SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(), + Comm->op_begin()+i); + NewOps.push_back(OpAtScope); + + for (++i; i != e; ++i) { + OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + NewOps.push_back(OpAtScope); + } + if (isa<SCEVAddExpr>(Comm)) + return getAddExpr(NewOps); + if (isa<SCEVMulExpr>(Comm)) + return getMulExpr(NewOps); + if (isa<SCEVSMaxExpr>(Comm)) + return getSMaxExpr(NewOps); + if (isa<SCEVUMaxExpr>(Comm)) + return getUMaxExpr(NewOps); + llvm_unreachable("Unknown commutative SCEV type!"); + } + } + // If we got here, all operands are loop invariant. 
+ return Comm; + } + + if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) { + const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L); + const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L); + if (LHS == Div->getLHS() && RHS == Div->getRHS()) + return Div; // must be loop invariant + return getUDivExpr(LHS, RHS); + } + + // If this is a loop recurrence for a loop that does not contain L, then we + // are dealing with the final value computed by the loop. + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) { + // First, attempt to evaluate each operand. + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { + const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L); + if (OpAtScope == AddRec->getOperand(i)) + continue; + + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded add recurrence. + SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(), + AddRec->op_begin()+i); + NewOps.push_back(OpAtScope); + for (++i; i != e; ++i) + NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); + + const SCEV *FoldedRec = + getAddRecExpr(NewOps, AddRec->getLoop(), + AddRec->getNoWrapFlags(SCEV::FlagNW)); + AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec); + // The addrec may be folded to a nonrecurrence, for example, if the + // induction variable is multiplied by zero after constant folding. Go + // ahead and return the folded value. + if (!AddRec) + return FoldedRec; + break; + } + + // If the scope is outside the addrec's loop, evaluate it by using the + // loop exit value of the addrec. + if (!AddRec->getLoop()->contains(L)) { + // To evaluate this recurrence, we need to know how many times the AddRec + // loop iterates. Compute this now.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); + if (BackedgeTakenCount == getCouldNotCompute()) return AddRec; + + // Then, evaluate the AddRec. + return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); + } + + return AddRec; + } + + if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getZeroExtendExpr(Op, Cast->getType()); + } + + if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getSignExtendExpr(Op, Cast->getType()); + } + + if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getTruncateExpr(Op, Cast->getType()); + } + + llvm_unreachable("Unknown SCEV type!"); +} + +/// getSCEVAtScope - This is a convenience function which does +/// getSCEVAtScope(getSCEV(V), L). +const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { + return getSCEVAtScope(getSCEV(V), L); +} + +/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the +/// following equation: +/// +/// A * X = B (mod N) +/// +/// where N = 2^BW and BW is the common bit width of A and B. The signedness of +/// A and B isn't important. +/// +/// If the equation does not have a solution, SCEVCouldNotCompute is returned. +static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, + ScalarEvolution &SE) { + uint32_t BW = A.getBitWidth(); + assert(BW == B.getBitWidth() && "Bit widths must be the same."); + assert(A != 0 && "A must be non-zero."); + + // 1. D = gcd(A, N) + // + // The gcd of A and N may have only one prime factor: 2. 
The number of + // trailing zeros in A is its multiplicity + uint32_t Mult2 = A.countTrailingZeros(); + // D = 2^Mult2 + + // 2. Check if B is divisible by D. + // + // B is divisible by D if and only if the multiplicity of prime factor 2 for B + // is not less than multiplicity of this prime factor for D. + if (B.countTrailingZeros() < Mult2) + return SE.getCouldNotCompute(); + + // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic + // modulo (N / D). + // + // (N / D) may need BW+1 bits in its representation. Hence, we'll use this + // bit width during computations. + APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D + APInt Mod(BW + 1, 0); + Mod.setBit(BW - Mult2); // Mod = N / D + APInt I = AD.multiplicativeInverse(Mod); + + // 4. Compute the minimum unsigned root of the equation: + // I * (B / D) mod (N / D) + APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod); + + // The result is guaranteed to be less than 2^BW so we may truncate it to BW + // bits. + return SE.getConstant(Result.trunc(BW)); +} + +/// SolveQuadraticEquation - Find the roots of the quadratic equation for the +/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which +/// might be the same) or two SCEVCouldNotCompute objects. +/// +static std::pair<const SCEV *,const SCEV *> +SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { + assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); + const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0)); + const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1)); + const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2)); + + // We currently can only solve this if the coefficients are constants. 
+ if (!LC || !MC || !NC) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); + const APInt &L = LC->getValue()->getValue(); + const APInt &M = MC->getValue()->getValue(); + const APInt &N = NC->getValue()->getValue(); + APInt Two(BitWidth, 2); + APInt Four(BitWidth, 4); + + { + using namespace APIntOps; + const APInt& C = L; + // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C + // The B coefficient is M-N/2 + APInt B(M); + B -= sdiv(N,Two); + + // The A coefficient is N/2 + APInt A(N.sdiv(Two)); + + // Compute the B^2-4ac term. + APInt SqrtTerm(B); + SqrtTerm *= B; + SqrtTerm -= Four * (A * C); + + if (SqrtTerm.isNegative()) { + // The loop is provably infinite. + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest + // integer value or else APInt::sqrt() will assert. + APInt SqrtVal(SqrtTerm.sqrt()); + + // Compute the two solutions for the quadratic formula. + // The divisions must be performed as signed divisions. + APInt NegB(-B); + APInt TwoA(A << 1); + if (TwoA.isMinValue()) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + LLVMContext &Context = SE.getContext(); + + ConstantInt *Solution1 = + ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = + ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); + + return std::make_pair(SE.getConstant(Solution1), + SE.getConstant(Solution2)); + } // end APIntOps namespace +} + +/// HowFarToZero - Return the number of times a backedge comparing the specified +/// value to zero will execute. If not computable, return CouldNotCompute. +/// +/// This is only used for loops with a "x != y" exit test. The exit condition is +/// now expressed as a single expression, V = x-y. So the exit test is +/// effectively V != 0. 
We know and take advantage of the fact that this +/// expression is only used in a comparison-with-zero context. +ScalarEvolution::ExitLimit +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { + // If the value is a constant + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { + // If the value is already zero, the branch will execute zero times. + if (C->getValue()->isZero()) return C; + return getCouldNotCompute(); // Otherwise it will loop infinitely. + } + + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V); + if (!AddRec || AddRec->getLoop() != L) + return getCouldNotCompute(); + + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of + // the quadratic equation to solve it. + if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { + std::pair<const SCEV *,const SCEV *> Roots = + SolveQuadraticEquation(AddRec, *this); + const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); + const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); + if (R1 && R2) { +#if 0 + dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 + << " sol#2: " << *R2 << "\n"; +#endif + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, + R1->getValue(), + R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // We can only use this value if the chrec ends up with an exact zero + // value at this index. When solving for "X*X != 5", for example, we + // should not accept a root of 2. + const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); + if (Val->isZero()) + return R1; // We found a quadratic root! + } + } + return getCouldNotCompute(); + } + + // Otherwise we can only handle this if it is affine.
+ if (!AddRec->isAffine()) + return getCouldNotCompute(); + + // If this is an affine expression, the execution count of this branch is + // the minimum unsigned root of the following equation: + // + // Start + Step*N = 0 (mod 2^BW) + // + // equivalent to: + // + // Step*N = -Start (mod 2^BW) + // + // where BW is the common bit width of Start and Step. + + // Get the initial value for the loop. + const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); + const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); + + // For now we handle only constant steps. + // + // TODO: Handle a nonconstant Step given AddRec<NUW>. If the + // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap + // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. + // We have not yet seen any such cases. + const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); + if (StepC == 0 || StepC->getValue()->equalsInt(0)) + return getCouldNotCompute(); + + // For positive steps (counting up until unsigned overflow): + // N = -Start/Step (as unsigned) + // For negative steps (counting down to zero): + // N = Start/-Step + // First compute the unsigned distance from zero in the direction of Step. + bool CountDown = StepC->getValue()->getValue().isNegative(); + const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); + + // Handle unitary steps, which cannot wraparound. + // 1*N = -Start; -1*N = Start (mod 2^BW), so: + // N = Distance (as unsigned) + if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { + ConstantRange CR = getUnsignedRange(Start); + const SCEV *MaxBECount; + if (!CountDown && CR.getUnsignedMin().isMinValue()) + // When counting up, the worst starting value is 1, not 0. + MaxBECount = CR.getUnsignedMax().isMinValue() + ? 
getConstant(APInt::getMinValue(CR.getBitWidth())) + : getConstant(APInt::getMaxValue(CR.getBitWidth())); + else + MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() + : -CR.getUnsignedMin()); + return ExitLimit(Distance, MaxBECount); + } + + // If the recurrence is known not to wraparound, unsigned divide computes the + // back edge count. (Ideally we would have an "isexact" bit for udiv). We know + // that the value will either become zero (and thus the loop terminates), that + // the loop will terminate through some other exit condition first, or that + // the loop has undefined behavior. This means we can't "miss" the exit + // value, even with nonunit stride. + // + // This is only valid for expressions that directly compute the loop exit. It + // is invalid for subexpressions in which the loop may exit through this + // branch even if this subexpression is false. In that case, the trip count + // computed by this udiv could be smaller than the number of well-defined + // iterations. + if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) + return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + + // Then, try to solve the above equation provided that Start is constant. + if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) + return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), + -StartC->getValue()->getValue(), + *this); + return getCouldNotCompute(); +} + +/// HowFarToNonZero - Return the number of times a backedge checking the +/// specified value for nonzero will execute. If not computable, return +/// CouldNotCompute +ScalarEvolution::ExitLimit +ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { + // Loops that look like: while (X == 0) are very strange indeed. We don't + // handle them yet except for the trivial case. This could be expanded in the + // future as needed. + + // If the value is a constant, check to see if it is known to be non-zero + // already. 
If so, the backedge will execute zero times. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { + if (!C->getValue()->isNullValue()) + return getConstant(C->getType(), 0); + return getCouldNotCompute(); // Otherwise it will loop infinitely. + } + + // We could implement others, but I really doubt anyone writes loops like + // this, and if they did, they would already be constant folded. + return getCouldNotCompute(); +} + +/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB +/// (which may not be an immediate predecessor) which has exactly one +/// successor from which BB is reachable, or null if no such block is +/// found. +/// +std::pair<BasicBlock *, BasicBlock *> +ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { + // If the block has a unique predecessor, then there is no path from the + // predecessor to the block that does not go through the direct edge + // from the predecessor to the block. + if (BasicBlock *Pred = BB->getSinglePredecessor()) + return std::make_pair(Pred, BB); + + // A loop's header is defined to be a block that dominates the loop. + // If the header has a unique predecessor outside the loop, it must be + // a block that has exactly one successor that can reach the loop. + if (Loop *L = LI->getLoopFor(BB)) + return std::make_pair(L->getLoopPredecessor(), L->getHeader()); + + return std::pair<BasicBlock *, BasicBlock *>(); +} + +/// HasSameValue - SCEV structural equivalence is usually sufficient for +/// testing whether two expressions are equal, however for the purposes of +/// looking for a condition guarding a loop, it can be useful to be a little +/// more general, since a front-end may have replicated the controlling +/// expression. +/// +static bool HasSameValue(const SCEV *A, const SCEV *B) { + // Quick check to see if they are the same SCEV. 
+ if (A == B) return true; + + // Otherwise, if they're both SCEVUnknown, it's possible that they hold + // two different instructions with the same value. Check for this case. + if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A)) + if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) + if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) + if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) + if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) + return true; + + // Otherwise assume they may have a different value. + return false; +} + +/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with +/// predicate Pred. Return true iff any changes were made. +/// +/// Pred, LHS and RHS are taken by reference and rewritten in place. A +/// comparison that folds to a constant is replaced by 0 == 0 (always true) or +/// 0 != 0 (always false). Depth counts the simplification passes already made; +/// once it reaches 3 the function returns false without further work. +/// +/// NOTE(review): when a pass changes something, the value returned is that of +/// the recursive call, so a change followed by a pass that changes nothing +/// yields false; only the trivially true/false exits always return true. +/// +bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, + const SCEV *&LHS, const SCEV *&RHS, + unsigned Depth) { + bool Changed = false; + + // If we hit the max recursion limit bail out. + if (Depth >= 3) + return false; + + // Canonicalize a constant to the right side. + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { + // Check for both operands constant. + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { + if (ConstantExpr::getICmp(Pred, + LHSC->getValue(), + RHSC->getValue())->isNullValue()) + goto trivially_false; + else + goto trivially_true; + } + // Otherwise swap the operands to put the constant on the right. + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + + // If we're comparing an addrec with a value which is loop-invariant in the + // addrec's loop, put the addrec on the left. Also make a dominance check, + // as both operands could be addrecs loop-invariant in each other's loop. 
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) { + const Loop *L = AR->getLoop(); + if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + } + + // If there's a constant operand, canonicalize comparisons with boundary + // cases, and canonicalize *-or-equal comparisons to regular comparisons. + if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { + const APInt &RA = RC->getValue()->getValue(); + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. + if (!RA) + if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS)) + if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0))) + if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 && + ME->getOperand(0)->isAllOnesValue()) { + RHS = AE->getOperand(1); + LHS = ME->getOperand(1); + Changed = true; + } + break; + // For the ordered predicates below, a constant at or next to the type's + // minimum/maximum turns the comparison into EQ, NE, or a trivially + // true/false result; remaining *-or-equal forms are made strict by moving + // the constant by one. + case ICmpInst::ICMP_UGE: + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_UGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_ULE: + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_ULT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_SGE: + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) { + Pred = 
ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_SLE: + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SLT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_UGT: + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_false; + break; + case ICmpInst::ICMP_ULT: + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SGT: + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SLT: + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_false; + break; + } + } + + // Check for obvious equality. 
+ if (HasSameValue(LHS, RHS)) { + if (ICmpInst::isTrueWhenEqual(Pred)) + goto trivially_true; + if (ICmpInst::isFalseWhenEqual(Pred)) + goto trivially_false; + } + + // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by + // adding or subtracting 1 from one of the operands. + switch (Pred) { + case ICmpInst::ICMP_SLE: + if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + SCEV::FlagNSW); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + SCEV::FlagNSW); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } + break; + case ICmpInst::ICMP_SGE: + if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + SCEV::FlagNSW); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + SCEV::FlagNSW); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } + break; + case ICmpInst::ICMP_ULE: + if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + SCEV::FlagNUW); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { + // No FlagNUW: adding the all-ones constant wraps as an unsigned add + // whenever LHS is non-zero. + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } + break; + case ICmpInst::ICMP_UGE: + if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { + // No FlagNUW: adding the all-ones constant wraps as an unsigned add + // whenever RHS is non-zero. + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + 
SCEV::FlagNUW); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } + break; + default: + break; + } + + // TODO: More simplifications are possible here. + + // Recursively simplify until we either hit a recursion limit or nothing + // changes. + if (Changed) + return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1); + + return Changed; + +trivially_true: + // Return 0 == 0. + LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); + Pred = ICmpInst::ICMP_EQ; + return true; + +trivially_false: + // Return 0 != 0. + LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); + Pred = ICmpInst::ICMP_NE; + return true; +} + +/// isKnownNegative - True if the maximum of S's signed range is negative. +bool ScalarEvolution::isKnownNegative(const SCEV *S) { + return getSignedRange(S).getSignedMax().isNegative(); +} + +/// isKnownPositive - True if the minimum of S's signed range is strictly +/// positive. +bool ScalarEvolution::isKnownPositive(const SCEV *S) { + return getSignedRange(S).getSignedMin().isStrictlyPositive(); +} + +/// isKnownNonNegative - True if the minimum of S's signed range is not +/// negative. +bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { + return !getSignedRange(S).getSignedMin().isNegative(); +} + +/// isKnownNonPositive - True if the maximum of S's signed range is not +/// strictly positive. +bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { + return !getSignedRange(S).getSignedMax().isStrictlyPositive(); +} + +/// isKnownNonZero - True if S is known negative or known positive. +bool ScalarEvolution::isKnownNonZero(const SCEV *S) { + return isKnownNegative(S) || isKnownPositive(S); +} + +/// isKnownPredicate - Test whether LHS Pred RHS can be proven. After +/// canonicalizing the operands, an addrec operand is checked at loop entry +/// and across the backedge; otherwise the operand ranges are consulted. +bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Canonicalize the inputs first. + (void)SimplifyICmpOperands(Pred, LHS, RHS); + + // If LHS or RHS is an addrec, check to see if the condition is true in + // every iteration of the loop. 
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) + if (isLoopEntryGuardedByCond( + AR->getLoop(), Pred, AR->getStart(), RHS) && + isLoopBackedgeGuardedByCond( + AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) + return true; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) + if (isLoopEntryGuardedByCond( + AR->getLoop(), Pred, LHS, AR->getStart()) && + isLoopBackedgeGuardedByCond( + AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) + return true; + + // Otherwise see what can be done with known constant ranges. + return isKnownPredicateWithRanges(Pred, LHS, RHS); +} + +/// isKnownPredicateWithRanges - Test whether LHS Pred RHS follows from +/// HasSameValue and from the signed/unsigned ranges of the two operands alone. +/// A false result covers both a disproved and an undecided comparison. +bool +ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + if (HasSameValue(LHS, RHS)) + return ICmpInst::isTrueWhenEqual(Pred); + + // This code is split out from isKnownPredicate because it is called from + // within isLoopEntryGuardedByCond. + switch (Pred) { + default: + llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_SGT: + Pred = ICmpInst::ICMP_SLT; + std::swap(LHS, RHS); + /* FALL THROUGH */ + case ICmpInst::ICMP_SLT: { + ConstantRange LHSRange = getSignedRange(LHS); + ConstantRange RHSRange = getSignedRange(RHS); + if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin())) + return true; + if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax())) + return false; + break; + } + case ICmpInst::ICMP_SGE: + Pred = ICmpInst::ICMP_SLE; + std::swap(LHS, RHS); + /* FALL THROUGH */ + case ICmpInst::ICMP_SLE: { + ConstantRange LHSRange = getSignedRange(LHS); + ConstantRange RHSRange = getSignedRange(RHS); + if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin())) + return true; + if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax())) + return false; + break; + } + case ICmpInst::ICMP_UGT: + Pred = ICmpInst::ICMP_ULT; + std::swap(LHS, RHS); + /* FALL THROUGH */ + case ICmpInst::ICMP_ULT: { + ConstantRange LHSRange = getUnsignedRange(LHS); + ConstantRange RHSRange = getUnsignedRange(RHS); + if 
(LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin())) + return true; + if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax())) + return false; + break; + } + case ICmpInst::ICMP_UGE: + Pred = ICmpInst::ICMP_ULE; + std::swap(LHS, RHS); + /* FALL THROUGH */ + case ICmpInst::ICMP_ULE: { + ConstantRange LHSRange = getUnsignedRange(LHS); + ConstantRange RHSRange = getUnsignedRange(RHS); + if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin())) + return true; + if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax())) + return false; + break; + } + case ICmpInst::ICMP_NE: { + if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet()) + return true; + if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet()) + return true; + + const SCEV *Diff = getMinusSCEV(LHS, RHS); + if (isKnownNonZero(Diff)) + return true; + break; + } + case ICmpInst::ICMP_EQ: + // The check at the top of the function catches the case where + // the values are known to be equal. + break; + } + return false; +} + +/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is +/// protected by a conditional between LHS and RHS. This is used +/// to eliminate casts. +bool +ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + // NOTE(review): this returns true for a null loop, while + // isLoopEntryGuardedByCond returns false under the same comment; confirm + // that the asymmetry is intended. + if (!L) return true; + + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + + BranchInst *LoopContinuePredicate = + dyn_cast<BranchInst>(Latch->getTerminator()); + if (!LoopContinuePredicate || + LoopContinuePredicate->isUnconditional()) + return false; + + // Inverse is requested when successor 0 of the latch branch is not the + // loop header. + return isImpliedCond(Pred, LHS, RHS, + LoopContinuePredicate->getCondition(), + LoopContinuePredicate->getSuccessor(0) != L->getHeader()); +} + +/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected +/// by a conditional between LHS and RHS. 
This is used to help avoid max +/// expressions in loop trip counts, and to eliminate casts. +bool +ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + if (!L) return false; + + // Starting at the loop predecessor, climb up the predecessor chain, as long + // as there are predecessors that can be found that have unique successors + // leading to the original header. + for (std::pair<BasicBlock *, BasicBlock *> + Pair(L->getLoopPredecessor(), L->getHeader()); + Pair.first; + Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + + BranchInst *LoopEntryPredicate = + dyn_cast<BranchInst>(Pair.first->getTerminator()); + if (!LoopEntryPredicate || + LoopEntryPredicate->isUnconditional()) + continue; + + // Inverse is requested when Pair.second is not successor 0 of the + // branch. + if (isImpliedCond(Pred, LHS, RHS, + LoopEntryPredicate->getCondition(), + LoopEntryPredicate->getSuccessor(0) != Pair.second)) + return true; + } + + return false; +} + +/// RAII wrapper to prevent recursive application of isImpliedCond. +/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are +/// currently evaluating isImpliedCond. +struct MarkPendingLoopPredicate { + Value *Cond; + DenseSet<Value*> &LoopPreds; + bool Pending; + + MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP) + : Cond(C), LoopPreds(LP) { + // insert().second is false when Cond is already in the set, which marks + // this evaluation as a recursive one. + Pending = !LoopPreds.insert(Cond).second; + } + ~MarkPendingLoopPredicate() { + // Only the object that actually inserted Cond removes it again. + if (!Pending) + LoopPreds.erase(Cond); + } +}; + +/// isImpliedCond - Test whether the condition described by Pred, LHS, +/// and RHS is true whenever the given Cond value evaluates to true. +/// If Inverse is set, the inverse of the found comparison is assumed instead. 
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + Value *FoundCondValue, + bool Inverse) { + MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates); + if (Mark.Pending) + return false; + + // Recursively handle And and Or conditions. A true (A & B) makes both + // operands true and a false (A | B) makes both false, so in those cases + // either operand alone may establish the implication. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { + if (BO->getOpcode() == Instruction::And) { + if (!Inverse) + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + } else if (BO->getOpcode() == Instruction::Or) { + if (Inverse) + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + } + } + + ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); + if (!ICI) return false; + + // Bail if the ICmp's operands' types are wider than the needed type + // before attempting to call getSCEV on them. This avoids infinite + // recursion, since the analysis of widening casts can require loop + // exit condition information for overflow checking, which would + // lead back here. + if (getTypeSizeInBits(LHS->getType()) < + getTypeSizeInBits(ICI->getOperand(0)->getType())) + return false; + + // Now that we found a conditional branch that dominates the loop or controls + // the loop latch, check to see if it is the comparison we are looking for. + ICmpInst::Predicate FoundPred; + if (Inverse) + FoundPred = ICI->getInversePredicate(); + else + FoundPred = ICI->getPredicate(); + + const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); + const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); + + // Balance the types. The case where FoundLHS' type is wider than + // LHS' type is checked for above. 
+ if (getTypeSizeInBits(LHS->getType()) > + getTypeSizeInBits(FoundLHS->getType())) { + if (CmpInst::isSigned(Pred)) { + FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); + } else { + FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); + } + } + + // Canonicalize the query to match the way instcombine will have + // canonicalized the comparison. + if (SimplifyICmpOperands(Pred, LHS, RHS)) + if (LHS == RHS) + return CmpInst::isTrueWhenEqual(Pred); + // If the found condition itself folds to a constant false it can never + // hold, so the implication is vacuously true. + if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) + if (FoundLHS == FoundRHS) + return CmpInst::isFalseWhenEqual(FoundPred); + + // Check to see if we can make the LHS or RHS match. + if (LHS == FoundRHS || RHS == FoundLHS) { + if (isa<SCEVConstant>(RHS)) { + std::swap(FoundLHS, FoundRHS); + FoundPred = ICmpInst::getSwappedPredicate(FoundPred); + } else { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + } + + // Check whether the found predicate is the same as the desired predicate. + if (FoundPred == Pred) + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + + // Check whether swapping the found predicate makes it the same as the + // desired predicate. + if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { + if (isa<SCEVConstant>(RHS)) + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + else + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), + RHS, LHS, FoundLHS, FoundRHS); + } + + // Check whether the actual condition is beyond sufficient. + if (FoundPred == ICmpInst::ICMP_EQ) + if (ICmpInst::isTrueWhenEqual(Pred)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + if (Pred == ICmpInst::ICMP_NE) + if (!ICmpInst::isTrueWhenEqual(FoundPred)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + + // Otherwise assume the worst. 
+ return false; +} + +/// isImpliedCondOperands - Test whether the condition described by Pred, +/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, +/// and FoundRHS is true. +bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + return isImpliedCondOperandsHelper(Pred, LHS, RHS, + FoundLHS, FoundRHS) || + // ~x < ~y --> x > y + isImpliedCondOperandsHelper(Pred, LHS, RHS, + getNotSCEV(FoundRHS), + getNotSCEV(FoundLHS)); +} + +/// isImpliedCondOperandsHelper - Test whether the condition described by +/// Pred, LHS, and RHS is true whenever the condition described by Pred, +/// FoundLHS, and FoundRHS is true. +/// This is the direct check; isImpliedCondOperands also retries it with the +/// found operands passed through getNotSCEV and swapped. +bool +ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + // LHS <= FoundLHS < FoundRHS <= RHS (or <= in the middle) gives the + // desired comparison between LHS and RHS. + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + // Mirror image: LHS >= FoundLHS > FoundRHS >= RHS. + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + // Unsigned form of the SLT/SLE case. + if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // Unsigned form of the SGT/SGE case. + if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + return true; + break; + } 
+ + return false; +} + +// Verify if a linear IV with positive stride can overflow when in a +// less-than comparison, knowing the invariant term of the comparison, the +// stride and the knowledge of NSW/NUW flags on the recurrence. Returns false +// outright when NoWrap is set; otherwise true means the maximum of RHS plus +// the maximum of (Stride - 1) exceeds the largest value of the type. +bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, + bool IsSigned, bool NoWrap) { + if (NoWrap) return false; + + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *One = getConstant(Stride->getType(), 1); + + if (IsSigned) { + APInt MaxRHS = getSignedRange(RHS).getSignedMax(); + APInt MaxValue = APInt::getSignedMaxValue(BitWidth); + APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) + .getSignedMax(); + + // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! + return (MaxValue - MaxStrideMinusOne).slt(MaxRHS); + } + + APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax(); + APInt MaxValue = APInt::getMaxValue(BitWidth); + APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) + .getUnsignedMax(); + + // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! + return (MaxValue - MaxStrideMinusOne).ult(MaxRHS); +} + +// Verify if a linear IV with negative stride can overflow when in a +// greater-than comparison, knowing the invariant term of the comparison, +// the stride and the knowledge of NSW/NUW flags on the recurrence. Returns +// false outright when NoWrap is set; otherwise true means the minimum of RHS +// minus the maximum of (Stride - 1) drops below the smallest value of the type. +bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, + bool IsSigned, bool NoWrap) { + if (NoWrap) return false; + + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *One = getConstant(Stride->getType(), 1); + + if (IsSigned) { + APInt MinRHS = getSignedRange(RHS).getSignedMin(); + APInt MinValue = APInt::getSignedMinValue(BitWidth); + APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) + .getSignedMax(); + + // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! 
+ return (MinValue + MaxStrideMinusOne).sgt(MinRHS); + } + + APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin(); + APInt MinValue = APInt::getMinValue(BitWidth); + APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) + .getUnsignedMax(); + + // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! + return (MinValue + MaxStrideMinusOne).ugt(MinRHS); +} + +// Compute the backedge taken count knowing the interval difference, the +// stride and presence of the equality in the comparison. The result is +// (Delta + Step) /u Step when Equality is set and (Delta + Step - 1) /u Step +// otherwise, i.e. Delta /u Step rounded up in the latter case. +const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, + bool Equality) { + const SCEV *One = getConstant(Step->getType(), 1); + Delta = Equality ? getAddExpr(Delta, Step) + : getAddExpr(Delta, getMinusSCEV(Step, One)); + return getUDivExpr(Delta, Step); +} + +/// HowManyLessThans - Return the number of times a backedge containing the +/// specified less-than comparison will execute. If not computable, return +/// CouldNotCompute. +/// +/// @param IsSubExpr is true when the LHS < RHS condition does not directly +/// control the branch. In this case, we can only compute an iteration count for +/// a subexpression that cannot overflow before evaluating true. +ScalarEvolution::ExitLimit +ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool IsSigned, + bool IsSubExpr) { + // We handle only IV < Invariant + if (!isLoopInvariant(RHS, L)) + return getCouldNotCompute(); + + const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); + + // Avoid weird loops + if (!IV || IV->getLoop() != L || !IV->isAffine()) + return getCouldNotCompute(); + + bool NoWrap = !IsSubExpr && + IV->getNoWrapFlags(IsSigned ? 
SCEV::FlagNSW : SCEV::FlagNUW); + + const SCEV *Stride = IV->getStepRecurrence(*this); + + // Avoid negative or zero stride values + if (!isKnownPositive(Stride)) + return getCouldNotCompute(); + + // Avoid proven overflow cases: this will ensure that the backedge taken count + // will not generate any unsigned overflow. Relaxed no-overflow conditions + // exploit NoWrapFlags, allowing to optimize in presence of undefined + // behaviors like the case of C language. + if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) + return getCouldNotCompute(); + + ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT + : ICmpInst::ICMP_ULT; + const SCEV *Start = IV->getStart(); + const SCEV *End = RHS; + // Unless entry to the loop is guarded by (Start - Stride) < RHS, use + // max(RHS, Start) as End so that End is never below Start. + if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) + End = IsSigned ? getSMaxExpr(RHS, Start) + : getUMaxExpr(RHS, Start); + + const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); + + APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() + : getUnsignedRange(Start).getUnsignedMin(); + + APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + + // Limit is the type's maximum minus (MinStride - 1); MaxEnd is clamped to it. + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); + APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1) + : APInt::getMaxValue(BitWidth) - (MinStride - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS. This is safe because in the other case (End - Start) + // is zero, leading to a zero maximum backedge taken count. + APInt MaxEnd = + IsSigned ? 
APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) + : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); + + const SCEV *MaxBECount = getCouldNotCompute(); + if (isa<SCEVConstant>(BECount)) + MaxBECount = BECount; + else + MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), + getConstant(MinStride), false); + + if (isa<SCEVCouldNotCompute>(MaxBECount)) + MaxBECount = BECount; + + return ExitLimit(BECount, MaxBECount); +} + +ScalarEvolution::ExitLimit +ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool IsSigned, + bool IsSubExpr) { + // Counterpart of HowManyLessThans for a decreasing IV; Stride below is the + // negated step recurrence. + // We handle only IV > Invariant + if (!isLoopInvariant(RHS, L)) + return getCouldNotCompute(); + + const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); + + // Avoid weird loops + if (!IV || IV->getLoop() != L || !IV->isAffine()) + return getCouldNotCompute(); + + bool NoWrap = !IsSubExpr && + IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); + + const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); + + // Avoid negative or zero stride values + if (!isKnownPositive(Stride)) + return getCouldNotCompute(); + + // Avoid proven overflow cases: this will ensure that the backedge taken count + // will not generate any unsigned overflow. Relaxed no-overflow conditions + // exploit NoWrapFlags, allowing to optimize in presence of undefined + // behaviors like the case of C language. + if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) + return getCouldNotCompute(); + + ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT + : ICmpInst::ICMP_UGT; + + const SCEV *Start = IV->getStart(); + const SCEV *End = RHS; + // Unless entry to the loop is guarded by (Start + Stride) > RHS, use + // min(RHS, Start) as End so that End is never above Start. + if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) + End = IsSigned ? getSMinExpr(RHS, Start) + : getUMinExpr(RHS, Start); + + const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); + + APInt MaxStart = IsSigned ? 
getSignedRange(Start).getSignedMax() + : getUnsignedRange(Start).getUnsignedMax(); + + APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); + APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) + : APInt::getMinValue(BitWidth) + (MinStride - 1); + + // Although End can be a MIN expression we estimate MinEnd considering only + // the case End = RHS. This is safe because in the other case (Start - End) + // is zero, leading to a zero maximum backedge taken count. + APInt MinEnd = + IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit) + : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit); + + + const SCEV *MaxBECount = getCouldNotCompute(); + if (isa<SCEVConstant>(BECount)) + MaxBECount = BECount; + else + MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), + getConstant(MinStride), false); + + if (isa<SCEVCouldNotCompute>(MaxBECount)) + MaxBECount = BECount; + + return ExitLimit(BECount, MaxBECount); +} + +/// getNumIterationsInRange - Return the number of iterations of this loop that +/// produce values in the specified constant range. Another way of looking at +/// this is that it returns the first iteration number where the value is not in +/// the condition, thus computing the exit count. If the iteration count can't +/// be computed, an instance of SCEVCouldNotCompute is returned. +const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, + ScalarEvolution &SE) const { + if (Range.isFullSet()) // Infinite loop. + return SE.getCouldNotCompute(); + + // If the start is a non-zero constant, shift the range to simplify things. 
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) + if (!SC->getValue()->isZero()) { + SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); + Operands[0] = SE.getConstant(SC->getType(), 0); + const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), + getNoWrapFlags(FlagNW)); + if (const SCEVAddRecExpr *ShiftedAddRec = + dyn_cast<SCEVAddRecExpr>(Shifted)) + return ShiftedAddRec->getNumIterationsInRange( + Range.subtract(SC->getValue()->getValue()), SE); + // This is strange and shouldn't happen. + return SE.getCouldNotCompute(); + } + + // The only time we can solve this is when we have all constant indices. + // Otherwise, we cannot determine the overflow conditions. + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (!isa<SCEVConstant>(getOperand(i))) + return SE.getCouldNotCompute(); + + + // Okay at this point we know that all elements of the chrec are constants and + // that the start element is zero. + + // First check to see if the range contains zero. If not, the first + // iteration exits. + unsigned BitWidth = SE.getTypeSizeInBits(getType()); + if (!Range.contains(APInt(BitWidth, 0))) + return SE.getConstant(getType(), 0); + + if (isAffine()) { + // If this is an affine expression then we have this situation: + // Solve {0,+,A} in Range === Ax in Range + + // We know that zero is in the range. If A is positive then we know that + // the upper value of the range must be the first possible exit value. + // If A is negative then the lower of the range is the last possible loop + // value. Also note that we already checked for a full range. + APInt One(BitWidth,1); + APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue(); + APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower(); + + // The exit value should be (End+A)/A. + APInt ExitVal = (End + A).udiv(A); + ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); + + // Evaluate at the exit value. 
If we really did fall out of the valid + // range, then we computed our trip count, otherwise wrap around or other + // things must have happened. + ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); + if (Range.contains(Val->getValue())) + return SE.getCouldNotCompute(); // Something strange happened + + // Ensure that the previous value is in the range. This is a sanity check. + assert(Range.contains( + EvaluateConstantChrecAtConstant(this, + ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && + "Linear scev computation is off in a bad way!"); + return SE.getConstant(ExitValue); + } else if (isQuadratic()) { + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the + // quadratic equation to solve it. To do this, we must frame our problem in + // terms of figuring out when zero is crossed, instead of when + // Range.getUpper() is crossed. + SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end()); + NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), + // getNoWrapFlags(FlagNW) + FlagAnyWrap); + + // Next, solve the constructed addrec + std::pair<const SCEV *,const SCEV *> Roots = + SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE); + const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); + const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); + // NOTE(review): R2 is dereferenced below after only R1 was checked; this + // presumably relies on SolveQuadraticEquation returning either two + // constants or none - confirm. + if (R1) { + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, + R1->getValue(), R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // Make sure the root is not off by one. The returned iteration should + // not be in the range, but the previous one should be. When solving + // for "X*X < 5", for example, we should not return a root of 2. 
+ ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, + R1->getValue(), + SE); + if (Range.contains(R1Val->getValue())) { + // The next iteration must be out of the range... + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); + + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (!Range.contains(R1Val->getValue())) + return SE.getConstant(NextVal); + return SE.getCouldNotCompute(); // Something strange happened + } + + // If R1 was not in the range, then it is a good return value. Make + // sure that R1-1 WAS in the range though, just in case. + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (Range.contains(R1Val->getValue())) + return R1; + return SE.getCouldNotCompute(); // Something strange happened + } + } + } + + return SE.getCouldNotCompute(); +} + +/// gcd - Greatest common divisor of the absolute values of C1 and C2. The +/// narrower operand is zero-extended to the wider bit width first. +static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue().abs(); + APInt B = C2->getValue()->getValue().abs(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.zext(ABW); + else if (ABW < BBW) + A = A.zext(BBW); + + return APIntOps::GreatestCommonDivisor(A, B); +} + +/// srem - Signed remainder of C1 by C2 after sign-extending the narrower +/// operand to the wider bit width. +static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::srem(A, B); +} + +/// sdiv - Signed quotient of C1 by C2 after sign-extending the narrower +/// operand to the wider bit width. +static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::sdiv(A, B); +} + +namespace { 
+struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> { +public: + // Pattern match Step into Start. When Step is a multiply expression, find + // the largest subexpression of Step that appears in Start. When Start is an + // add expression, try to match Step in the subexpressions of Start, non + // matching subexpressions are returned under Remainder. + // The visitor starts with GCD = Step and Remainder = 0; the visit result is + // returned and the accumulated remainder is stored through Remainder. + static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step, const SCEV **Remainder) { + assert(Remainder && "Remainder should not be NULL"); + SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0)); + const SCEV *Res = R.visit(Start); + *Remainder = R.Remainder; + return Res; + } + + SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R) + : SE(S), GCD(G), Remainder(R) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant || Constant == Zero) + return GCD; + + if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) { + const SCEV *Res = SE.getConstant(gcd(Constant, CGCD)); + if (Res != One) + return Res; + + Remainder = SE.getConstant(srem(Constant, CGCD)); + Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder)); + Res = SE.getConstant(gcd(Constant, CGCD)); + return Res; + } + + // When GCD is not a constant, it could be that the GCD is an Add, Mul, + // AddRec, etc., in which case we want to find out how many times the + // Constant divides the GCD: we then return that as the new GCD. 
+ const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, GCD, Constant, &Rem); + + if (Res == One || Rem != Zero) { + Remainder = Constant; + return One; + } + + assert(isa<SCEVConstant>(Res) && "Res should be a constant"); + Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res))); + return Res; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem); + + // FIXME: There may be ambiguous situations: for instance, + // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m). + // The order in which the AddExpr is traversed computes a different GCD + // and Remainder. + if (Res != One) + GCD = Res; + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + } + + return GCD; + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (Expr->getOperand(i) == GCD) + return GCD; + } + + // If we have not returned yet, it means that GCD is not part of Expr. + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem != Zero) + // GCD does not divide Expr->getOperand(i). 
+ continue; + + if (Res == GCD) + return GCD; + PartialGCD = SE.getMulExpr(PartialGCD, Res); + if (PartialGCD == GCD) + return GCD; + } + + if (PartialGCD != One) + return PartialGCD; + + Remainder = Expr; + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD); + if (!Mul) + return PartialGCD; + + // When the GCD is a multiply expression, try to decompose it: + // this occurs when Step does not divide the Start expression + // as in: {(-4 + (3 * %m)),+,(2 * %m)} + for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem); + if (Rem == Zero) { + Remainder = Rem; + return Res; + } + } + + return PartialGCD; + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + // Not decomposed: unless Expr is the GCD itself it all goes to Remainder. + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return GCD; + + if (!Expr->isAffine()) { + Remainder = Expr; + return GCD; + } + + // Match the start against GCD, then the step against that result; a step + // that leaves a remainder makes the whole addrec the remainder. + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem); + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + + Rem = Zero; + Res = findGCD(SE, Expr->getOperand(1), Res, &Rem); + if (Rem != Zero) { + Remainder = Expr; + return GCD; + } + + return Res; + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return One; + } + +private: + // GCD and Remainder are updated while visiting; Zero and One are constants + // of GCD's type created in the constructor. + ScalarEvolution &SE; + const SCEV *GCD, *Remainder, *Zero, *One; +}; + +struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> { +public: + // Remove from Start all multiples of Step.
+ static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step) { + SCEVDivision D(SE, Step); + const SCEV *Rem = D.Zero; + (void)Rem; + // The division is guaranteed to succeed: Step should divide Start with no + // remainder. + assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero && + "Step should divide Start with no remainder."); + return D.visit(Start); + } + + SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant) + return One; + + if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) + return SE.getConstant(sdiv(Constant, CGCD)); + return Constant; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return One; + + SmallVector<const SCEV *, 2> Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + + if (Operands.size() == 1) + return Operands[0]; + return SE.getAddExpr(Operands); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return One; + + bool FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + + SmallVector<const SCEV *, 2> Operands; + if (FoundGCDTerm) { + FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (FoundGCDTerm) + Operands.push_back(Expr->getOperand(i)); + else if (Expr->getOperand(i) == GCD) + 
FoundGCDTerm = true; + else + Operands.push_back(Expr->getOperand(i)); + } + } else { + FoundGCDTerm = false; + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (PartialGCD == GCD) { + Operands.push_back(Expr->getOperand(i)); + continue; + } + + const SCEV *Rem = Zero; + const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem == Zero) { + PartialGCD = SE.getMulExpr(PartialGCD, Res); + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + } else { + Operands.push_back(Expr->getOperand(i)); + } + } + } + + if (Operands.size() == 1) + return Operands[0]; + return SE.getMulExpr(Operands); + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return One; + + assert(Expr->isAffine() && "Expr should be affine"); + + const SCEV *Start = divide(SE, Expr->getStart(), GCD); + const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD); + + return SE.getAddRecExpr(Start, Step, Expr->getLoop(), + Expr->getNoWrapFlags()); + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return Expr; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Zero, *One; +}; +} + +/// Splits the SCEV into two vectors of SCEVs representing the subscripts and +/// sizes of an array access. Returns the remainder of the delinearization that +/// is the offset start of the array. 
/// The SCEV->delinearize algorithm computes
/// the multiples of SCEV coefficients: that is, it pattern matches
/// subexpressions in the stride and base of a SCEV, which corresponds to the
/// computation of a GCD (greatest common divisor) of base and stride. When
/// SCEV->delinearize fails, it returns the SCEV unchanged.
///
/// For example: when analyzing the memory access A[i][j][k] in this loop nest
///
/// void foo(long n, long m, long o, double A[n][m][o]) {
///
///   for (long i = 0; i < n; i++)
///     for (long j = 0; j < m; j++)
///       for (long k = 0; k < o; k++)
///         A[i][j][k] = 1.0;
/// }
///
/// the delinearization input is the following AddRec SCEV:
///
/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
///
/// From this SCEV, we are able to say that the base offset of the access is %A
/// because it appears as an offset that does not divide any of the strides in
/// the loops:
///
/// CHECK: Base offset: %A
///
/// and then SCEV->delinearize determines the size of some of the dimensions of
/// the array as these are the multiples by which the strides are happening:
///
/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
///
/// Note that the outermost dimension remains of UnknownSize because there are
/// no strides that would help identify the size of that dimension: when
/// the array has been statically allocated, one could compute the size of that
/// dimension by dividing the overall size of the array by the size of the known
/// dimensions: %m * %o * 8.
+/// +/// Finally delinearize provides the access functions for the array reference +/// that does correspond to A[i][j][k] of the above C testcase: +/// +/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +/// +/// The testcases are checking the output of a function pass: +/// DelinearizationPass that walks through all loads and stores of a function +/// asking for the SCEV of the memory access with respect to all enclosing +/// loops, calling SCEV->delinearize on that and printing the results. + +const SCEV * +SCEVAddRecExpr::delinearize(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) const { + // Early exit in case this SCEV is not an affine multivariate function. + if (!this->isAffine()) + return this; + + const SCEV *Start = this->getStart(); + const SCEV *Step = this->getStepRecurrence(SE); + + // Build the SCEV representation of the cannonical induction variable in the + // loop of this SCEV. + const SCEV *Zero = SE.getConstant(this->getType(), 0); + const SCEV *One = SE.getConstant(this->getType(), 1); + const SCEV *IV = + SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags()); + + DEBUG(dbgs() << "(delinearize: " << *this << "\n"); + + // Currently we fail to delinearize when the stride of this SCEV is 1. We + // could decide to not fail in this case: we could just return 1 for the size + // of the subscript, and this same SCEV for the access function. + if (Step == One) { + DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n"); + return this; + } + + // Find the GCD and Remainder of the Start and Step coefficients of this SCEV. + const SCEV *Remainder = NULL; + const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder); + + DEBUG(dbgs() << "GCD: " << *GCD << "\n"); + DEBUG(dbgs() << "Remainder: " << *Remainder << "\n"); + + // Same remark as above: we currently fail the delinearization, although we + // can very well handle this special case. 
+ if (GCD == One) { + DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n"); + return this; + } + + // As findGCD computed Remainder, GCD divides "Start - Remainder." The + // Quotient is then this SCEV without Remainder, scaled down by the GCD. The + // Quotient is what will be used in the next subscript delinearization. + const SCEV *Quotient = + SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD); + DEBUG(dbgs() << "Quotient: " << *Quotient << "\n"); + + const SCEV *Rem; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient)) + // Recursively call delinearize on the Quotient until there are no more + // multiples that can be recognized. + Rem = AR->delinearize(SE, Subscripts, Sizes); + else + Rem = Quotient; + + // Scale up the cannonical induction variable IV by whatever remains from the + // Step after division by the GCD: the GCD is the size of all the sub-array. + if (Step != GCD) { + Step = SCEVDivision::divide(SE, Step, GCD); + IV = SE.getMulExpr(IV, Step); + } + // The access function in the current subscript is computed as the cannonical + // induction variable IV (potentially scaled up by the step) and offset by + // Rem, the offset of delinearization in the sub-array. + const SCEV *Index = SE.getAddExpr(IV, Rem); + + // Record the access function and the size of the current subscript. 
+ Subscripts.push_back(Index); + Sizes.push_back(GCD); + +#ifndef NDEBUG + int Size = Sizes.size(); + DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n"); + DEBUG(dbgs() << "ArrayDecl[UnknownSize]"); + for (int i = 0; i < Size - 1; i++) + DEBUG(dbgs() << "[" << *Sizes[i] << "]"); + DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n"); + + DEBUG(dbgs() << "ArrayRef"); + for (int i = 0; i < Size; i++) + DEBUG(dbgs() << "[" << *Subscripts[i] << "]"); + DEBUG(dbgs() << "\n)\n"); +#endif + + return Remainder; +} + +//===----------------------------------------------------------------------===// +// SCEVCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void ScalarEvolution::SCEVCallbackVH::deleted() { + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); + if (PHINode *PN = dyn_cast<PHINode>(getValPtr())) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(getValPtr()); + // this now dangles! +} + +void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); + + // Forget all the expressions associated with users of the old value, + // so that future queries will recompute the expressions using the new + // value. + Value *Old = getValPtr(); + SmallVector<User *, 16> Worklist; + SmallPtrSet<User *, 8> Visited; + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + while (!Worklist.empty()) { + User *U = Worklist.pop_back_val(); + // Deleting the Old value will cause this to dangle. Postpone + // that until everything else is done. 
+ if (U == Old) + continue; + if (!Visited.insert(U)) + continue; + if (PHINode *PN = dyn_cast<PHINode>(U)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(U); + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + } + // Delete the Old value. + if (PHINode *PN = dyn_cast<PHINode>(Old)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(Old); + // this now dangles! +} + +ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) + : CallbackVH(V), SE(se) {} + +//===----------------------------------------------------------------------===// +// ScalarEvolution Class Implementation +//===----------------------------------------------------------------------===// + +ScalarEvolution::ScalarEvolution() + : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) { + initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); +} + +bool ScalarEvolution::runOnFunction(Function &F) { + this->F = &F; + LI = &getAnalysis<LoopInfo>(); + TD = getAnalysisIfAvailable<DataLayout>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + DT = &getAnalysis<DominatorTree>(); + return false; +} + +void ScalarEvolution::releaseMemory() { + // Iterate through all the SCEVUnknown instances and call their + // destructors, so that they release their references to their values. + for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) + U->~SCEVUnknown(); + FirstUnknown = 0; + + ValueExprMap.clear(); + + // Free any extra memory created for ExitNotTakenInfo in the unlikely event + // that a loop had multiple computable exits. 
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I = + BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); + I != E; ++I) { + I->second.clear(); + } + + assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + + BackedgeTakenCounts.clear(); + ConstantEvolutionLoopExitValue.clear(); + ValuesAtScopes.clear(); + LoopDispositions.clear(); + BlockDispositions.clear(); + UnsignedRanges.clear(); + SignedRanges.clear(); + UniqueSCEVs.clear(); + SCEVAllocator.Reset(); +} + +void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); +} + +bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { + return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L)); +} + +static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, + const Loop *L) { + // Print all inner loops first + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + PrintLoopInfo(OS, SE, *I); + + OS << "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; + + SmallVector<BasicBlock *, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + if (ExitBlocks.size() != 1) + OS << "<multiple exits> "; + + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L); + } else { + OS << "Unpredictable backedge-taken count. "; + } + + OS << "\n" + "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; + + if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { + OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + } else { + OS << "Unpredictable max backedge-taken count. 
"; + } + + OS << "\n"; +} + +void ScalarEvolution::print(raw_ostream &OS, const Module *) const { + // ScalarEvolution's implementation of the print method is to print + // out SCEV values of all instructions that are interesting. Doing + // this potentially causes it to create new SCEV objects though, + // which technically conflicts with the const qualifier. This isn't + // observable from outside the class though, so casting away the + // const isn't dangerous. + ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); + + OS << "Classifying expressions for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) { + OS << *I << '\n'; + OS << " --> "; + const SCEV *SV = SE.getSCEV(&*I); + SV->print(OS); + + const Loop *L = LI->getLoopFor((*I).getParent()); + + const SCEV *AtUse = SE.getSCEVAtScope(SV, L); + if (AtUse != SV) { + OS << " --> "; + AtUse->print(OS); + } + + if (L) { + OS << "\t\t" "Exits: "; + const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + if (!SE.isLoopInvariant(ExitValue, L)) { + OS << "<<Unknown>>"; + } else { + OS << *ExitValue; + } + } + + OS << "\n"; + } + + OS << "Determining loop execution counts for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + PrintLoopInfo(OS, &SE, *I); +} + +ScalarEvolution::LoopDisposition +ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { + SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == L) + return Values[u].second; + } + Values.push_back(std::make_pair(L, LoopVariant)); + LoopDisposition D = computeLoopDisposition(S, L); + SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S]; + for (unsigned u = Values2.size(); 
u > 0; u--) { + if (Values2[u - 1].first == L) { + Values2[u - 1].second = D; + break; + } + } + return D; +} + +ScalarEvolution::LoopDisposition +ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { + switch (S->getSCEVType()) { + case scConstant: + return LoopInvariant; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L); + case scAddRecExpr: { + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); + + // If L is the addrec's loop, it's computable. + if (AR->getLoop() == L) + return LoopComputable; + + // Add recurrences are never invariant in the function-body (null loop). + if (!L) + return LoopVariant; + + // This recurrence is variant w.r.t. L if L contains AR's loop. + if (L->contains(AR->getLoop())) + return LoopVariant; + + // This recurrence is invariant w.r.t. L if AR's loop contains L. + if (AR->getLoop()->contains(L)) + return LoopInvariant; + + // This recurrence is variant w.r.t. L if any of its operands + // are variant. + for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) + if (!isLoopInvariant(*I, L)) + return LoopVariant; + + // Otherwise it's loop-invariant. + return LoopInvariant; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); + bool HasVarying = false; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + LoopDisposition D = getLoopDisposition(*I, L); + if (D == LoopVariant) + return LoopVariant; + if (D == LoopComputable) + HasVarying = true; + } + return HasVarying ? 
LoopComputable : LoopInvariant; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L); + if (LD == LoopVariant) + return LoopVariant; + LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L); + if (RD == LoopVariant) + return LoopVariant; + return (LD == LoopInvariant && RD == LoopInvariant) ? + LoopInvariant : LoopComputable; + } + case scUnknown: + // All non-instruction values are loop invariant. All instructions are loop + // invariant if they are not contained in the specified loop. + // Instructions are never considered invariant in the function body + // (null loop) because they are defined within the "loop". + if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) + return (L && !L->contains(I)) ? LoopInvariant : LoopVariant; + return LoopInvariant; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + default: llvm_unreachable("Unknown SCEV kind!"); + } +} + +bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { + return getLoopDisposition(S, L) == LoopInvariant; +} + +bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { + return getLoopDisposition(S, L) == LoopComputable; +} + +ScalarEvolution::BlockDisposition +ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { + SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == BB) + return Values[u].second; + } + Values.push_back(std::make_pair(BB, DoesNotDominateBlock)); + BlockDisposition D = computeBlockDisposition(S, BB); + SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == BB) { + Values2[u - 1].second = D; + break; + } + } + return D; +} + 
+ScalarEvolution::BlockDisposition +ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { + switch (S->getSCEVType()) { + case scConstant: + return ProperlyDominatesBlock; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB); + case scAddRecExpr: { + // This uses a "dominates" query instead of "properly dominates" query + // to test for proper dominance too, because the instruction which + // produces the addrec's value is a PHI, and a PHI effectively properly + // dominates its entire containing block. + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); + if (!DT->dominates(AR->getLoop()->getHeader(), BB)) + return DoesNotDominateBlock; + } + // FALL THROUGH into SCEVNAryExpr handling. + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); + bool Proper = true; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + BlockDisposition D = getBlockDisposition(*I, BB); + if (D == DoesNotDominateBlock) + return DoesNotDominateBlock; + if (D == DominatesBlock) + Proper = false; + } + return Proper ? ProperlyDominatesBlock : DominatesBlock; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); + BlockDisposition LD = getBlockDisposition(LHS, BB); + if (LD == DoesNotDominateBlock) + return DoesNotDominateBlock; + BlockDisposition RD = getBlockDisposition(RHS, BB); + if (RD == DoesNotDominateBlock) + return DoesNotDominateBlock; + return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ? 
+ ProperlyDominatesBlock : DominatesBlock; + } + case scUnknown: + if (Instruction *I = + dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) { + if (I->getParent() == BB) + return DominatesBlock; + if (DT->properlyDominates(I->getParent(), BB)) + return ProperlyDominatesBlock; + return DoesNotDominateBlock; + } + return ProperlyDominatesBlock; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + default: + llvm_unreachable("Unknown SCEV kind!"); + } +} + +bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { + return getBlockDisposition(S, BB) >= DominatesBlock; +} + +bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { + return getBlockDisposition(S, BB) == ProperlyDominatesBlock; +} + +namespace { +// Search for a SCEV expression node within an expression tree. +// Implements SCEVTraversal::Visitor. +struct SCEVSearch { + const SCEV *Node; + bool IsFound; + + SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} + + bool follow(const SCEV *S) { + IsFound |= (S == Node); + return !IsFound; + } + bool isDone() const { return IsFound; } +}; +} + +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + SCEVSearch Search(Op); + visitAll(S, Search); + return Search.IsFound; +} + +void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { + ValuesAtScopes.erase(S); + LoopDispositions.erase(S); + BlockDispositions.erase(S); + UnsignedRanges.erase(S); + SignedRanges.erase(S); + + for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I = + BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + BackedgeTakenCounts.erase(I++); + } + else + ++I; + } +} + +typedef DenseMap<const Loop *, std::string> VerifyMap; + +/// replaceSubString - Replaces all occurences of From in Str with To. 
+static void replaceSubString(std::string &Str, StringRef From, StringRef To) { + size_t Pos = 0; + while ((Pos = Str.find(From, Pos)) != std::string::npos) { + Str.replace(Pos, From.size(), To.data(), To.size()); + Pos += To.size(); + } +} + +/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis. +static void +getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { + for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) { + getLoopBackedgeTakenCounts(*I, Map, SE); // recurse. + + std::string &S = Map[L]; + if (S.empty()) { + raw_string_ostream OS(S); + SE.getBackedgeTakenCount(L)->print(OS); + + // false and 0 are semantically equivalent. This can happen in dead loops. + replaceSubString(OS.str(), "false", "0"); + // Remove wrap flags, their use in SCEV is highly fragile. + // FIXME: Remove this when SCEV gets smarter about them. + replaceSubString(OS.str(), "<nw>", ""); + replaceSubString(OS.str(), "<nsw>", ""); + replaceSubString(OS.str(), "<nuw>", ""); + } + } +} + +void ScalarEvolution::verifyAnalysis() const { + if (!VerifySCEV) + return; + + ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); + + // Gather stringified backedge taken counts for all loops using SCEV's caches. + // FIXME: It would be much better to store actual values instead of strings, + // but SCEV pointers will change if we drop the caches. + VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); + + // Gather stringified backedge taken counts for all loops without using + // SCEV's caches. + SE.releaseMemory(); + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE); + + // Now compare whether they're the same with and without caches. This allows + // verifying that no pass changed the cache. 
+ assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && + "New loops suddenly appeared!"); + + for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(), + OldE = BackedgeDumpsOld.end(), + NewI = BackedgeDumpsNew.begin(); + OldI != OldE; ++OldI, ++NewI) { + assert(OldI->first == NewI->first && "Loop order changed!"); + + // Compare the stringified SCEVs. We don't care if undef backedgetaken count + // changes. + // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This + // means that a pass is buggy or SCEV has to learn a new pattern but is + // usually not harmful. + if (OldI->second != NewI->second && + OldI->second.find("undef") == std::string::npos && + NewI->second.find("undef") == std::string::npos && + OldI->second != "***COULDNOTCOMPUTE***" && + NewI->second != "***COULDNOTCOMPUTE***") { + dbgs() << "SCEVValidator: SCEV for loop '" + << OldI->first->getHeader()->getName() + << "' changed from '" << OldI->second + << "' to '" << NewI->second << "'!\n"; + std::abort(); + } + } + + // TODO: Verify more things. +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp new file mode 100644 index 000000000000..79c5f0deb03b --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -0,0 +1,173 @@ +//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a +// simple alias analysis implemented in terms of ScalarEvolution queries. 
+// +// This differs from traditional loop dependence analysis in that it tests +// for dependencies within a single iteration of a loop, rather than +// dependencies between different iterations. +// +// ScalarEvolution has a more complete understanding of pointer arithmetic +// than BasicAliasAnalysis' collection of ad-hoc analyses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis + /// implementation that uses ScalarEvolution to answer queries. + class ScalarEvolutionAliasAnalysis : public FunctionPass, + public AliasAnalysis { + ScalarEvolution *SE; + + public: + static char ID; // Class identification, replacement for typeinfo + ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) { + initializeScalarEvolutionAliasAnalysisPass( + *PassRegistry::getPassRegistry()); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual AliasResult alias(const Location &LocA, const Location &LocB); + + Value *GetBaseValue(const SCEV *S); + }; +} // End of anonymous namespace + +// Register this pass... 
+char ScalarEvolutionAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+                   "ScalarEvolution-based Alias Analysis", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+                    "ScalarEvolution-based Alias Analysis", false, true, false)
+
+/// createScalarEvolutionAliasAnalysisPass - Allocate a new instance of the
+/// pass. The object is heap-allocated and returned as a raw pointer;
+/// presumably the caller (e.g. a pass manager) takes ownership.
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+  return new ScalarEvolutionAliasAnalysis();
+}
+
+/// getAnalysisUsage - Declare a transitive requirement on ScalarEvolution,
+/// state that this pass preserves every analysis, and then let the
+/// AliasAnalysis base add its own requirements.
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredTransitive<ScalarEvolution>();
+  AU.setPreservesAll();
+  AliasAnalysis::getAnalysisUsage(AU);
+}
+
+/// runOnFunction - Set up the AliasAnalysis chaining state and cache the
+/// ScalarEvolution result for later queries. Always returns false.
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+  InitializeAliasAnalysis(this);
+  SE = &getAnalysis<ScalarEvolution>();
+  return false;
+}
+
+/// GetBaseValue - Given an expression, try to find a
+/// base value. Return null if none was found.
+///
+/// Only addrecs (via their start), adds (via a trailing pointer-typed
+/// operand) and unknowns are looked through; every other expression kind
+/// yields null.
+Value *
+ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // In an addrec, assume that the base will be in the start, rather
+    // than the step.
+    return GetBaseValue(AR->getStart());
+  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // If there's a pointer operand, it'll be sorted at the end of the list.
+    const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+    if (Last->getType()->isPointerTy())
+      return GetBaseValue(Last);
+  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // This is a leaf node.
+    return U->getValue();
+  }
+  // No Identified object found.
+  return 0;
+}
+
+/// alias - Answer an alias query for two memory locations.
+///
+/// Returns NoAlias when either location is empty, when the range of the
+/// difference between the two pointer SCEVs shows the accesses cannot
+/// overlap, or when a query on the underlying base values returns NoAlias.
+/// Returns MustAlias when both pointers map to the same SCEV. Anything else
+/// is forwarded to the next analysis in the chain.
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
+                                    const Location &LocB) {
+  // If either of the memory references is empty, it doesn't matter what the
+  // pointer values are. This allows the code below to ignore this special
+  // case.
+  if (LocA.Size == 0 || LocB.Size == 0)
+    return NoAlias;
+
+  // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+  const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
+  const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+
+  // If they evaluate to the same expression, it's a MustAlias.
+  if (AS == BS) return MustAlias;
+
+  // If something is known about the difference between the two addresses,
+  // see if it's enough to prove a NoAlias.
+  if (SE->getEffectiveSCEVType(AS->getType()) ==
+      SE->getEffectiveSCEVType(BS->getType())) {
+    unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+    APInt ASizeInt(BitWidth, LocA.Size);
+    APInt BSizeInt(BitWidth, LocB.Size);
+
+    // Compute the difference between the two pointers.
+    const SCEV *BA = SE->getMinusSCEV(BS, AS);
+
+    // Test whether the difference is known to be great enough that memory of
+    // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+    // are non-zero, which is special-cased above.
+    // The second comparison uses the negated size, i.e. the distance is also
+    // bounded from above in modular (wrap-around) arithmetic.
+    if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+        (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+      return NoAlias;
+
+    // Folding the subtraction while preserving range information can be tricky
+    // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+    // and try again to see if things fold better that way.
+
+    // Compute the difference between the two pointers.
+    const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+    // Test whether the difference is known to be great enough that memory of
+    // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+    // are non-zero, which is special-cased above.
+    if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
+        (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+      return NoAlias;
+  }
+
+  // If ScalarEvolution can find an underlying object, form a new query.
+  // The correctness of this depends on ScalarEvolution not recognizing
+  // inttoptr and ptrtoint operators.
+  Value *AO = GetBaseValue(AS);
+  Value *BO = GetBaseValue(BS);
+  // Only re-query when at least one side was actually replaced by a different
+  // base value. A replaced side is queried with unknown size and no TBAA tag;
+  // only a NoAlias answer from that query is used.
+  if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+    if (alias(Location(AO ? AO : LocA.Ptr,
+                       AO ? +UnknownSize : LocA.Size,
+                       AO ? 0 : LocA.TBAATag),
+              Location(BO ? BO : LocB.Ptr,
+                       BO ? +UnknownSize : LocB.Size,
+                       BO ? 0 : LocB.TBAATag)) == NoAlias)
+      return NoAlias;
+
+  // Forward the query to the next analysis.
+  return AliasAnalysis::alias(LocA, LocB);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
new file mode 100644
index 000000000000..86a557b55f7e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -0,0 +1,1748 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
+                                       Instruction::CastOps Op,
+                                       BasicBlock::iterator IP) {
+  // This function must be called with the builder having a valid insertion
+  // point. It doesn't need to be the actual IP where the uses of the returned
+  // cast will be added, but it must dominate such IP.
+  // We use this precondition to produce a cast that will dominate all its
+  // uses. In particular, this is crucial for the case where the builder's
+  // insertion point *is* the point where we were asked to put the cast.
+  // Since we don't know the builder's insertion point is actually
+  // where the uses will be added (only that it dominates it), we are
+  // not allowed to move it.
+  BasicBlock::iterator BIP = Builder.GetInsertPoint();
+
+  Instruction *Ret = NULL;
+
+  // Check to see if there is already a cast!
+  // The scan stops at the first user that is a cast with the requested
+  // opcode and result type; both branches below leave the loop.
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+       UI != E; ++UI) {
+    User *U = *UI;
+    if (U->getType() == Ty)
+      if (CastInst *CI = dyn_cast<CastInst>(U))
+        if (CI->getOpcode() == Op) {
+          // If the cast isn't where we want it, create a new cast at IP.
+          // Likewise, do not reuse a cast at BIP because it must dominate
+          // instructions that might be inserted before BIP.
+          if (BasicBlock::iterator(CI) != IP || BIP == IP) {
+            // Create a new cast, and leave the old cast in place in case
+            // it is being used as an insert point. Clear its operand
+            // so that it doesn't hold anything live.
+            Ret = CastInst::Create(Op, V, Ty, "", IP);
+            Ret->takeName(CI);
+            CI->replaceAllUsesWith(Ret);
+            CI->setOperand(0, UndefValue::get(V->getType()));
+            break;
+          }
+          Ret = CI;
+          break;
+        }
+  }
+
+  // Create a new cast.
+  if (!Ret)
+    Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
+
+  // We assert at the end of the function since IP might point to an
+  // instruction with different dominance properties than a cast
+  // (an invoke for example) and not dominate BIP (but the cast does).
+  assert(SE.DT->dominates(Ret, BIP));
+
+  rememberInstruction(Ret);
+  return Ret;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+///
+/// Returns V itself, an operand of V, or a folded constant when no new
+/// instruction is needed; otherwise returns the result of ReuseOrCreateCast.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
+  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+  assert((Op == Instruction::BitCast ||
+          Op == Instruction::PtrToInt ||
+          Op == Instruction::IntToPtr) &&
+         "InsertNoopCastOfTo cannot perform non-noop casts!");
+  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+         "InsertNoopCastOfTo cannot change sizes!");
+
+  // Short-circuit unnecessary bitcasts.
+  if (Op == Instruction::BitCast) {
+    if (V->getType() == Ty)
+      return V;
+    if (CastInst *CI = dyn_cast<CastInst>(V)) {
+      if (CI->getOperand(0)->getType() == Ty)
+        return CI->getOperand(0);
+    }
+  }
+  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+    if (CastInst *CI = dyn_cast<CastInst>(V))
+      if ((CI->getOpcode() == Instruction::PtrToInt ||
+           CI->getOpcode() == Instruction::IntToPtr) &&
+          SE.getTypeSizeInBits(CI->getType()) ==
+          SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+        return CI->getOperand(0);
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+      if ((CE->getOpcode() == Instruction::PtrToInt ||
+           CE->getOpcode() == Instruction::IntToPtr) &&
+          SE.getTypeSizeInBits(CE->getType()) ==
+          SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+        return CE->getOperand(0);
+  }
+
+  // Fold a cast of a constant.
+  if (Constant *C = dyn_cast<Constant>(V))
+    return ConstantExpr::getCast(Op, C, Ty);
+
+  // Cast the argument at the beginning of the entry block, after
+  // any bitcasts of other arguments.
+  if (Argument *A = dyn_cast<Argument>(V)) {
+    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+    // Debug intrinsics and landing pads are stepped over as well.
+    while ((isa<BitCastInst>(IP) &&
+            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+            cast<BitCastInst>(IP)->getOperand(0) != A) ||
+           isa<DbgInfoIntrinsic>(IP) ||
+           isa<LandingPadInst>(IP))
+      ++IP;
+    return ReuseOrCreateCast(A, Ty, Op, IP);
+  }
+
+  // Cast the instruction immediately after the instruction.
+  // For an invoke, the cast goes into the normal destination instead; PHIs
+  // and landing pads at the chosen position are skipped.
+  Instruction *I = cast<Instruction>(V);
+  BasicBlock::iterator IP = I; ++IP;
+  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+    IP = II->getNormalDest()->begin();
+  while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
+    ++IP;
+  return ReuseOrCreateCast(I, Ty, Op, IP);
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation.
+///
+/// Returns a folded constant when both operands are constants, an existing
+/// matching instruction found just before the insertion point, or a newly
+/// created instruction hoisted out of every enclosing loop in which both
+/// operands are invariant and which has a preheader.
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
+                                 Value *LHS, Value *RHS) {
+  // Fold a binop with constant operands.
+  if (Constant *CLHS = dyn_cast<Constant>(LHS))
+    if (Constant *CRHS = dyn_cast<Constant>(RHS))
+      return ConstantExpr::get(Opcode, CLHS, CRHS);
+
+  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+  unsigned ScanLimit = 6;
+  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+  // Scanning starts from the last instruction before the insertion point.
+  BasicBlock::iterator IP = Builder.GetInsertPoint();
+  if (IP != BlockBegin) {
+    --IP;
+    for (; ScanLimit; --IP, --ScanLimit) {
+      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+      // generated code.
+      if (isa<DbgInfoIntrinsic>(IP))
+        ScanLimit++;
+      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+          IP->getOperand(1) == RHS)
+        return IP;
+      if (IP == BlockBegin) break;
+    }
+  }
+
+  // Save the original insertion point so we can restore it when we're done.
+  // The debug location is captured before the insertion point may move.
+  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+  BuilderType::InsertPointGuard Guard(Builder);
+
+  // Move the insertion point out of as many loops as we can.
+  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+    if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+    BasicBlock *Preheader = L->getLoopPreheader();
+    if (!Preheader) break;
+
+    // Ok, move up a level.
+    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+  }
+
+  // If we haven't found this binop, insert it.
+  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
+  BO->setDebugLoc(Loc);
+  rememberInstruction(BO);
+
+  return BO;
+}
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
+/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
+/// check to see if the divide was folded.
+///
+/// S and Remainder are in/out parameters: on success S holds the quotient
+/// and, in the constant case, the signed remainder is added to Remainder.
+/// When TD is non-null, Factor must be a SCEVConstant for the multiply case
+/// (it is cast<>ed unconditionally there).
+static bool FactorOutConstant(const SCEV *&S,
+                              const SCEV *&Remainder,
+                              const SCEV *Factor,
+                              ScalarEvolution &SE,
+                              const DataLayout *TD) {
+  // Everything is divisible by one.
+  if (Factor->isOne())
+    return true;
+
+  // x/x == 1.
+  if (S == Factor) {
+    S = SE.getConstant(S->getType(), 1);
+    return true;
+  }
+
+  // For a Constant, check for a multiple of the given factor.
+  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+    // 0/x == 0.
+    if (C->isZero())
+      return true;
+    // Check for divisibility.
+    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+      ConstantInt *CI =
+        ConstantInt::get(SE.getContext(),
+                         C->getValue()->getValue().sdiv(
+                                                   FC->getValue()->getValue()));
+      // If the quotient is zero and the remainder is non-zero, reject
+      // the value at this scale. It will be considered for subsequent
+      // smaller scales.
+      if (!CI->isZero()) {
+        const SCEV *Div = SE.getConstant(CI);
+        S = Div;
+        Remainder =
+          SE.getAddExpr(Remainder,
+                        SE.getConstant(C->getValue()->getValue().srem(
+                                                  FC->getValue()->getValue())));
+        return true;
+      }
+    }
+  }
+
+  // In a Mul, check if there is a constant operand which is a multiple
+  // of the given factor.
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+    if (TD) {
+      // With DataLayout, the size is known. Check if there is a constant
+      // operand which is a multiple of the given factor. If so, we can
+      // factor it.
+      const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+        if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+          NewMulOps[0] =
+            SE.getConstant(C->getValue()->getValue().sdiv(
+                                                   FC->getValue()->getValue()));
+          S = SE.getMulExpr(NewMulOps);
+          return true;
+        }
+    } else {
+      // Without DataLayout, check if Factor can be factored out of any of the
+      // Mul's operands. If so, we can just remove it.
+      for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+        const SCEV *SOp = M->getOperand(i);
+        // This local deliberately shadows the Remainder parameter so that a
+        // failed attempt does not disturb the caller's remainder.
+        const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
+        if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+            Remainder->isZero()) {
+          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+          NewMulOps[i] = SOp;
+          S = SE.getMulExpr(NewMulOps);
+          return true;
+        }
+      }
+    }
+  }
+
+  // In an AddRec, check if both start and step are divisible.
+  // The step must divide exactly; only the start may leave a remainder.
+  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+    const SCEV *Step = A->getStepRecurrence(SE);
+    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+    if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
+      return false;
+    if (!StepRem->isZero())
+      return false;
+    const SCEV *Start = A->getStart();
+    if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
+      return false;
+    S = SE.getAddRecExpr(Start, Step, A->getLoop(),
+                         A->getNoWrapFlags(SCEV::FlagNW));
+    return true;
+  }
+
+  return false;
+}
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. The
+/// SCEVAddRecExprs forming the trailing run of the list are counted and kept
+/// at the end of the list; everything before them is handed to
+/// ScalarEvolution for simplification. Ty is the type of the zero constant
+/// used when there are no non-addrec operands.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+                                Type *Ty,
+                                ScalarEvolution &SE) {
+  unsigned NumAddRecs = 0;
+  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+    ++NumAddRecs;
+  // Group Ops into non-addrecs and addrecs.
+  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+  // Let ScalarEvolution sort and simplify the non-addrecs list.
+  const SCEV *Sum = NoAddRecs.empty() ?
+                    SE.getConstant(Ty, 0) :
+                    SE.getAddExpr(NoAddRecs);
+  // If it returned an add, use the operands. Otherwise it simplified
+  // the sum into a single value, so just use that.
+  Ops.clear();
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+    Ops.append(Add->op_begin(), Add->op_end());
+  else if (!Sum->isZero())
+    Ops.push_back(Sum);
+  // Then append the addrecs.
+  Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+/// Ops is rewritten in place. Ty is the type of the zero constants that
+/// stand in for the extracted start values.
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+                         Type *Ty,
+                         ScalarEvolution &SE) {
+  // Find the addrecs.
+  SmallVector<const SCEV *, 8> AddRecs;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    // Keep peeling Ops[i] for as long as it is an addrec with a non-zero
+    // start; a start that is itself an addrec is peeled on the next pass.
+    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+      const SCEV *Start = A->getStart();
+      if (Start->isZero()) break;
+      const SCEV *Zero = SE.getConstant(Ty, 0);
+      AddRecs.push_back(SE.getAddRecExpr(Zero,
+                                         A->getStepRecurrence(SE),
+                                         A->getLoop(),
+                                         A->getNoWrapFlags(SCEV::FlagNW)));
+      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+        // The start is a sum: park a zero in this slot and append the sum's
+        // operands, extending the loop bound so they get visited too.
+        Ops[i] = Zero;
+        Ops.append(Add->op_begin(), Add->op_end());
+        e += Add->getNumOperands();
+      } else {
+        Ops[i] = Start;
+      }
+    }
+  if (!AddRecs.empty()) {
+    // Add the addrecs onto the end of the list.
+    Ops.append(AddRecs.begin(), AddRecs.end());
+    // Resort the operand list, moving any constants to the front.
+    SimplifyAddOperands(Ops, Ty, SE);
+  }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here.
+/// Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+                                    const SCEV *const *op_end,
+                                    PointerType *PTy,
+                                    Type *Ty,
+                                    Value *V) {
+  Type *ElTy = PTy->getElementType();
+  SmallVector<Value *, 4> GepIndices;
+  SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+  bool AnyNonZeroIndices = false;
+
+  // Split AddRecs up into parts as either of the parts may be usable
+  // without the other.
+  SplitAddRecs(Ops, Ty, SE);
+
+  // Without DataLayout the pointer-sized integer type is taken to be i64.
+  Type *IntPtrTy = SE.TD
+                 ? SE.TD->getIntPtrType(PTy)
+                 : Type::getInt64Ty(PTy->getContext());
+
+  // Descend down the pointer's type and attempt to convert the other
+  // operands into GEP indices, at each level. The first index in a GEP
+  // indexes into the array implied by the pointer operand; the rest of
+  // the indices index into the element or field type selected by the
+  // preceding index.
+  for (;;) {
+    // If the scale size is not 0, attempt to factor out a scale for
+    // array indexing.
+    SmallVector<const SCEV *, 8> ScaledOps;
+    if (ElTy->isSized()) {
+      const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
+      if (!ElSize->isZero()) {
+        SmallVector<const SCEV *, 8> NewOps;
+        for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+          const SCEV *Op = Ops[i];
+          const SCEV *Remainder = SE.getConstant(Ty, 0);
+          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+            // Op now has ElSize factored out.
+            ScaledOps.push_back(Op);
+            if (!Remainder->isZero())
+              NewOps.push_back(Remainder);
+            AnyNonZeroIndices = true;
+          } else {
+            // The operand was not divisible, so add it to the list of operands
+            // we'll scan next iteration.
+            NewOps.push_back(Ops[i]);
+          }
+        }
+        // If we made any changes, update Ops.
+        if (!ScaledOps.empty()) {
+          Ops = NewOps;
+          SimplifyAddOperands(Ops, Ty, SE);
+        }
+      }
+    }
+
+    // Record the scaled array index for this level of the type. If
+    // we didn't find any operands that could be factored, tentatively
+    // assume that element zero was selected (since the zero offset
+    // would obviously be folded away).
+    Value *Scaled = ScaledOps.empty() ?
+                    Constant::getNullValue(Ty) :
+                    expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+    GepIndices.push_back(Scaled);
+
+    // Collect struct field index operands.
+    while (StructType *STy = dyn_cast<StructType>(ElTy)) {
+      bool FoundFieldNo = false;
+      // An empty struct has no fields.
+      if (STy->getNumElements() == 0) break;
+      if (SE.TD) {
+        // With DataLayout, field offsets are known. See if a constant offset
+        // falls within any of the struct fields.
+        if (Ops.empty()) break;
+        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+          if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+            const StructLayout &SL = *SE.TD->getStructLayout(STy);
+            uint64_t FullOffset = C->getValue()->getZExtValue();
+            if (FullOffset < SL.getSizeInBytes()) {
+              unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+              GepIndices.push_back(
+                  ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+              ElTy = STy->getTypeAtIndex(ElIdx);
+              // Keep only the part of the offset that lies inside the field.
+              Ops[0] =
+                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+              AnyNonZeroIndices = true;
+              FoundFieldNo = true;
+            }
+          }
+      } else {
+        // Without DataLayout, just check for an offsetof expression of the
+        // appropriate struct type.
+        for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+          if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
+            Type *CTy;
+            Constant *FieldNo;
+            if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
+              GepIndices.push_back(FieldNo);
+              ElTy =
+                STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
+              Ops[i] = SE.getConstant(Ty, 0);
+              AnyNonZeroIndices = true;
+              FoundFieldNo = true;
+              break;
+            }
+          }
+      }
+      // If no struct field offsets were found, tentatively assume that
+      // field zero was selected (since the zero offset would obviously
+      // be folded away).
+      if (!FoundFieldNo) {
+        ElTy = STy->getTypeAtIndex(0u);
+        GepIndices.push_back(
+          Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+      }
+    }
+
+    // Descend into array element types; anything else ends the walk.
+    if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+      ElTy = ATy->getElementType();
+    else
+      break;
+  }
+
+  // If none of the operands were convertible to proper GEP indices, cast
+  // the base to i8* and do an ugly getelementptr with that. It's still
+  // better than ptrtoint+arithmetic+inttoptr at least.
+  if (!AnyNonZeroIndices) {
+    // Cast the base to i8*.
+    V = InsertNoopCastOfTo(V,
+       Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+    assert(!isa<Instruction>(V) ||
+           SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
+
+    // Expand the operands for a plain byte offset.
+    Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+
+    // Fold a GEP with constant operands.
+    if (Constant *CLHS = dyn_cast<Constant>(V))
+      if (Constant *CRHS = dyn_cast<Constant>(Idx))
+        return ConstantExpr::getGetElementPtr(CLHS, CRHS);
+
+    // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+    unsigned ScanLimit = 6;
+    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+    // Scanning starts from the last instruction before the insertion point.
+    BasicBlock::iterator IP = Builder.GetInsertPoint();
+    if (IP != BlockBegin) {
+      --IP;
+      for (; ScanLimit; --IP, --ScanLimit) {
+        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+        // generated code.
+        if (isa<DbgInfoIntrinsic>(IP))
+          ScanLimit++;
+        if (IP->getOpcode() == Instruction::GetElementPtr &&
+            IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+          return IP;
+        if (IP == BlockBegin) break;
+      }
+    }
+
+    // Save the original insertion point so we can restore it when we're done.
+    BuilderType::InsertPointGuard Guard(Builder);
+
+    // Move the insertion point out of as many loops as we can.
+    while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+      BasicBlock *Preheader = L->getLoopPreheader();
+      if (!Preheader) break;
+
+      // Ok, move up a level.
+      Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+    }
+
+    // Emit a GEP.
+    Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+    rememberInstruction(GEP);
+
+    return GEP;
+  }
+
+  // Save the original insertion point so we can restore it when we're done.
+  BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
+
+  // Move the insertion point out of as many loops as we can.
+  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+    if (!L->isLoopInvariant(V)) break;
+
+    bool AnyIndexNotLoopInvariant = false;
+    for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
+         E = GepIndices.end(); I != E; ++I)
+      if (!L->isLoopInvariant(*I)) {
+        AnyIndexNotLoopInvariant = true;
+        break;
+      }
+    if (AnyIndexNotLoopInvariant)
+      break;
+
+    BasicBlock *Preheader = L->getLoopPreheader();
+    if (!Preheader) break;
+
+    // Ok, move up a level.
+    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+  }
+
+  // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+  // because ScalarEvolution may have changed the address arithmetic to
+  // compute a value which is beyond the end of the allocated object.
+  Value *Casted = V;
+  if (V->getType() != PTy)
+    Casted = InsertNoopCastOfTo(Casted, PTy);
+  Value *GEP = Builder.CreateGEP(Casted,
+                                 GepIndices,
+                                 "scevgep");
+  // Whatever could not be turned into an index is still in Ops; add the new
+  // GEP to that list and expand the remaining sum.
+  Ops.push_back(SE.getUnknown(GEP));
+  rememberInstruction(GEP);
+
+  // Restore the original insert point.
+  Builder.restoreIP(SaveInsertPt);
+
+  return expand(SE.getAddExpr(Ops));
+}
+
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later.
+/// A null loop loses to any non-null loop; if neither header dominates the
+/// other, A is returned.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+                                        DominatorTree &DT) {
+  if (!A) return B;
+  if (!B) return A;
+  if (A->contains(B)) return B;
+  if (B->contains(A)) return A;
+  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+  return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+/// Results are memoized in RelevantLoops; a null result means no loop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+  // Test whether we've already computed the most relevant loop for this SCEV.
+  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
+    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+  if (!Pair.second)
+    return Pair.first->second;
+
+  if (isa<SCEVConstant>(S))
+    // A constant has no relevant loops.
+    return 0;
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+      return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+    // A non-instruction has no relevant loops.
+    return 0;
+  }
+  // The cases below recurse, which may insert into RelevantLoops, so the
+  // result is stored through operator[] rather than through Pair.first.
+  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+    const Loop *L = 0;
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+      L = AR->getLoop();
+    for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
+         I != E; ++I)
+      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+    return RelevantLoops[N] = L;
+  }
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+    const Loop *Result = getRelevantLoop(C->getOperand());
+    return RelevantLoops[C] = Result;
+  }
+  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+    const Loop *Result =
+      PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
+                           getRelevantLoop(D->getRHS()),
+                           *SE.DT);
+    return RelevantLoops[D] = Result;
+  }
+  llvm_unreachable("Unexpected SCEV type!");
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+/// Used as the ordering predicate for (loop, operand) pairs when expanding
+/// add and mul expressions.
+class LoopCompare {
+  DominatorTree &DT;
+public:
+  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+                  std::pair<const Loop *, const SCEV *> RHS) const {
+    // Keep pointer operands sorted at the end.
+    if (LHS.second->getType()->isPointerTy() !=
+        RHS.second->getType()->isPointerTy())
+      return LHS.second->getType()->isPointerTy();
+
+    // Compare loops with PickMostRelevantLoop.
+    if (LHS.first != RHS.first)
+      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+    // If one operand is a non-constant negative and the other is not,
+    // put the non-constant negative on the right so that a sub can
+    // be used instead of a negate and add.
+    if (LHS.second->isNonConstantNegative()) {
+      if (!RHS.second->isNonConstantNegative())
+        return false;
+    } else if (RHS.second->isNonConstantNegative())
+      return true;
+
+    // Otherwise they are equivalent according to this comparison.
+    return false;
+  }
+};
+
+}
+
+/// visitAddExpr - Expand an add expression. Operands are grouped by their
+/// most relevant loop; pointer-typed running sums or operands are expanded
+/// through expandAddToGEP, everything else through add/sub instructions.
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+  Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+  // Collect all the add operands in a loop, along with their associated loops.
+  // Iterate in reverse so that constants are emitted last, all else equal, and
+  // so that pointer operands are inserted first, which the code below relies on
+  // to form more involved GEPs.
+  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+       E(S->op_begin()); I != E; ++I)
+    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+  // Sort by loop. Use a stable sort so that constants follow non-constants and
+  // pointer operands precede non-pointer operands.
+  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+  // Emit instructions to add all the operands. Hoist as much as possible
+  // out of loops, and form meaningful getelementptrs where possible.
+  Value *Sum = 0;
+  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+    const Loop *CurLoop = I->first;
+    const SCEV *Op = I->second;
+    if (!Sum) {
+      // This is the first operand. Just expand it.
+      Sum = expand(Op);
+      ++I;
+    } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+      // The running sum expression is a pointer. Try to form a getelementptr
+      // at this level with that as the base.
+      SmallVector<const SCEV *, 4> NewOps;
+      for (; I != E && I->first == CurLoop; ++I) {
+        // If the operand is SCEVUnknown and not instructions, peek through
+        // it, to enable more of it to be folded into the GEP.
+        const SCEV *X = I->second;
+        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+          if (!isa<Instruction>(U->getValue()))
+            X = SE.getSCEV(U->getValue());
+        NewOps.push_back(X);
+      }
+      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+    } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+      // The running sum is an integer, and there's a pointer at this level.
+      // Try to form a getelementptr. If the running sum is instructions,
+      // use a SCEVUnknown to avoid re-analyzing them.
+      SmallVector<const SCEV *, 4> NewOps;
+      NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+                                               SE.getSCEV(Sum));
+      for (++I; I != E && I->first == CurLoop; ++I)
+        NewOps.push_back(I->second);
+      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+    } else if (Op->isNonConstantNegative()) {
+      // Instead of doing a negate and add, just do a subtract.
+      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+      Sum = InsertNoopCastOfTo(Sum, Ty);
+      Sum = InsertBinop(Instruction::Sub, Sum, W);
+      ++I;
+    } else {
+      // A simple add.
+      Value *W = expandCodeFor(Op, Ty);
+      Sum = InsertNoopCastOfTo(Sum, Ty);
+      // Canonicalize a constant to the RHS.
+      if (isa<Constant>(Sum)) std::swap(Sum, W);
+      Sum = InsertBinop(Instruction::Add, Sum, W);
+      ++I;
+    }
+  }
+
+  return Sum;
+}
+
+/// visitMulExpr - Expand a multiply expression, ordering operands by their
+/// most relevant loop and emitting a negate instead of a multiply by -1.
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+  Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+  // Collect all the mul operands in a loop, along with their associated loops.
+  // Iterate in reverse so that constants are emitted last, all else equal.
+  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+  for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+       E(S->op_begin()); I != E; ++I)
+    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+  // Sort by loop. Use a stable sort so that constants follow non-constants.
+  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+  // Emit instructions to mul all the operands. Hoist as much as possible
+  // out of loops.
+  Value *Prod = 0;
+  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+    const SCEV *Op = I->second;
+    if (!Prod) {
+      // This is the first operand. Just expand it.
+      Prod = expand(Op);
+      ++I;
+    } else if (Op->isAllOnesValue()) {
+      // Instead of doing a multiply by negative one, just do a negate.
+      Prod = InsertNoopCastOfTo(Prod, Ty);
+      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+      ++I;
+    } else {
+      // A simple mul.
+      Value *W = expandCodeFor(Op, Ty);
+      Prod = InsertNoopCastOfTo(Prod, Ty);
+      // Canonicalize a constant to the RHS.
+      if (isa<Constant>(Prod)) std::swap(Prod, W);
+      Prod = InsertBinop(Instruction::Mul, Prod, W);
+      ++I;
+    }
+  }
+
+  return Prod;
+}
+
+/// visitUDivExpr - Expand an unsigned division. A constant power-of-two
+/// divisor is emitted as a logical right shift instead of a udiv.
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+  Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+  Value *LHS = expandCodeFor(S->getLHS(), Ty);
+  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+    const APInt &RHS = SC->getValue()->getValue();
+    if (RHS.isPowerOf2())
+      return InsertBinop(Instruction::LShr, LHS,
+                         ConstantInt::get(Ty, RHS.logBase2()));
+  }
+
+  Value *RHS = expandCodeFor(S->getRHS(), Ty);
+  return InsertBinop(Instruction::UDiv, LHS, RHS);
+}
+
+/// Move parts of Base into Rest to leave Base with the minimal
+/// expression that provides a pointer operand suitable for a
+/// GEP expansion.
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, + ScalarEvolution &SE) { + while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { + Base = A->getStart(); + Rest = SE.getAddExpr(Rest, + SE.getAddRecExpr(SE.getConstant(A->getType(), 0), + A->getStepRecurrence(SE), + A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW))); + } + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { + Base = A->getOperand(A->getNumOperands()-1); + SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end()); + NewAddOps.back() = Rest; + Rest = SE.getAddExpr(NewAddOps); + ExposePointerBase(Base, Rest, SE); + } +} + +/// Determine if this is a well-behaved chain of instructions leading back to +/// the PHI. If so, it may be reused by expanded expressions. +/// +/// Each link of the chain is followed through operand 0. The walk succeeds +/// only when it reaches PN itself; it fails on any link that has no operands, +/// is a PHI, is a cast other than a bitcast, may have side effects, or (when +/// L is IVIncInsertLoop) has a non-IV instruction operand that does not +/// dominate IVIncInsertPos. +bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || + (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) + return false; + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + if (L == IVIncInsertLoop) { + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) + return false; + } + // Advance to the next instruction. + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + return false; + + if (IncV->mayHaveSideEffects()) + return false; + + // Reaching PN means the whole chain leads back to the PHI. Any other + // instruction is just another link and must be validated recursively; + // accepting it here would approve chains that never reach PN. + if (IncV == PN) + return true; + + return isNormalAddRecExprPHI(PN, IncV, L); +} + +/// getIVIncOperand returns an induction variable increment's induction +/// variable operand. +/// +/// If allowScale is set, any type of GEP is allowed as long as the nonIV +/// operands dominate InsertPos.
+/// +/// If allowScale is not set, ensure that a GEP increment conforms to one of the +/// simple patterns generated by getAddRecExprPHILiterally and +/// expandAddToGEP. If the pattern isn't recognized, return NULL. +Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, + Instruction *InsertPos, + bool allowScale) { + if (IncV == InsertPos) + return NULL; + + switch (IncV->getOpcode()) { + default: + return NULL; + // Check for a simple Add/Sub or GEP of a loop invariant step. + case Instruction::Add: + case Instruction::Sub: { + Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); + if (!OInst || SE.DT->dominates(OInst, InsertPos)) + return dyn_cast<Instruction>(IncV->getOperand(0)); + return NULL; + } + case Instruction::BitCast: + return dyn_cast<Instruction>(IncV->getOperand(0)); + case Instruction::GetElementPtr: + for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); + I != E; ++I) { + if (isa<Constant>(*I)) + continue; + if (Instruction *OInst = dyn_cast<Instruction>(*I)) { + if (!SE.DT->dominates(OInst, InsertPos)) + return NULL; + } + if (allowScale) { + // allow any kind of GEP as long as it can be hoisted. + continue; + } + // This must be a pointer addition of constants (pretty), which is already + // handled, or some number of address-size elements (ugly). Ugly geps + // have 2 operands. i1* is used by the expander to represent an + // address-size element. + if (IncV->getNumOperands() != 2) + return NULL; + unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); + if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) + && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) + return NULL; + break; + } + return dyn_cast<Instruction>(IncV->getOperand(0)); + } +} + +/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make +/// it available to other uses in this loop. Recursively hoist any operands, +/// until we reach a value that dominates InsertPos.
+bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { + if (SE.DT->dominates(IncV, InsertPos)) + return true; + + // InsertPos must itself dominate IncV so that IncV's new position satisfies + // its existing users. + if (isa<PHINode>(InsertPos) + || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + // Check that the chain of IV operands leading back to Phi can be hoisted. + SmallVector<Instruction*, 4> IVIncs; + for(;;) { + Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); + if (!Oper) + return false; + // IncV is safe to hoist. + IVIncs.push_back(IncV); + IncV = Oper; + if (SE.DT->dominates(IncV, InsertPos)) + break; + } + for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(), + E = IVIncs.rend(); I != E; ++I) { + (*I)->moveBefore(InsertPos); + } + return true; +} + +/// Determine if this cyclic phi is in a form that would have been generated by +/// LSR. We don't care if the phi was actually expanded in this pass, as long +/// as it is in a low-cost form, for example, no implied multiplication. This +/// should match any patterns generated by getAddRecExprPHILiterally and +/// expandAddToGEP. +bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + for(Instruction *IVOper = IncV; + (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), + /*allowScale=*/false));) { + if (IVOper == PN) + return true; + } + return false; +} + +/// expandIVInc - Expand an IV increment at Builder's current InsertPos. +/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may +/// need to materialize IV increments elsewhere to handle difficult situations. +Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, + bool useSubtract) { + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) { + PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa<ConstantInt>(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType()); + rememberInstruction(IncV); + } + } else { + IncV = useSubtract ? + Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : + Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); + rememberInstruction(IncV); + } + return IncV; +} + +/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand +/// the base addrec, which is the addrec without any non-loop-dominating +/// values, and return the PHI. +PHINode * +SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, + Type *ExpandTy, + Type *IntTy) { + assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); + + // Reuse a previously-inserted PHI, if present.
+ BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (!SE.isSCEVable(PN->getType()) || + (SE.getEffectiveSCEVType(PN->getType()) != + SE.getEffectiveSCEVType(Normalized->getType())) || + SE.getSCEV(PN) != Normalized) + continue; + + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + if (LSRMode) { + if (!isExpandedAddRecExprPHI(PN, IncV, L)) + continue; + if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos)) + continue; + } + else { + if (!isNormalAddRecExprPHI(PN, IncV, L)) + continue; + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); + } while (IncV != PN); + } + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + rememberInstruction(IncV); + return PN; + } + } + + // Save the original insertion point so we can restore it when we're done. + BuilderType::InsertPointGuard Guard(Builder); + + // Another AddRec may need to be recursively expanded below. For example, if + // this AddRec is quadratic, the StepV may itself be an AddRec in this + // loop. Remove this loop from the PostIncLoops set before expanding such + // AddRecs. Otherwise, we cannot find a valid position for the step + // (i.e. StepV can never dominate its loop header). Ideally, we could do + // SavedPostIncLoops.swap(PostIncLoops), but we generally have a single + // element, so it's not worth implementing SmallPtrSet::swap. + PostIncLoopSet SavedPostIncLoops = PostIncLoops; + PostIncLoops.clear(); + + // Expand code for the start value.
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, + L->getHeader()->begin()); + + // StartV must be hoisted into L's preheader to dominate the new phi. + assert(!isa<Instruction>(StartV) || + SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(), + L->getHeader())); + + // Expand code for the step value. Do this before creating the PHI so that PHI + // reuse code doesn't see an incomplete PHI. + const SCEV *Step = Normalized->getStepRecurrence(SE); + // If the stride is negative, insert a sub instead of an add for the increment + // (unless it's a constant, because subtracts of constants are canonicalized + // to adds). + bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + + // Create the PHI. + BasicBlock *Header = L->getHeader(); + Builder.SetInsertPoint(Header, Header->begin()); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), + Twine(IVName) + ".iv"); + rememberInstruction(PN); + + // Create the step instructions and populate the PHI. + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { + BasicBlock *Pred = *HPI; + + // Add a start value. + if (!L->contains(Pred)) { + PN->addIncoming(StartV, Pred); + continue; + } + + // Create a step value and add it to the PHI. + // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the + // instructions at IVIncInsertPos. + Instruction *InsertPos = L == IVIncInsertLoop ?
+ IVIncInsertPos : Pred->getTerminator(); + Builder.SetInsertPoint(InsertPos); + Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + if (isa<OverflowingBinaryOperator>(IncV)) { + if (Normalized->getNoWrapFlags(SCEV::FlagNUW)) + cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap(); + if (Normalized->getNoWrapFlags(SCEV::FlagNSW)) + cast<BinaryOperator>(IncV)->setHasNoSignedWrap(); + } + PN->addIncoming(IncV, Pred); + } + + // After expanding subexpressions, restore the PostIncLoops set so the caller + // can ensure that IVIncrement dominates the current uses. + PostIncLoops = SavedPostIncLoops; + + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + + return PN; +} + +/// Expand an addrec by materializing a PHI for its loop-dominating core (see +/// getAddRecExprPHILiterally), selecting the post-incremented value when the +/// loop is in PostIncLoops, and then re-applying any step scale or start +/// offset that was stripped because it does not dominate the loop header. +Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { + Type *STy = S->getType(); + Type *IntTy = SE.getEffectiveSCEVType(STy); + const Loop *L = S->getLoop(); + + // Determine a normalized form of this expression, which is the expression + // before any post-inc adjustment is made. + const SCEVAddRecExpr *Normalized = S; + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); + } + + // Strip off any non-loop-dominating component from the addrec start. + const SCEV *Start = Normalized->getStart(); + const SCEV *PostLoopOffset = 0; + if (!SE.properlyDominates(Start, L->getHeader())) { + PostLoopOffset = Start; + Start = SE.getConstant(Normalized->getType(), 0); + Normalized = cast<SCEVAddRecExpr>( + SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), + Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); + } + + // Strip off any non-loop-dominating component from the addrec step.
+ const SCEV *Step = Normalized->getStepRecurrence(SE); + const SCEV *PostLoopScale = 0; + if (!SE.dominates(Step, L->getHeader())) { + PostLoopScale = Step; + Step = SE.getConstant(Normalized->getType(), 1); + Normalized = + cast<SCEVAddRecExpr>(SE.getAddRecExpr( + Start, Step, Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); + } + + // Expand the core addrec. If we need post-loop scaling, force it to + // expand to an integer type to avoid the need for additional casting. + Type *ExpandTy = PostLoopScale ? IntTy : STy; + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); + + // Accommodate post-inc mode, if necessary. + Value *Result; + if (!PostIncLoops.count(L)) + Result = PN; + else { + // In PostInc mode, use the post-incremented value. + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "PostInc mode requires a unique loop latch!"); + Result = PN->getIncomingValueForBlock(LatchBlock); + + // For an expansion to use the postinc form, the client must call + // expandCodeFor with an InsertPoint that is either outside the PostIncLoop + // or dominated by IVIncInsertPos. + if (isa<Instruction>(Result) + && !SE.DT->dominates(cast<Instruction>(Result), + Builder.GetInsertPoint())) { + // The induction variable's postinc expansion does not dominate this use. + // IVUsers tries to prevent this case, so it is rare. However, it can + // happen when an IVUser outside the loop is not dominated by the latch + // block. Adjusting IVIncInsertPos before expansion begins cannot handle + // all cases. Consider a phi outside the loop whose operand is replaced + // during expansion with the value of the postinc user. Without + // fundamentally changing the way postinc users are tracked, the only + // remedy is inserting an extra IV increment. StepV might fold into + // PostLoopOffset, but hopefully expandCodeFor handles that.
+ bool useSubtract = + !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + Value *StepV; + { + // Expand the step somewhere that dominates the loop header. + BuilderType::InsertPointGuard Guard(Builder); + StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + } + Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + } + } + + // Re-apply any non-loop-dominating scale. + if (PostLoopScale) { + assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateMul(Result, + expandCodeFor(PostLoopScale, IntTy)); + rememberInstruction(Result); + } + + // Re-apply any non-loop-dominating offset. + if (PostLoopOffset) { + if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { + const SCEV *const OffsetArray[1] = { PostLoopOffset }; + Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); + } else { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateAdd(Result, + expandCodeFor(PostLoopOffset, IntTy)); + rememberInstruction(Result); + } + } + + return Result; +} + +/// Expand an addrec. Outside canonical mode this defers to +/// expandAddRecExprLiterally. In canonical mode the addrec is rewritten in +/// terms of the loop's canonical induction variable, which is created here +/// if the loop does not already have a suitable one. +Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { + if (!CanonicalMode) return expandAddRecExprLiterally(S); + + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + const Loop *L = S->getLoop(); + + // First check for an existing canonical IV in a suitable type. + PHINode *CanonicalIV = 0; + if (PHINode *PN = L->getCanonicalInductionVariable()) + if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) + CanonicalIV = PN; + + // Rewrite an AddRec in terms of the canonical induction variable, if + // its type is more narrow.
+ if (CanonicalIV && + SE.getTypeSizeInBits(CanonicalIV->getType()) > + SE.getTypeSizeInBits(Ty)) { + SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); + for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), + S->getNoWrapFlags(SCEV::FlagNW))); + BasicBlock::iterator NewInsertPt = + llvm::next(BasicBlock::iterator(cast<Instruction>(V))); + BuilderType::InsertPointGuard Guard(Builder); + while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) || + isa<LandingPadInst>(NewInsertPt)) + ++NewInsertPt; + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, + NewInsertPt); + return V; + } + + // {X,+,F} --> X + {0,+,F} + if (!S->getStart()->isZero()) { + SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); + NewOps[0] = SE.getConstant(Ty, 0); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, + S->getNoWrapFlags(SCEV::FlagNW)); + + // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the + // comments on expandAddToGEP for details. + const SCEV *Base = S->getStart(); + const SCEV *RestArray[1] = { Rest }; + // Dig into the expression to find the pointer base for a GEP. + ExposePointerBase(Base, RestArray[0], SE); + // If we found a pointer, expand the AddRec with a GEP. + if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { + // Make sure the Base isn't something exotic, such as a multiplied + // or divided pointer value. In those cases, the result type isn't + // actually a pointer type. + if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { + Value *StartV = expand(Base); + assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); + } + } + + // Just do a normal add. Pre-expand the operands to suppress folding.
+ return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())), + SE.getUnknown(expand(Rest)))); + } + + // If we don't yet have a canonical IV, create one. + if (!CanonicalIV) { + // Create and insert the PHI node for the induction variable in the + // specified loop. + BasicBlock *Header = L->getHeader(); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", + Header->begin()); + rememberInstruction(CanonicalIV); + + SmallSet<BasicBlock *, 4> PredSeen; + Constant *One = ConstantInt::get(Ty, 1); + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { + BasicBlock *HP = *HPI; + if (!PredSeen.insert(HP)) + continue; + + if (L->contains(HP)) { + // Insert a unit add instruction right before the terminator + // corresponding to the back-edge. + Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, + "indvar.next", + HP->getTerminator()); + Add->setDebugLoc(HP->getTerminator()->getDebugLoc()); + rememberInstruction(Add); + CanonicalIV->addIncoming(Add, HP); + } else { + CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); + } + } + } + + // {0,+,1} --> Insert a canonical induction variable into the loop! + if (S->isAffine() && S->getOperand(1)->isOne()) { + assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && + "IVs with types different from the canonical IV should " + "already have been handled!"); + return CanonicalIV; + } + + // {0,+,F} --> {0,+,1} * F + + // If this is a simple linear addrec, emit it now as a special case. + if (S->isAffine()) // {0,+,F} --> i*F + return + expand(SE.getTruncateOrNoop( + SE.getMulExpr(SE.getUnknown(CanonicalIV), + SE.getNoopOrAnyExtend(S->getOperand(1), + CanonicalIV->getType())), + Ty)); + + // If this is a chain of recurrences, turn it into a closed form, using the + // folders, then expandCodeFor the closed form.
This allows the folders to + // simplify the expression without having to build a bunch of special code + // into this folder. + const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. + + // Promote S up to the canonical IV type, if the cast is foldable. + const SCEV *NewS = S; + const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); + if (isa<SCEVAddRecExpr>(Ext)) + NewS = Ext; + + const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); + //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; + + // Truncate the result down to the original type, if needed. + const SCEV *T = SE.getTruncateOrNoop(V, Ty); + return expand(T); +} + +/// Expand the operand in its own effective type, then truncate it to the +/// effective type of S. +Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateTrunc(V, Ty); + rememberInstruction(I); + return I; +} + +/// Expand the operand in its own effective type, then zero-extend it to the +/// effective type of S. +Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateZExt(V, Ty); + rememberInstruction(I); + return I; +} + +/// Expand the operand in its own effective type, then sign-extend it to the +/// effective type of S. +Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateSExt(V, Ty); + rememberInstruction(I); + return I; +} + +/// Expand a signed max as a chain of "icmp sgt" + select pairs, starting from +/// the last operand and folding in the remaining operands in reverse order. +Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +/// Expand an unsigned max as a chain of "icmp ugt" + select pairs, starting +/// from the last operand and folding in the remaining operands in reverse +/// order. +Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +/// Set the builder's insertion point to IP, then expand SH as type Ty via the +/// two-argument overload. +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, + Instruction *IP) { + Builder.SetInsertPoint(IP->getParent(), IP); + return expandCodeFor(SH, Ty); +} + +/// Expand SH at the builder's current insertion point. If Ty is non-null the +/// result is no-op cast to Ty, which must have the same bit width as SH's +/// type. +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { + // Expand the code for this SCEV.
+ Value *V = expand(SH); + if (Ty) { + assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && + "non-trivial casts should be done with the SCEVs directly!"); + V = InsertNoopCastOfTo(V, Ty); + } + return V; +} + +Value *SCEVExpander::expand(const SCEV *S) { + // Compute an insertion point for this SCEV object. Hoist the instructions + // as far out in the loop nest as possible. + Instruction *InsertPt = Builder.GetInsertPoint(); + for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; + L = L->getParentLoop()) + if (SE.isLoopInvariant(S, L)) { + if (!L) break; + if (BasicBlock *Preheader = L->getLoopPreheader()) + InsertPt = Preheader->getTerminator(); + else { + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = L->getHeader()->getFirstInsertionPt(); + } + } else { + // If the SCEV is computable at this level, insert it into the header + // after the PHIs (and after any other instructions that we've inserted + // there) so that it is guaranteed to dominate any user inside the loop. + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + InsertPt = L->getHeader()->getFirstInsertionPt(); + while (InsertPt != Builder.GetInsertPoint() + && (isInsertedInstruction(InsertPt) + || isa<DbgInfoIntrinsic>(InsertPt))) { + InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + } + break; + } + + // Check to see if we already expanded this here. + std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator + I = InsertedExpressions.find(std::make_pair(S, InsertPt)); + if (I != InsertedExpressions.end()) + return I->second; + + BuilderType::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); + + // Expand the expression into instructions.
+ Value *V = visit(S); + + // Remember the expanded value for this SCEV at this location. + // + // This is independent of PostIncLoops. The mapped value simply materializes + // the expression at this insertion point. If the mapped value happened to be + // a postinc expansion, it could be reused by a non postinc user, but only if + // its insertion point was already at the head of the loop. + InsertedExpressions[std::make_pair(S, InsertPt)] = V; + return V; +} + +/// Record I as a value inserted by the expander: in InsertedPostIncValues +/// while PostIncLoops is non-empty, otherwise in InsertedValues. +void SCEVExpander::rememberInstruction(Value *I) { + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(I); + else + InsertedValues.insert(I); +} + +/// getOrInsertCanonicalInductionVariable - This method returns the +/// canonical induction variable of the specified type for the specified +/// loop (inserting one if there is none). A canonical induction variable +/// starts at zero and steps by one on each iteration. +PHINode * +SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, + Type *Ty) { + assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); + + // Build a SCEV for {0,+,1}<L>. + // Conservatively use FlagAnyWrap for now. + const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), + SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); + + // Emit code for it. + BuilderType::InsertPointGuard Guard(Builder); + PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin())); + + return V; +} + +/// Sort values by integer width for replaceCongruentIVs. +static bool width_descending(Value *lhs, Value *rhs) { + // Put pointers at the back and make sure pointer < pointer = false. + // NOTE(review): as written, a non-integer lhs compares less than an integer + // rhs, so std::sort would order non-integer values ahead of integers -- + // confirm the "at the back" wording above against the uses of Phis.back(). + if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy()) + return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy(); + return rhs->getType()->getPrimitiveSizeInBits() + < lhs->getType()->getPrimitiveSizeInBits(); +} + +/// replaceCongruentIVs - Check for congruent phis in this loop header and +/// replace them with their most canonical representative.
Return the number of +/// phis eliminated. +/// +/// This does not depend on any SCEVExpander state but should be used in +/// the same context that SCEVExpander is used. +unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl<WeakVH> &DeadInsts, + const TargetTransformInfo *TTI) { + // Collect the phis in the loop header. When TTI is available they are + // sorted with width_descending so that wider integer phis are visited + // before narrower ones. + SmallVector<PHINode*, 8> Phis; + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *Phi = dyn_cast<PHINode>(I); ++I) { + Phis.push_back(Phi); + } + if (TTI) + std::sort(Phis.begin(), Phis.end(), width_descending); + + unsigned NumElim = 0; + DenseMap<const SCEV *, PHINode *> ExprToIVMap; + // Process phis from wide to narrow. Map wide phis to their truncation + // so narrow phis can reuse them. + for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(), + PEnd = Phis.end(); PIter != PEnd; ++PIter) { + PHINode *Phi = *PIter; + + // Fold constant phis. They may be congruent to other constant phis and + // would confuse the logic below that expects proper IVs. + if (Value *V = Phi->hasConstantValue()) { + Phi->replaceAllUsesWith(V); + DeadInsts.push_back(Phi); + ++NumElim; + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); + continue; + } + + if (!SE.isSCEVable(Phi->getType())) + continue; + + PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; + if (!OrigPhiRef) { + OrigPhiRef = Phi; + if (Phi->getType()->isIntegerTy() && TTI + && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + // This phi can be freely truncated to the narrowest phi type. Map the + // truncated expression to it so it will be reused for narrow types. + const SCEV *TruncExpr = + SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); + ExprToIVMap[TruncExpr] = Phi; + } + continue; + } + + // Replacing a pointer phi with an integer phi or vice-versa doesn't make + // sense.
+ if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) + continue; + + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *OrigInc = + cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock)); + Instruction *IsomorphicInc = + cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); + + // If this phi has the same width but is more canonical, replace the + // original with it. As part of the "more canonical" determination, + // respect a prior decision to use an IV chain. + if (OrigPhiRef->getType() == Phi->getType() + && !(ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) + && (ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { + std::swap(OrigPhiRef, Phi); + std::swap(OrigInc, IsomorphicInc); + } + // Replacing the congruent phi is sufficient because acyclic redundancy + // elimination, CSE/GVN, should handle the rest. However, once SCEV proves + // that a phi is congruent, it's often the head of an IV user cycle that + // is isomorphic with the original phi. It's worth eagerly cleaning up the + // common case of a single IV increment so that DeleteDeadPHIs can remove + // cycles that had postinc uses. + const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc), + IsomorphicInc->getType()); + if (OrigInc != IsomorphicInc + && TruncExpr == SE.getSCEV(IsomorphicInc) + && ((isa<PHINode>(OrigInc) && isa<PHINode>(IsomorphicInc)) + || hoistIVInc(OrigInc, IsomorphicInc))) { + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); + Value *NewInc = OrigInc; + if (OrigInc->getType() != IsomorphicInc->getType()) { + Instruction *IP = isa<PHINode>(OrigInc) + ? (Instruction*)L->getHeader()->getFirstInsertionPt() + : OrigInc->getNextNode(); + IRBuilder<> Builder(IP); + Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); + NewInc = Builder.
+ CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); + } + IsomorphicInc->replaceAllUsesWith(NewInc); + DeadInsts.push_back(IsomorphicInc); + } + } + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + ++NumElim; + Value *NewIV = OrigPhiRef; + if (OrigPhiRef->getType() != Phi->getType()) { + IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); + NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); + } + Phi->replaceAllUsesWith(NewIV); + DeadInsts.push_back(Phi); + } + return NumElim; +} + +namespace { +// Search for a SCEV subexpression that is not safe to expand. Any expression +// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely +// UDiv expressions. We don't know if the UDiv is derived from an IR divide +// instruction, but the important thing is that we prove the denominator is +// nonzero before expansion. +// +// IVUsers already checks that IV-derived expressions are safe. So this check is +// only needed when the expression includes some subexpression that is not IV +// derived. +// +// Currently, we only allow division by a nonzero constant here. If this is +// inadequate, we could easily allow division by SCEVUnknown by using +// ValueTracking to check isKnownNonZero(). +// +// We cannot generally expand recurrences unless the step dominates the loop +// header. The expander handles the special case of affine recurrences by +// scaling the recurrence outside the loop, but this technique isn't generally +// applicable. Expanding a nested recurrence outside a loop requires computing +// binomial coefficients. This could be done, but the recurrence has to be in a +// perfectly reduced form, which can't be guaranteed.
+struct SCEVFindUnsafe { + ScalarEvolution &SE; + bool IsUnsafe; + + SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} + + bool follow(const SCEV *S) { + if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); + if (!SC || SC->getValue()->isZero()) { + IsUnsafe = true; + return false; + } + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEV *Step = AR->getStepRecurrence(SE); + if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { + IsUnsafe = true; + return false; + } + } + return true; + } + bool isDone() const { return IsUnsafe; } +}; +} + +namespace llvm { +bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { + SCEVFindUnsafe Search(SE); + visitAll(S, Search); + return !Search.IsUnsafe; +} +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp new file mode 100644 index 000000000000..f1106168440d --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -0,0 +1,231 @@ +//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for working with "normalized" expressions. +// See the comments at the top of ScalarEvolutionNormalization.h for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;

/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
/// and now we need to decide whether the user should use the preinc or post-inc
/// value.  If this user should use the post-inc version of the IV, return true.
///
/// Choosing wrong here can break dominance properties (if we choose to use the
/// post-inc value when we cannot) or it can end up adding extra live-ranges to
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
///
/// \param User    The instruction using the IV expression.
/// \param Operand The operand of \p User being considered; may be null, in
///                which case the PHI special case below is not applied.
/// \param L       The loop of the IV.
/// \param DT      Dominator tree used for the latch-dominance queries.
static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
                                       const Loop *L, DominatorTree *DT) {
  // If the user is in the loop, use the preinc value.
  if (L->contains(User)) return false;

  // Without a unique latch block there is nothing to test dominance against,
  // so fall back to the pre-inc value.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock)
    return false;

  // Ok, the user is outside of the loop.  If it is dominated by the latch
  // block, use the post-inc value.
  if (DT->dominates(LatchBlock, User->getParent()))
    return true;

  // There is one case we have to be careful of: PHI nodes.  These little guys
  // can live in blocks that are not dominated by the latch block, but (since
  // their uses occur in the predecessor block, not the block the PHI lives in)
  // should still use the post-inc value.  Check for this case now.
  PHINode *PN = dyn_cast<PHINode>(User);
  if (!PN || !Operand) return false; // not a phi, not dominated by latch block.

  // Look at all of the uses of Operand by the PHI node.  If any use corresponds
  // to a block that is not dominated by the latch block, give up and use the
  // preincremented value.
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == Operand &&
        !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
      return false;

  // Okay, all uses of Operand by PN are in predecessor blocks that really are
  // dominated by the latch block.  Use the post-incremented value.
  return true;
}

namespace {

/// Hold the state used during post-inc expression transformation, including a
/// map of transformed expressions.
class PostIncTransform {
  TransformKind Kind;     // Which transformation is being applied.
  PostIncLoopSet &Loops;  // Loops in post-inc mode; inserted into by
                          // NormalizeAutodetect, queried by the other kinds.
  ScalarEvolution &SE;
  DominatorTree &DT;

  // Memo table from an already-visited expression to its transformed form.
  DenseMap<const SCEV*, const SCEV*> Transformed;

public:
  PostIncTransform(TransformKind kind, PostIncLoopSet &loops,
                   ScalarEvolution &se, DominatorTree &dt):
    Kind(kind), Loops(loops), SE(se), DT(dt) {}

  /// Memoizing entry point; see the definition below.
  const SCEV *TransformSubExpr(const SCEV *S, Instruction *User,
                               Value *OperandValToReplace);

protected:
  /// Per-node worker that does the actual rewriting.
  const SCEV *TransformImpl(const SCEV *S, Instruction *User,
                            Value *OperandValToReplace);
};

} // namespace

/// Implement post-inc transformation for all valid expression types.
///
/// Handles cast, add-recurrence, n-ary and udiv expressions by transforming
/// their operands through TransformSubExpr. Any other expression kind reaching
/// this function hits llvm_unreachable (constants and unknowns are filtered
/// out by TransformSubExpr before it calls here).
const SCEV *PostIncTransform::
TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {

  // Casts: rebuild the same cast kind only if the operand changed.
  if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
    const SCEV *O = X->getOperand();
    const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
    if (O != N)
      switch (S->getSCEVType()) {
      case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
      case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
      case scTruncate:   return SE.getTruncateExpr(N, S->getType());
      default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
      }
    return S;
  }

  // Must be tested before the generic SCEVNAryExpr case below.
  // NOTE(review): this ordering presumably matters because add-recurrences
  // are themselves n-ary expressions -- confirm against the class hierarchy.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // An addrec. This is the interesting part.
    SmallVector<const SCEV *, 8> Operands;
    const Loop *L = AR->getLoop();
    // The addrec conceptually uses its operands at loop entry.
    Instruction *LUser = L->getHeader()->begin();
    // Transform each operand. The operands are transformed relative to the
    // loop-entry user with no operand-to-replace (null).
    for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
         I != E; ++I) {
      Operands.push_back(TransformSubExpr(*I, LUser, 0));
    }
    // Conservatively use AnyWrap until/unless we need FlagNW.
    const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
    switch (Kind) {
    case NormalizeAutodetect:
      // Normalize this SCEV by subtracting the expression for the final step.
      // We only allow affine AddRecs to be normalized, otherwise we would not
      // be able to correctly denormalize.
      // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
      // Normalized form:   {-2,+,1,+,2}
      // Denormalized form: {1,+,3,+,2}
      //
      // However, denormalization would use a different step expression than
      // normalization (see getPostIncExpr), generating the wrong final
      // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
      if (AR->isAffine() &&
          IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
        Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
        // Record the loop so later Normalize/Denormalize passes act on it.
        Loops.insert(L);
      }
#if 0
      // This assert is conceptually correct, but ScalarEvolution currently
      // sometimes fails to canonicalize two equal SCEVs to exactly the same
      // form. It's possibly a pessimization when this happens, but it isn't a
      // correctness problem, so disable this assert for now.
      assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
             "SCEV normalization is not invertible!");
#endif
      break;
    case Normalize:
      // Only loops already recorded in Loops are normalized; unlike the
      // autodetect case, the subtracted step is itself transformed first.
      if (Loops.count(L)) {
        const SCEV *TransformedStep =
          TransformSubExpr(AR->getStepRecurrence(SE),
                           User, OperandValToReplace);
        Result = SE.getMinusSCEV(Result, TransformedStep);
      }
#if 0
      // See the comment on the assert above.
      assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
             "SCEV normalization is not invertible!");
#endif
      break;
    case Denormalize:
      // Only loops recorded in Loops are denormalized.
      if (Loops.count(L))
        Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
      break;
    }
    return Result;
  }

  // Remaining n-ary kinds (add, mul, smax, umax): rebuild only on change.
  if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
    SmallVector<const SCEV *, 8> Operands;
    bool Changed = false;
    // Transform each operand.
    for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
         I != E; ++I) {
      const SCEV *O = *I;
      const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
      Changed |= N != O;
      Operands.push_back(N);
    }
    // If any operand actually changed, return a transformed result.
    if (Changed)
      switch (S->getSCEVType()) {
      case scAddExpr: return SE.getAddExpr(Operands);
      case scMulExpr: return SE.getMulExpr(Operands);
      case scSMaxExpr: return SE.getSMaxExpr(Operands);
      case scUMaxExpr: return SE.getUMaxExpr(Operands);
      default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
      }
    return S;
  }

  // Unsigned division: rebuild only if either side changed.
  if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
    const SCEV *LO = X->getLHS();
    const SCEV *RO = X->getRHS();
    const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace);
    const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace);
    if (LO != LN || RO != RN)
      return SE.getUDivExpr(LN, RN);
    return S;
  }

  llvm_unreachable("Unexpected SCEV kind!");
}

/// Manage recursive transformation across an expression DAG. Revisiting
/// expressions would lead to exponential recursion.
+const SCEV *PostIncTransform:: +TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) { + + if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) + return S; + + const SCEV *Result = Transformed.lookup(S); + if (Result) + return Result; + + Result = TransformImpl(S, User, OperandValToReplace); + Transformed[S] = Result; + return Result; +} + +/// Top level driver for transforming an expression DAG into its requested +/// post-inc form (either "Normalized" or "Denormalized". +const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT) { + PostIncTransform Transform(Kind, Loops, SE, DT); + return Transform.TransformSubExpr(S, User, OperandValToReplace); +} diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp new file mode 100644 index 000000000000..15b78728a73c --- /dev/null +++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp @@ -0,0 +1,347 @@ +//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an abstract sparse conditional propagation algorithm, +// modeled after SCCP, but with a customizable lattice function. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sparseprop" +#include "llvm/Analysis/SparsePropagation.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// AbstractLatticeFunction Implementation +//===----------------------------------------------------------------------===// + +AbstractLatticeFunction::~AbstractLatticeFunction() {} + +/// PrintValue - Render the specified lattice value to the specified stream. +void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) { + if (V == UndefVal) + OS << "undefined"; + else if (V == OverdefinedVal) + OS << "overdefined"; + else if (V == UntrackedVal) + OS << "untracked"; + else + OS << "unknown lattice value"; +} + +//===----------------------------------------------------------------------===// +// SparseSolver Implementation +//===----------------------------------------------------------------------===// + +/// getOrInitValueState - Return the LatticeVal object that corresponds to the +/// value, initializing the value's state if it hasn't been entered into the +/// map yet. This function is necessary because not all values should start +/// out in the underdefined state... Arguments should be overdefined, and +/// constants should be marked as constants. 
+/// +SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) { + DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V); + if (I != ValueState.end()) return I->second; // Common case, in the map + + LatticeVal LV; + if (LatticeFunc->IsUntrackedValue(V)) + return LatticeFunc->getUntrackedVal(); + else if (Constant *C = dyn_cast<Constant>(V)) + LV = LatticeFunc->ComputeConstant(C); + else if (Argument *A = dyn_cast<Argument>(V)) + LV = LatticeFunc->ComputeArgument(A); + else if (!isa<Instruction>(V)) + // All other non-instructions are overdefined. + LV = LatticeFunc->getOverdefinedVal(); + else + // All instructions are underdefined by default. + LV = LatticeFunc->getUndefVal(); + + // If this value is untracked, don't add it to the map. + if (LV == LatticeFunc->getUntrackedVal()) + return LV; + return ValueState[V] = LV; +} + +/// UpdateState - When the state for some instruction is potentially updated, +/// this function notices and adds I to the worklist if needed. +void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) { + DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst); + if (I != ValueState.end() && I->second == V) + return; // No change. + + // An update. Visit uses of I. + ValueState[&Inst] = V; + InstWorkList.push_back(&Inst); +} + +/// MarkBlockExecutable - This method can be used by clients to mark all of +/// the blocks that are known to be intrinsically live in the processed unit. +void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { + DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); + BBExecutable.insert(BB); // Basic block is executable! + BBWorkList.push_back(BB); // Add the block to the work list! +} + +/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB +/// work list if it is not already executable... 
void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
  // insert() reports whether the edge was new; a repeat is a no-op.
  if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
    return;  // This edge is already known to be executable!

  DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
        << " -> " << Dest->getName() << "\n");

  if (BBExecutable.count(Dest)) {
    // The destination is already executable, but we just made an edge
    // feasible that wasn't before.  Revisit the PHI nodes in the block
    // because they have potentially new operands.
    for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
      visitPHINode(*cast<PHINode>(I));

  } else {
    // First feasible edge into Dest: the whole block becomes executable.
    MarkBlockExecutable(Dest);
  }
}


/// getFeasibleSuccessors - Return a vector of booleans to indicate which
/// successors are reachable from a given terminator instruction.
///
/// \param TI              Terminator to analyze. Branch, invoke, indirectbr
///                        and switch are handled; anything else with
///                        successors falls into the cast<SwitchInst> below.
/// \param Succs           Output; resized to TI's successor count, with a
///                        true entry for each feasible successor.
/// \param AggressiveUndef Selects how the condition's lattice state is
///                        obtained: getOrInitValueState() when true,
///                        getLatticeState() when false.
void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
                                         SmallVectorImpl<bool> &Succs,
                                         bool AggressiveUndef) {
  Succs.resize(TI.getNumSuccessors());
  if (TI.getNumSuccessors() == 0) return;

  if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
    if (BI->isUnconditional()) {
      Succs[0] = true;
      return;
    }

    LatticeVal BCValue;
    if (AggressiveUndef)
      BCValue = getOrInitValueState(BI->getCondition());
    else
      BCValue = getLatticeState(BI->getCondition());

    if (BCValue == LatticeFunc->getOverdefinedVal() ||
        BCValue == LatticeFunc->getUntrackedVal()) {
      // Overdefined condition variables can branch either way.
      Succs[0] = Succs[1] = true;
      return;
    }

    // If undefined, neither is feasible yet.
    if (BCValue == LatticeFunc->getUndefVal())
      return;

    Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
    if (C == 0 || !isa<ConstantInt>(C)) {
      // Non-constant values can go either way.
      Succs[0] = Succs[1] = true;
      return;
    }

    // Constant condition variables mean the branch can only go a single way.
    // isNullValue() is used as the index: a null (false) condition selects
    // successor 1, anything else selects successor 0.
    Succs[C->isNullValue()] = true;
    return;
  }

  if (isa<InvokeInst>(TI)) {
    // Invoke instructions successors are always executable.
    // TODO: Could ask the lattice function if the value can throw.
    Succs[0] = Succs[1] = true;
    return;
  }

  if (isa<IndirectBrInst>(TI)) {
    // Every destination of an indirect branch is treated as feasible.
    Succs.assign(Succs.size(), true);
    return;
  }

  // Everything that reaches this point is treated as a switch.
  SwitchInst &SI = cast<SwitchInst>(TI);
  LatticeVal SCValue;
  if (AggressiveUndef)
    SCValue = getOrInitValueState(SI.getCondition());
  else
    SCValue = getLatticeState(SI.getCondition());

  if (SCValue == LatticeFunc->getOverdefinedVal() ||
      SCValue == LatticeFunc->getUntrackedVal()) {
    // All destinations are executable!
    Succs.assign(TI.getNumSuccessors(), true);
    return;
  }

  // If undefined, neither is feasible yet.
  if (SCValue == LatticeFunc->getUndefVal())
    return;

  Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
  if (C == 0 || !isa<ConstantInt>(C)) {
    // All destinations are executable!
    Succs.assign(TI.getNumSuccessors(), true);
    return;
  }
  // Known constant: only the successor of the matching case is feasible.
  SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
  Succs[Case.getSuccessorIndex()] = true;
}


/// isEdgeFeasible - Return true if the control flow edge from the 'From'
/// basic block to the 'To' basic block is currently feasible...
bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
                                  bool AggressiveUndef) {
  SmallVector<bool, 16> SuccFeasible;
  TerminatorInst *TI = From->getTerminator();
  getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);

  // The same block can appear as several successors; any feasible one that
  // targets 'To' is enough.
  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
    if (TI->getSuccessor(i) == To && SuccFeasible[i])
      return true;

  return false;
}

/// Mark the out-edges of \p TI that are currently feasible as executable.
/// Feasibility is computed with AggressiveUndef set to true.
void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
  SmallVector<bool, 16> SuccFeasible;
  getFeasibleSuccessors(TI, SuccFeasible, true);

  BasicBlock *BB = TI.getParent();

  // Mark all feasible successors executable...
  for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
    if (SuccFeasible[i])
      markEdgeExecutable(BB, TI.getSuccessor(i));
}

/// Recompute the lattice state of \p PN by merging the states of its incoming
/// values over feasible edges, and record the result through UpdateState.
void SparseSolver::visitPHINode(PHINode &PN) {
  // The lattice function may store more information on a PHINode than could be
  // computed from its incoming values.  For example, SSI form stores its sigma
  // functions as PHINodes with a single incoming value.
  if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
    LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
    if (IV != LatticeFunc->getUntrackedVal())
      UpdateState(PN, IV);
    return;
  }

  LatticeVal PNIV = getOrInitValueState(&PN);
  LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();

  // If this value is already overdefined (common) just return.
  if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
    return;  // Quick exit

  // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
  // and slow us down a lot.  Just mark them overdefined.
  if (PN.getNumIncomingValues() > 64) {
    UpdateState(PN, Overdefined);
    return;
  }

  // Look at all of the executable operands of the PHI node.  If any of them
  // are overdefined, the PHI becomes overdefined as well.  Otherwise, ask the
  // transfer function to give us the merge of the incoming values.
  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
    // If the edge is not yet known to be feasible, it doesn't impact the PHI.
    if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
      continue;

    // Merge in this value.
    LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
    if (OpVal != PNIV)
      PNIV = LatticeFunc->MergeValues(PNIV, OpVal);

    if (PNIV == Overdefined)
      break;  // Rest of input values don't matter.
  }

  // Update the PHI with the computed value, which is the merge of the inputs.
  UpdateState(PN, PNIV);
}


/// Visit one instruction: PHIs go to visitPHINode; everything else is handed
/// to the lattice function's transfer function, and terminators additionally
/// have their feasible successors marked.
void SparseSolver::visitInst(Instruction &I) {
  // PHIs are handled by the propagation logic, they are never passed into the
  // transfer functions.
  if (PHINode *PN = dyn_cast<PHINode>(&I))
    return visitPHINode(*PN);

  // Otherwise, ask the transfer function what the result is.  If this is
  // something that we care about, remember it.
  LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
  if (IV != LatticeFunc->getUntrackedVal())
    UpdateState(I, IV);

  if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
    visitTerminatorInst(*TI);
}

/// Run the propagation over \p F until both work lists are empty, starting
/// from the entry block. Both lists are consumed from the back (LIFO).
void SparseSolver::Solve(Function &F) {
  MarkBlockExecutable(&F.getEntryBlock());

  // Process the work lists until they are empty!
  while (!BBWorkList.empty() || !InstWorkList.empty()) {
    // Process the instruction work list.
    while (!InstWorkList.empty()) {
      Instruction *I = InstWorkList.back();
      InstWorkList.pop_back();

      DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n");

      // "I" got into the work list because it made a transition.  See if any
      // users are both live and in need of updating.
      for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
           UI != E; ++UI) {
        // Every user is cast to Instruction unconditionally.
        Instruction *U = cast<Instruction>(*UI);
        if (BBExecutable.count(U->getParent()))   // Inst is executable?
          visitInst(*U);
      }
    }

    // Process the basic block work list.
    while (!BBWorkList.empty()) {
      BasicBlock *BB = BBWorkList.back();
      BBWorkList.pop_back();

      DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);

      // Notify all instructions in this basic block that they are newly
      // executable.
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        visitInst(*I);
    }
  }
}

/// Dump the solver state for \p F to \p OS: one section per basic block
/// (prefixed with "INFEASIBLE: " when the block was never marked executable),
/// and for each instruction its lattice value followed by the instruction.
void SparseSolver::Print(Function &F, raw_ostream &OS) const {
  OS << "\nFUNCTION: " << F.getName() << "\n";
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    if (!BBExecutable.count(BB))
      OS << "INFEASIBLE: ";
    OS << "\t";
    if (BB->hasName())
      OS << BB->getName() << ":\n";
    else
      OS << "; anon bb\n";
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      LatticeFunc->PrintValue(getLatticeState(I), OS);
      OS << *I << "\n";
    }

    OS << "\n";
  }
}

diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
new file mode 100644
index 000000000000..0353295345ce
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -0,0 +1,614 @@
//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "tti"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Setup the analysis group to manage the TargetTransformInfo passes.
INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
char TargetTransformInfo::ID = 0;

TargetTransformInfo::~TargetTransformInfo() {
}

/// Link this TTI on top of the TTI currently provided by pass \p P.
/// This object becomes the top of the chain, and every TTI below it has its
/// TopTTI pointer redirected here.
void TargetTransformInfo::pushTTIStack(Pass *P) {
  TopTTI = this;
  PrevTTI = &P->getAnalysis<TargetTransformInfo>();

  // Walk up the chain and update the top TTI pointer.
  for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
    PTTI->TopTTI = this;
}

/// Undo pushTTIStack: the previous TTI becomes the top of the chain again and
/// this object's TopTTI/PrevTTI links are cleared.
void TargetTransformInfo::popTTIStack() {
  TopTTI = 0;

  // Walk up the chain and update the top TTI pointer.
  for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
    PTTI->TopTTI = PrevTTI;

  PrevTTI = 0;
}

/// Declare the dependency on the TargetTransformInfo analysis group itself,
/// which is what pushTTIStack() fetches as the previous TTI.
void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetTransformInfo>();
}

//
// Default implementations of the query interface. Every function from here
// down to getReductionCost() simply forwards its arguments, unchanged, to the
// same query on PrevTTI and returns that result. PrevTTI is dereferenced
// without a null check.
//

unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                               Type *OpTy) const {
  return PrevTTI->getOperationCost(Opcode, Ty, OpTy);
}

unsigned TargetTransformInfo::getGEPCost(
    const Value *Ptr, ArrayRef<const Value *> Operands) const {
  return PrevTTI->getGEPCost(Ptr, Operands);
}

unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
                                          int NumArgs) const {
  return PrevTTI->getCallCost(FTy, NumArgs);
}

unsigned TargetTransformInfo::getCallCost(const Function *F,
                                          int NumArgs) const {
  return PrevTTI->getCallCost(F, NumArgs);
}

unsigned TargetTransformInfo::getCallCost(
    const Function *F, ArrayRef<const Value *> Arguments) const {
  return PrevTTI->getCallCost(F, Arguments);
}

unsigned TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const {
  return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys);
}

unsigned TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
  return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments);
}

unsigned TargetTransformInfo::getUserCost(const User *U) const {
  return PrevTTI->getUserCost(U);
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return PrevTTI->hasBranchDivergence();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return PrevTTI->isLoweredToCall(F);
}

void TargetTransformInfo::getUnrollingPreferences(Loop *L,
                            UnrollingPreferences &UP) const {
  PrevTTI->getUnrollingPreferences(L, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return PrevTTI->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return PrevTTI->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale) const {
  return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale);
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale) const {
  return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                       Scale);
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return PrevTTI->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return PrevTTI->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return PrevTTI->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return PrevTTI->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return PrevTTI->shouldBuildLookupTables();
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return PrevTTI->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return PrevTTI->haveFastSqrt(Ty);
}

unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  return PrevTTI->getIntImmCost(Imm, Ty);
}

unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return PrevTTI->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return PrevTTI->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
  return PrevTTI->getMaximumUnrollFactor();
}

unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
                                                Type *Ty,
                                                OperandValueKind Op1Info,
                                                OperandValueKind Op2Info) const {
  return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}

unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
                                             int Index, Type *SubTp) const {
  return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp);
}

unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                               Type *Src) const {
  return PrevTTI->getCastInstrCost(Opcode, Dst, Src);
}

unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  return PrevTTI->getCFInstrCost(Opcode);
}

unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                                 Type *CondTy) const {
  return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy);
}

unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                 unsigned Index) const {
  return PrevTTI->getVectorInstrCost(Opcode, Val, Index);
}

unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                              unsigned Alignment,
                                              unsigned AddressSpace) const {
  return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  // NOTE(review): the lone ';' below is an empty statement after the return;
  // it has no effect and could be dropped in a code cleanup.
  ;
}

unsigned
TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,
                                           Type *RetTy,
                                           ArrayRef<Type *> Tys) const {
  return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys);
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return PrevTTI->getNumberOfParts(Tp);
}

unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                        bool IsComplex) const {
  return PrevTTI->getAddressComputationCost(Tp, IsComplex);
}

unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
                                               bool IsPairwise) const {
  return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
}

namespace {

struct NoTTI : ImmutablePass, TargetTransformInfo {
  const DataLayout *DL;

  NoTTI() : ImmutablePass(ID), DL(0) {
    initializeNoTTIPass(*PassRegistry::getPassRegistry());
  }

  virtual void initializePass() {
    // Note that this subclass is special, and must *not* call initializeTTI as
    // it does not chain.
    TopTTI = this;
    PrevTTI = 0;
    DL = getAnalysisIfAvailable<DataLayout>();
  }

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    // Note that this subclass is special, and must *not* call
    // TTI::getAnalysisUsage as it breaks the recursion.
  }

  /// Pass identification.
  static char ID;

  /// Provide necessary pointer adjustments for the two base classes.
  virtual void *getAdjustedAnalysisPointer(const void *ID) {
    if (ID == &TargetTransformInfo::ID)
      return (TargetTransformInfo*)this;
    return this;
  }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TCC_Free;

      // Otherwise, the default basic cost is used.
      return TCC_Basic;

    case Instruction::IntToPtr: {
      if (!DL)
        return TCC_Basic;

      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
+ unsigned OpSize = OpTy->getScalarSizeInBits(); + if (DL->isLegalInteger(OpSize) && + OpSize <= DL->getPointerTypeSizeInBits(Ty)) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + } + case Instruction::PtrToInt: { + if (!DL) + return TCC_Basic; + + // A ptrtoint cast is free so long as the result is large enough to store + // the pointer, and a legal integer type. + unsigned DestSize = Ty->getScalarSizeInBits(); + if (DL->isLegalInteger(DestSize) && + DestSize >= DL->getPointerTypeSizeInBits(OpTy)) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + } + case Instruction::Trunc: + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty))) + return TCC_Free; + + return TCC_Basic; + } + } + + unsigned getGEPCost(const Value *Ptr, + ArrayRef<const Value *> Operands) const { + // In the basic model, we just assume that all-constant GEPs will be folded + // into their uses via addressing modes. + for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) + if (!isa<Constant>(Operands[Idx])) + return TCC_Basic; + + return TCC_Free; + } + + unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const { + assert(FTy && "FunctionType must be provided to this routine."); + + // The target-independent implementation just measures the size of the + // function by approximating that each argument will take on average one + // instruction to prepare. + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = FTy->getNumParams(); + + return TCC_Basic * (NumArgs + 1); + } + + unsigned getCallCost(const Function *F, int NumArgs = -1) const { + assert(F && "A concrete function must be provided to this routine."); + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. 
+ NumArgs = F->arg_size(); + + if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) { + FunctionType *FTy = F->getFunctionType(); + SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); + return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); + } + + if (!TopTTI->isLoweredToCall(F)) + return TCC_Basic; // Give a basic cost if it will be lowered directly. + + return TopTTI->getCallCost(F->getFunctionType(), NumArgs); + } + + unsigned getCallCost(const Function *F, + ArrayRef<const Value *> Arguments) const { + // Simply delegate to generic handling of the call. + // FIXME: We should use instsimplify or something else to catch calls which + // will constant fold with these arguments. + return TopTTI->getCallCost(F, Arguments.size()); + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<Type *> ParamTys) const { + switch (IID) { + default: + // Intrinsics rarely (if ever) have normal argument setup constraints. + // Model them as having a basic instruction cost. + // FIXME: This is wrong for libc intrinsics. + return TCC_Basic; + + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't actually represent code after lowering. + return TCC_Free; + } + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) const { + // Delegate to the generic intrinsic handling code. This mostly provides an + // opportunity for targets to (for example) special case the cost of + // certain intrinsics based on constants used as arguments. 
+ SmallVector<Type *, 8> ParamTys; + ParamTys.reserve(Arguments.size()); + for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) + ParamTys.push_back(Arguments[Idx]->getType()); + return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys); + } + + unsigned getUserCost(const User *U) const { + if (isa<PHINode>(U)) + return TCC_Free; // Model all PHI nodes as free. + + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) + // In the basic model we just assume that all-constant GEPs will be + // folded into their uses via addressing modes. + return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic; + + if (ImmutableCallSite CS = U) { + const Function *F = CS.getCalledFunction(); + if (!F) { + // Just use the called value type. + Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); + return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size()); + } + + SmallVector<const Value *, 8> Arguments; + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), + AE = CS.arg_end(); + AI != AE; ++AI) + Arguments.push_back(*AI); + + return TopTTI->getCallCost(F, Arguments); + } + + if (const CastInst *CI = dyn_cast<CastInst>(U)) { + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa<CmpInst>(CI->getOperand(0))) + return TCC_Free; + } + + // Otherwise delegate to the fully generic implementations. + return getOperationCost(Operator::getOpcode(U), U->getType(), + U->getNumOperands() == 1 ? + U->getOperand(0)->getType() : 0); + } + + bool hasBranchDivergence() const { return false; } + + bool isLoweredToCall(const Function *F) const { + // FIXME: These should almost certainly not be handled here, and instead + // handled with the help of TLI or the target itself. This was largely + // ported from existing analysis heuristics here so that such refactorings + // can take place in the future. 
+ + if (F->isIntrinsic()) + return false; + + if (F->hasLocalLinkage() || !F->hasName()) + return true; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || + Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || + Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") + return false; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || + Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name == + "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" || + Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") + return false; + + return true; + } + + void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } + + bool isLegalAddImmediate(int64_t Imm) const { + return false; + } + + bool isLegalICmpImmediate(int64_t Imm) const { + return false; + } + + bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) const { + // Guess that reg+reg addressing is allowed. This heuristic is taken from + // the implementation of LSR. + return !BaseGV && BaseOffset == 0 && Scale <= 1; + } + + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) const { + // Guess that all legal addressing mode are free. 
+ if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale)) + return 0; + return -1; + } + + + bool isTruncateFree(Type *Ty1, Type *Ty2) const { + return false; + } + + bool isTypeLegal(Type *Ty) const { + return false; + } + + unsigned getJumpBufAlignment() const { + return 0; + } + + unsigned getJumpBufSize() const { + return 0; + } + + bool shouldBuildLookupTables() const { + return true; + } + + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const { + return PSK_Software; + } + + bool haveFastSqrt(Type *Ty) const { + return false; + } + + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const { + return 1; + } + + unsigned getNumberOfRegisters(bool Vector) const { + return 8; + } + + unsigned getRegisterBitWidth(bool Vector) const { + return 32; + } + + unsigned getMaximumUnrollFactor() const { + return 1; + } + + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, + OperandValueKind) const { + return 1; + } + + unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index = 0, Type *SubTp = 0) const { + return 1; + } + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + return 1; + } + + unsigned getCFInstrCost(unsigned Opcode) const { + return 1; + } + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = 0) const { + return 1; + } + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = -1) const { + return 1; + } + + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return 1; + } + + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, + Type *RetTy, + ArrayRef<Type*> Tys) const { + return 1; + } + + unsigned getNumberOfParts(Type *Tp) const { + return 0; + } + + unsigned getAddressComputationCost(Type *Tp, bool) const { + return 0; + } + + unsigned getReductionCost(unsigned, Type *, bool) const { + return 1; + } +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(NoTTI, 
TargetTransformInfo, "notti", + "No target information", true, true, true) +char NoTTI::ID = 0; + +ImmutablePass *llvm::createNoTargetTransformInfoPass() { + return new NoTTI(); +} diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp new file mode 100644 index 000000000000..4c68322b8282 --- /dev/null +++ b/contrib/llvm/lib/Analysis/Trace.cpp @@ -0,0 +1,53 @@ +//===- Trace.cpp - Implementation of Trace class --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents a single trace of LLVM basic blocks. A trace is a +// single entry, multiple exit, region of code that is often hot. Trace-based +// optimizations treat traces almost like they are a large, strange, basic +// block: because the trace path is assumed to be hot, optimizations for the +// fall-through path are made at the expense of the non-fall-through paths. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Trace.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +Function *Trace::getFunction() const { + return getEntryBasicBlock()->getParent(); +} + +Module *Trace::getModule() const { + return getFunction()->getParent(); +} + +/// print - Write trace to output stream. 
+/// +void Trace::print(raw_ostream &O) const { + Function *F = getFunction(); + O << "; Trace from function " << F->getName() << ", blocks:\n"; + for (const_iterator i = begin(), e = end(); i != e; ++i) { + O << "; "; + WriteAsOperand(O, *i, true, getModule()); + O << "\n"; + } + O << "; Trace parent function: \n" << *F; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// dump - Debugger convenience method; writes trace to standard error +/// output stream. +/// +void Trace::dump() const { + print(dbgs()); +} +#endif diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp new file mode 100644 index 000000000000..6791d4b9102b --- /dev/null +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -0,0 +1,607 @@ +//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TypeBasedAliasAnalysis pass, which implements +// metadata-based TBAA. +// +// In LLVM IR, memory does not have types, so LLVM's own type system is not +// suitable for doing TBAA. Instead, metadata is added to the IR to describe +// a type system of a higher level language. This can be used to implement +// typical C/C++ TBAA, but it can also be used to implement custom alias +// analysis behavior for other languages. +// +// We now support two types of metadata format: scalar TBAA and struct-path +// aware TBAA. After all testing cases are upgraded to use struct-path aware +// TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA +// can be dropped. +// +// The scalar TBAA metadata format is very simple. 
TBAA MDNodes have up to +// three fields, e.g.: +// !0 = metadata !{ metadata !"an example type tree" } +// !1 = metadata !{ metadata !"int", metadata !0 } +// !2 = metadata !{ metadata !"float", metadata !0 } +// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 } +// +// The first field is an identity field. It can be any value, usually +// an MDString, which uniquely identifies the type. The most important +// name in the tree is the name of the root node. Two trees with +// different root node names are entirely disjoint, even if they +// have leaves with common names. +// +// The second field identifies the type's parent node in the tree, or +// is null or omitted for a root node. A type is considered to alias +// all of its descendants and all of its ancestors in the tree. Also, +// a type is considered to alias all types in other trees, so that +// bitcode produced from multiple front-ends is handled conservatively. +// +// If the third field is present, it's an integer which if equal to 1 +// indicates that the type is "constant" (meaning pointsToConstantMemory +// should return true; see +// http://llvm.org/docs/AliasAnalysis.html#OtherItfs). +// +// With struct-path aware TBAA, the MDNodes attached to an instruction using +// "!tbaa" are called path tag nodes. +// +// The path tag node has 4 fields with the last field being optional. +// +// The first field is the base type node, it can be a struct type node +// or a scalar type node. The second field is the access type node, it +// must be a scalar type node. The third field is the offset into the base type. +// The last field has the same meaning as the last field of our scalar TBAA: +// it's an integer which if equal to 1 indicates that the access is "constant". +// +// The struct type node has a name and a list of pairs, one pair for each member +// of the struct. 
The first element of each pair is a type node (a struct type +// node or a scalar type node), specifying the type of the member, the second +// element of each pair is the offset of the member. +// +// Given an example +// typedef struct { +// short s; +// } A; +// typedef struct { +// uint16_t s; +// A a; +// } B; +// +// For an access to B.a.s, we attach !5 (a path tag node) to the load/store +// instruction. The base type is !4 (struct B), the access type is !2 (scalar +// type short) and the offset is 4. +// +// !0 = metadata !{metadata !"Simple C/C++ TBAA"} +// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node +// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node +// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node +// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4} +// // Struct type node +// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node +// +// The struct type nodes and the scalar type nodes form a type DAG. +// Root (!0) +// char (!1) -- edge to Root +// short (!2) -- edge to char +// A (!3) -- edge with offset 0 to short +// B (!4) -- edge with offset 0 to short and edge with offset 4 to A +// +// To check if two tags (tagX and tagY) can alias, we start from the base type +// of tagX, follow the edge with the correct offset in the type DAG and adjust +// the offset until we reach the base type of tagY or until we reach the Root +// node. +// If we reach the base type of tagY, compare the adjusted offset with +// offset of tagY, return Alias if the offsets are the same, return NoAlias +// otherwise. +// If we reach the Root node, perform the above starting from base type of tagY +// to see if we reach base type of tagX. +// +// If they have different roots, they're part of different potentially +// unrelated type systems, so we return Alias to be conservative. 
+// If neither node is an ancestor of the other and they have the same root, +// then we say NoAlias. +// +// TODO: The current metadata format doesn't support struct +// fields. For example: +// struct X { +// double d; +// int i; +// }; +// void foo(struct X *x, struct X *y, double *p) { +// *x = *y; +// *p = 0.0; +// } +// Struct X has a double member, so the store to *x can alias the store to *p. +// Currently it's not possible to precisely describe all the things struct X +// aliases, so struct assignments must use conservative TBAA nodes. There's +// no scheme for attaching metadata to @llvm.memcpy yet either. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +// A handy option for disabling TBAA functionality. The same effect can also be +// achieved by stripping the !tbaa tags from IR, but this option is sometimes +// more convenient. +static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); + +namespace { + /// TBAANode - This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAANode { + const MDNode *Node; + + public: + TBAANode() : Node(0) {} + explicit TBAANode(const MDNode *N) : Node(N) {} + + /// getNode - Get the MDNode for this TBAANode. + const MDNode *getNode() const { return Node; } + + /// getParent - Get this TBAANode's Alias tree parent. + TBAANode getParent() const { + if (Node->getNumOperands() < 2) + return TBAANode(); + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + if (!P) + return TBAANode(); + // Ok, this node has a valid parent. Return it. 
+ return TBAANode(P); + } + + /// TypeIsImmutable - Test if this TBAANode represents a type for objects + /// which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 3) + return false; + ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); + if (!CI) + return false; + return CI->getValue()[0]; + } + }; + + /// This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAAStructTagNode { + /// This node should be created with createTBAAStructTagNode. + const MDNode *Node; + + public: + TBAAStructTagNode() : Node(0) {} + explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this TBAAStructTagNode. + const MDNode *getNode() const { return Node; } + + const MDNode *getBaseType() const { + return dyn_cast_or_null<MDNode>(Node->getOperand(0)); + } + const MDNode *getAccessType() const { + return dyn_cast_or_null<MDNode>(Node->getOperand(1)); + } + uint64_t getOffset() const { + return cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); + } + /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for + /// objects which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 4) + return false; + ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3)); + if (!CI) + return false; + return CI->getValue()[0]; + } + }; + + /// This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAAStructTypeNode { + /// This node should be created with createTBAAStructTypeNode. 
+ const MDNode *Node; + + public: + TBAAStructTypeNode() : Node(0) {} + explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this TBAAStructTypeNode. + const MDNode *getNode() const { return Node; } + + /// Get this TBAAStructTypeNode's field in the type DAG with + /// given offset. Update the offset to be relative to the field type. + TBAAStructTypeNode getParent(uint64_t &Offset) const { + // Parent can be omitted for the root node. + if (Node->getNumOperands() < 2) + return TBAAStructTypeNode(); + + // Fast path for a scalar type node and a struct type node with a single + // field. + if (Node->getNumOperands() <= 3) { + uint64_t Cur = Node->getNumOperands() == 2 ? 0 : + cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); + Offset -= Cur; + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } + + // Assume the offsets are in order. We return the previous field if + // the current offset is bigger than the given offset. + unsigned TheIdx = 0; + for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { + uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))-> + getZExtValue(); + if (Cur > Offset) { + assert(Idx >= 3 && + "TBAAStructTypeNode::getParent should have an offset match!"); + TheIdx = Idx - 2; + break; + } + } + // Move along the last field. + if (TheIdx == 0) + TheIdx = Node->getNumOperands() - 2; + uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))-> + getZExtValue(); + Offset -= Cur; + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } + }; +} + +namespace { + /// TypeBasedAliasAnalysis - This is a simple alias analysis + /// implementation that uses TypeBased to answer queries. 
+ class TypeBasedAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + TypeBasedAliasAnalysis() : ImmutablePass(ID) { + initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + bool Aliases(const MDNode *A, const MDNode *B) const; + bool PathAliases(const MDNode *A, const MDNode *B) const; + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Location &LocA, const Location &LocB); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + virtual ModRefBehavior getModRefBehavior(const Function *F); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2); + }; +} // End of anonymous namespace + +// Register this pass... 
+char TypeBasedAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", + "Type-Based Alias Analysis", false, true, false) + +ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { + return new TypeBasedAliasAnalysis(); +} + +void +TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +/// Check the first operand of the tbaa tag node, if it is a MDNode, we treat +/// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA +/// format. +static bool isStructPathTBAA(const MDNode *MD) { + // Anonymous TBAA root starts with a MDNode and dragonegg uses it as + // a TBAA tag. + return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3; +} + +/// Aliases - Test whether the type represented by A may alias the +/// type represented by B. +bool +TypeBasedAliasAnalysis::Aliases(const MDNode *A, + const MDNode *B) const { + if (isStructPathTBAA(A)) + return PathAliases(A, B); + + // Keep track of the root node for A and B. + TBAANode RootA, RootB; + + // Climb the tree from A to see if we reach B. + for (TBAANode T(A); ; ) { + if (T.getNode() == B) + // B is an ancestor of A. + return true; + + RootA = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Climb the tree from B to see if we reach A. + for (TBAANode T(B); ; ) { + if (T.getNode() == A) + // A is an ancestor of B. + return true; + + RootB = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. + return false; +} + +/// Test whether the struct-path tag represented by A may alias the +/// struct-path tag represented by B. 
+bool +TypeBasedAliasAnalysis::PathAliases(const MDNode *A, + const MDNode *B) const { + // Keep track of the root node for A and B. + TBAAStructTypeNode RootA, RootB; + TBAAStructTagNode TagA(A), TagB(B); + + // TODO: We need to check if AccessType of TagA encloses AccessType of + // TagB to support aggregate AccessType. If yes, return true. + + // Start from the base type of A, follow the edge with the correct offset in + // the type DAG and adjust the offset until we reach the base type of B or + // until we reach the Root node. + // Compare the adjusted offset once we have the same base. + + // Climb the type DAG from base type of A to see if we reach base type of B. + const MDNode *BaseA = TagA.getBaseType(); + const MDNode *BaseB = TagB.getBaseType(); + uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); + for (TBAAStructTypeNode T(BaseA); ; ) { + if (T.getNode() == BaseB) + // Base type of A encloses base type of B, check if the offsets match. + return OffsetA == OffsetB; + + RootA = T; + // Follow the edge with the correct offset, OffsetA will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetA); + if (!T.getNode()) + break; + } + + // Reset OffsetA and climb the type DAG from base type of B to see if we reach + // base type of A. + OffsetA = TagA.getOffset(); + for (TBAAStructTypeNode T(BaseB); ; ) { + if (T.getNode() == BaseA) + // Base type of B encloses base type of A, check if the offsets match. + return OffsetA == OffsetB; + + RootB = T; + // Follow the edge with the correct offset, OffsetB will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetB); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. 
+ return false; +} + +AliasAnalysis::AliasResult +TypeBasedAliasAnalysis::alias(const Location &LocA, + const Location &LocB) { + if (!EnableTBAA) + return AliasAnalysis::alias(LocA, LocB); + + // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must + // be conservative. + const MDNode *AM = LocA.TBAATag; + if (!AM) return AliasAnalysis::alias(LocA, LocB); + const MDNode *BM = LocB.TBAATag; + if (!BM) return AliasAnalysis::alias(LocA, LocB); + + // If they may alias, chain to the next AliasAnalysis. + if (Aliases(AM, BM)) + return AliasAnalysis::alias(LocA, LocB); + + // Otherwise return a definitive result. + return NoAlias; +} + +bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableTBAA) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + const MDNode *M = Loc.TBAATag; + if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // If this is an "immutable" type, we can assume the pointer is pointing + // to constant memory. + if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) + return true; + + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +} + +AliasAnalysis::ModRefBehavior +TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + if (!EnableTBAA) + return AliasAnalysis::getModRefBehavior(CS); + + ModRefBehavior Min = UnknownModRefBehavior; + + // If this is an "immutable" type, we can assume the call doesn't write + // to memory. 
+ if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) + Min = OnlyReadsMemory; + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); +} + +AliasAnalysis::ModRefBehavior +TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { + // Functions don't have metadata. Just chain to the next implementation. + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + if (!EnableTBAA) + return AliasAnalysis::getModRefInfo(CS, Loc); + + if (const MDNode *L = Loc.TBAATag) + if (const MDNode *M = + CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (!Aliases(L, M)) + return NoModRef; + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + if (!EnableTBAA) + return AliasAnalysis::getModRefInfo(CS1, CS2); + + if (const MDNode *M1 = + CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (const MDNode *M2 = + CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (!Aliases(M1, M2)) + return NoModRef; + + return AliasAnalysis::getModRefInfo(CS1, CS2); +} + +bool MDNode::isTBAAVtableAccess() const { + if (!isStructPathTBAA(this)) { + if (getNumOperands() < 1) return false; + if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + return false; + } + + // For struct-path aware TBAA, we use the access type of the tag. 
+ if (getNumOperands() < 2) return false; + MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); + if (!Tag) return false; + if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + return false; +} + +MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { + if (!A || !B) + return NULL; + + if (A == B) + return A; + + // For struct-path aware TBAA, we use the access type of the tag. + bool StructPath = isStructPathTBAA(A); + if (StructPath) { + A = cast_or_null<MDNode>(A->getOperand(1)); + if (!A) return 0; + B = cast_or_null<MDNode>(B->getOperand(1)); + if (!B) return 0; + } + + SmallVector<MDNode *, 4> PathA; + MDNode *T = A; + while (T) { + PathA.push_back(T); + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + } + + SmallVector<MDNode *, 4> PathB; + T = B; + while (T) { + PathB.push_back(T); + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + } + + int IA = PathA.size() - 1; + int IB = PathB.size() - 1; + + MDNode *Ret = 0; + while (IA >= 0 && IB >=0) { + if (PathA[IA] == PathB[IB]) + Ret = PathA[IA]; + else + break; + --IA; + --IB; + } + if (!StructPath) + return Ret; + + if (!Ret) + return 0; + // We need to convert from a type node to a tag node. + Type *Int64 = IntegerType::get(A->getContext(), 64); + Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; + return MDNode::get(A->getContext(), Ops); +} diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp new file mode 100644 index 000000000000..e39ee628ff0c --- /dev/null +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -0,0 +1,2084 @@ +//===- ValueTracking.cpp - Walk computations to compute properties --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains routines that help analyze properties that chains of +// computations have. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/PatternMatch.h" +#include <cstring> +using namespace llvm; +using namespace llvm::PatternMatch; + +const unsigned MaxDepth = 6; + +/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if +/// unknown returns 0). For vector types, returns the element type's bitwidth. +static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { + if (unsigned BitWidth = Ty->getScalarSizeInBits()) + return BitWidth; + + return TD ? TD->getPointerTypeSizeInBits(Ty) : 0; +} + +static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout *TD, unsigned Depth) { + if (!Add) { + if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. 
+ if (!CLHS->getValue().isNegative()) { + unsigned BitWidth = KnownZero.getBitWidth(); + unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); + } + } + } + } + + unsigned BitWidth = KnownZero.getBitWidth(); + + // If one of the operands has trailing zeros, then the bits that the + // other operand has in those bit positions will be preserved in the + // result. For an add, this works with either operand. For a subtract, + // this only works if the known zeros are in the right operand. + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); + + llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); + + // Determine which operand has more trailing zeros, and use that + // many bits from the other operand. + if (LHSKnownZeroOut > RHSKnownZeroOut) { + if (Add) { + APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + } else { + // If the known zeros are in the left operand for a subtract, + // fall back to the minimum known zeros in both operands. 
+ KnownZero |= APInt::getLowBitsSet(BitWidth, + std::min(LHSKnownZeroOut, + RHSKnownZeroOut)); + } + } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); + KnownZero |= LHSKnownZero & Mask; + KnownOne |= LHSKnownOne & Mask; + } + + // Are we still trying to solve for the sign bit? + if (!KnownZero.isNegative() && !KnownOne.isNegative()) { + if (NSW) { + if (Add) { + // Adding two positive numbers can't wrap into negative + if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // and adding two negative numbers can't wrap into positive. + else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } else { + // Subtracting a negative number from a positive one can't wrap + if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // neither can subtracting a positive number from a negative one. + else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } + } + } +} + +static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout *TD, unsigned Depth) { + unsigned BitWidth = KnownZero.getBitWidth(); + ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + bool isKnownNegative = false; + bool isKnownNonNegative = false; + // If the multiplication is known not to overflow, compute the sign bit. + if (NSW) { + if (Op0 == Op1) { + // The product of a number with itself is non-negative. 
+ isKnownNonNegative = true; + } else { + bool isKnownNonNegativeOp1 = KnownZero.isNegative(); + bool isKnownNonNegativeOp0 = KnownZero2.isNegative(); + bool isKnownNegativeOp1 = KnownOne.isNegative(); + bool isKnownNegativeOp0 = KnownOne2.isNegative(); + // The product of two numbers with the same sign is non-negative. + isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || + (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); + // The product of a negative number and a non-negative number is either + // negative or zero. + if (!isKnownNonNegative) + isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && + isKnownNonZero(Op0, TD, Depth)) || + (isKnownNegativeOp0 && isKnownNonNegativeOp1 && + isKnownNonZero(Op1, TD, Depth)); + } + } + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + KnownOne.clearAllBits(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + + // Only make use of no-wrap flags if we failed to compute the sign bit + // directly. This matters if the multiplication always overflows, in + // which case we prefer to follow the result of the direct computation, + // though as the program is invoking undefined behaviour we can choose + // whatever we like here. 
+ if (isKnownNonNegative && !KnownOne.isNegative()) + KnownZero.setBit(BitWidth - 1); + else if (isKnownNegative && !KnownZero.isNegative()) + KnownOne.setBit(BitWidth - 1); +} + +void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { + unsigned BitWidth = KnownZero.getBitWidth(); + unsigned NumRanges = Ranges.getNumOperands() / 2; + assert(NumRanges >= 1); + + // Use the high end of the ranges to find leading zeros. + unsigned MinLeadingZeros = BitWidth; + for (unsigned i = 0; i < NumRanges; ++i) { + ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0)); + ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1)); + ConstantRange Range(Lower->getValue(), Upper->getValue()); + if (Range.isWrappedSet()) + MinLeadingZeros = 0; // -1 has no zeros + unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros(); + MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros); + } + + KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); +} +/// ComputeMaskedBits - Determine which of the bits are known to be either zero +/// or one and return them in the KnownZero/KnownOne bit sets. +/// +/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that +/// we cannot optimize based on the assumption that it is zero without changing +/// it to be an explicit zero. If we don't change it to zero, other code could +/// optimized based on the contradictory assumption that it is non-zero. +/// Because instcombine aggressively folds operations with undef args anyway, +/// this won't lose us code quality. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. 
+void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout *TD, unsigned Depth) { + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + unsigned BitWidth = KnownZero.getBitWidth(); + + assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarType()->isPointerTy()) && + "Not integer or pointer type!"); + assert((!TD || + TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + (!V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "V, Mask, KnownOne and KnownZero should have same BitWidth"); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue(); + KnownZero = ~KnownOne; + return; + } + // Null and aggregate-zero are all-zeros. + if (isa<ConstantPointerNull>(V) || + isa<ConstantAggregateZero>(V)) { + KnownOne.clearAllBits(); + KnownZero = APInt::getAllOnesValue(BitWidth); + return; + } + // Handle a constant vector by taking the intersection of the known bits of + // each element. There is no real need to handle ConstantVector here, because + // we don't handle undef in any particularly useful way. + if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) { + // We know that CDS must be a vector of integers. Take the intersection of + // each element. + KnownZero.setAllBits(); KnownOne.setAllBits(); + APInt Elt(KnownZero.getBitWidth(), 0); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + Elt = CDS->getElementAsInteger(i); + KnownZero &= ~Elt; + KnownOne &= Elt; + } + return; + } + + // The address of an aligned GlobalValue has trailing zeros. 
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + unsigned Align = GV->getAlignment(); + if (Align == 0 && TD) { + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + Type *ObjectType = GVar->getType()->getElementType(); + if (ObjectType->isSized()) { + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) + Align = TD->getPreferredAlignment(GVar); + else + Align = TD->getABITypeAlignment(ObjectType); + } + } + } + if (Align > 0) + KnownZero = APInt::getLowBitsSet(BitWidth, + countTrailingZeros(Align)); + else + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + return; + } + // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has + // the bits of its aliasee. + if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) { + KnownZero.clearAllBits(); KnownOne.clearAllBits(); + } else { + ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); + } + return; + } + + if (Argument *A = dyn_cast<Argument>(V)) { + unsigned Align = 0; + + if (A->hasByValAttr()) { + // Get alignment information off byval arguments if specified in the IR. + Align = A->getParamAlignment(); + } else if (TD && A->hasStructRetAttr()) { + // An sret parameter has at least the ABI alignment of the return type. + Type *EltTy = cast<PointerType>(A->getType())->getElementType(); + if (EltTy->isSized()) + Align = TD->getABITypeAlignment(EltTy); + } + + if (Align) + KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + return; + } + + // Start out not knowing anything. + KnownZero.clearAllBits(); KnownOne.clearAllBits(); + + if (Depth == MaxDepth) + return; // Limit search depth. 
+ + Operator *I = dyn_cast<Operator>(V); + if (!I) return; + + APInt KnownZero2(KnownZero), KnownOne2(KnownOne); + switch (I->getOpcode()) { + default: break; + case Instruction::Load: + if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) + computeMaskedBitsLoad(*MD, KnownZero); + return; + case Instruction::And: { + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + } + case Instruction::Or: { + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + } + case Instruction::Xor: { + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. 
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } + case Instruction::Mul: { + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; + } + case Instruction::UDiv: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be less than the denominator. + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clearAllBits(); + KnownZero2.clearAllBits(); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); + return; + } + case Instruction::Select: + ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: + return; // Can't work with floating point. + case Instruction::PtrToInt: + case Instruction::IntToPtr: + // We can't handle these if we don't know the pointer size. 
+ if (!TD) return; + // FALL THROUGH and handle them the same as zext/trunc. + case Instruction::ZExt: + case Instruction::Trunc: { + Type *SrcTy = I->getOperand(0)->getType(); + + unsigned SrcBitWidth; + // Note that we handle pointer operands here because of inttoptr/ptrtoint + // which fall through here. + if(TD) { + SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); + } else { + SrcBitWidth = SrcTy->getScalarSizeInBits(); + if (!SrcBitWidth) return; + } + + assert(SrcBitWidth && "SrcBitWidth can't be zero"); + KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); + KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + KnownZero = KnownZero.zextOrTrunc(BitWidth); + KnownOne = KnownOne.zextOrTrunc(BitWidth); + // Any top bits are known to be zero. + if (BitWidth > SrcBitWidth) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::BitCast: { + Type *SrcTy = I->getOperand(0)->getType(); + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + // TODO: For now, not handling conversions like: + // (bitcast i64 %x to <2 x i32>) + !I->getType()->isVectorTy()) { + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + return; + } + break; + } + case Instruction::SExt: { + // Compute the bits in the result that are not present in the input. + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); + + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set + KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::Shl: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShiftAmt; + KnownOne <<= ShiftAmt; + KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 + return; + } + break; + case Instruction::LShr: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Unsigned shift right. + ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + // high bits known zero. + KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + return; + } + break; + case Instruction::AShr: + // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + + // Signed shift right. 
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. + KnownZero |= HighBits; + else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. + KnownOne |= HighBits; + return; + } + break; + case Instruction::Sub: { + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth); + break; + } + case Instruction::Add: { + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth); + break; + } + case Instruction::SRem: + if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + APInt RA = Rem->getValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + + // The low bits of the first operand are unchanged by the srem. + KnownZero = KnownZero2 & LowBits; + KnownOne = KnownOne2 & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero |= ~LowBits; + + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. + if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + } + } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. 
+ if (KnownZero.isNonNegative()) { + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, + Depth+1); + // If it's known zero, our sign bit is also zero. + if (LHSKnownZero.isNegative()) + KnownZero.setBit(BitWidth - 1); + } + + break; + case Instruction::URem: { + if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + APInt RA = Rem->getValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= ~LowBits; + KnownOne &= LowBits; + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + + unsigned Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clearAllBits(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); + break; + } + + case Instruction::Alloca: { + AllocaInst *AI = cast<AllocaInst>(V); + unsigned Align = AI->getAlignment(); + if (Align == 0 && TD) + Align = TD->getABITypeAlignment(AI->getType()->getElementType()); + + if (Align > 0) + KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + break; + } + case Instruction::GetElementPtr: { + // Analyze all of the subscripts of this getelementptr instruction + // to determine if we can prove known low zero bits. 
+ APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, + Depth+1); + unsigned TrailZ = LocalKnownZero.countTrailingOnes(); + + gep_type_iterator GTI = gep_type_begin(I); + for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { + Value *Index = I->getOperand(i); + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + // Handle struct member offset arithmetic. + if (!TD) + return; + + // Handle case when index is vector zeroinitializer + Constant *CIndex = cast<Constant>(Index); + if (CIndex->isZeroValue()) + continue; + + if (CIndex->getType()->isVectorTy()) + Index = CIndex->getSplatValue(); + + unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); + uint64_t Offset = SL->getElementOffset(Idx); + TrailZ = std::min<unsigned>(TrailZ, + countTrailingZeros(Offset)); + } else { + // Handle array index arithmetic. + Type *IndexedTy = GTI.getIndexedType(); + if (!IndexedTy->isSized()) return; + unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); + uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; + LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); + ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); + TrailZ = std::min(TrailZ, + unsigned(countTrailingZeros(TypeSize) + + LocalKnownZero.countTrailingOnes())); + } + } + + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); + break; + } + case Instruction::PHI: { + PHINode *P = cast<PHINode>(I); + // Handle the case of a simple two-predecessor recurrence PHI. + // There's a lot more that could theoretically be done here, but + // this is sufficient to catch some interesting cases. 
+ if (P->getNumIncomingValues() == 2) { + for (unsigned i = 0; i != 2; ++i) { + Value *L = P->getIncomingValue(i); + Value *R = P->getIncomingValue(!i); + Operator *LU = dyn_cast<Operator>(L); + if (!LU) + continue; + unsigned Opcode = LU->getOpcode(); + // Check for operations that have the property that if + // both their operands have low zero bits, the result + // will have low zero bits. + if (Opcode == Instruction::Add || + Opcode == Instruction::Sub || + Opcode == Instruction::And || + Opcode == Instruction::Or || + Opcode == Instruction::Mul) { + Value *LL = LU->getOperand(0); + Value *LR = LU->getOperand(1); + // Find a recurrence. + if (LL == I) + L = LR; + else if (LR == I) + L = LL; + else + break; + // Ok, we have a PHI of the form L op= R. Check for low + // zero bits. + ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1); + + // We need to take the minimum number of known bits + APInt KnownZero3(KnownZero), KnownOne3(KnownOne); + ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1); + + KnownZero = APInt::getLowBitsSet(BitWidth, + std::min(KnownZero2.countTrailingOnes(), + KnownZero3.countTrailingOnes())); + break; + } + } + } + + // Unreachable blocks may have zero-operand PHI nodes. + if (P->getNumIncomingValues() == 0) + return; + + // Otherwise take the unions of the known bit sets of the operands, + // taking conservative care to avoid excessive recursion. + if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + // Skip if every incoming value references to ourself. + if (dyn_cast_or_null<UndefValue>(P->hasConstantValue())) + break; + + KnownZero = APInt::getAllOnesValue(BitWidth); + KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { + // Skip direct self references. 
+ if (P->getIncomingValue(i) == P) continue; + + KnownZero2 = APInt(BitWidth, 0); + KnownOne2 = APInt(BitWidth, 0); + // Recurse, but cap the recursion to one level, because we don't + // want to waste time spinning around in loops. + ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, + MaxDepth-1); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + // If all bits have been ruled out, there's no need to check + // more operands. + if (!KnownZero && !KnownOne) + break; + } + } + break; + } + case Instruction::Call: + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::ctlz: + case Intrinsic::cttz: { + unsigned LowBits = Log2_32(BitWidth)+1; + // If this call is undefined for 0, the result will be less than 2^n. + if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) + LowBits -= 1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + case Intrinsic::ctpop: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + case Intrinsic::x86_sse42_crc32_64_64: + KnownZero = APInt::getHighBitsSet(64, 32); + break; + } + } + break; + case Instruction::ExtractValue: + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { + ExtractValueInst *EVI = cast<ExtractValueInst>(I); + if (EVI->getNumIndices() != 1) break; + if (EVI->getIndices()[0] == 0) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + ComputeMaskedBitsAddSub(true, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + ComputeMaskedBitsAddSub(false, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; + case 
Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1), + false, KnownZero, KnownOne, + KnownZero2, KnownOne2, TD, Depth); + break; + } + } + } + } +} + +/// ComputeSignBit - Determine whether the sign bit is known to be zero or +/// one. Convenience wrapper around ComputeMaskedBits. +void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, + const DataLayout *TD, unsigned Depth) { + unsigned BitWidth = getBitWidth(V->getType(), TD); + if (!BitWidth) { + KnownZero = false; + KnownOne = false; + return; + } + APInt ZeroBits(BitWidth, 0); + APInt OneBits(BitWidth, 0); + ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth); + KnownOne = OneBits[BitWidth - 1]; + KnownZero = ZeroBits[BitWidth - 1]; +} + +/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one +/// bit set when defined. For vectors return true if every element is known to +/// be a power of two when defined. Supports values with integer or pointer +/// types and vectors of integers. +bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { + if (Constant *C = dyn_cast<Constant>(V)) { + if (C->isNullValue()) + return OrZero; + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return CI->getValue().isPowerOf2(); + // TODO: Handle vector constants. + } + + // 1 << X is clearly a power of two if the one is not shifted off the end. If + // it is shifted off the end then the result is undefined. + if (match(V, m_Shl(m_One(), m_Value()))) + return true; + + // (signbit) >>l X is clearly a power of two if the one is not shifted off the + // bottom. If it is shifted off the bottom then the result is undefined. + if (match(V, m_LShr(m_SignBit(), m_Value()))) + return true; + + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth++ == MaxDepth) + return false; + + Value *X = 0, *Y = 0; + // A shift of a power of two is a power of two or zero. 
+ if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || + match(V, m_Shr(m_Value(X), m_Value())))) + return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth); + + if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) + return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth); + + if (SelectInst *SI = dyn_cast<SelectInst>(V)) + return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) && + isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth); + + if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { + // A power of two and'd with anything is a power of two or zero. + if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) || + isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth)) + return true; + // X & (-X) is always a power of two or zero. + if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) + return true; + return false; + } + + // Adding a power-of-two or zero to the same power-of-two or zero yields + // either the original power-of-two, a larger power-of-two or zero. 
+ if (match(V, m_Add(m_Value(X), m_Value(Y)))) { + OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); + if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { + if (match(X, m_And(m_Specific(Y), m_Value())) || + match(X, m_And(m_Value(), m_Specific(Y)))) + if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth)) + return true; + if (match(Y, m_And(m_Specific(X), m_Value())) || + match(Y, m_And(m_Value(), m_Specific(X)))) + if (isKnownToBeAPowerOfTwo(X, OrZero, Depth)) + return true; + + unsigned BitWidth = V->getType()->getScalarSizeInBits(); + APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); + ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth); + + APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); + ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth); + // If i8 V is a power of two or zero: + // ZeroBits: 1 1 1 0 1 1 1 1 + // ~ZeroBits: 0 0 0 1 0 0 0 0 + if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2()) + // If OrZero isn't set, we cannot give back a zero result. + // Make sure either the LHS or RHS has a bit set. + if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue()) + return true; + } + } + + // An exact divide or right shift can only shift off zero bits, so the result + // is a power of two only if the first operand is a power of two and not + // copying a sign bit (sdiv int_min, 2). + if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || + match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { + return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, Depth); + } + + return false; +} + +/// \brief Test whether a GEP's result is known to be non-null. +/// +/// Uses properties inherent in a GEP to try to determine whether it is known +/// to be non-null. +/// +/// Currently this routine does not support vector GEPs. 
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, + unsigned Depth) { + if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) + return false; + + // FIXME: Support vector-GEPs. + assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); + + // If the base pointer is non-null, we cannot walk to a null address with an + // inbounds GEP in address space zero. + if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth)) + return true; + + // Past this, if we don't have DataLayout, we can't do much. + if (!DL) + return false; + + // Walk the GEP operands and see if any operand introduces a non-zero offset. + // If so, then the GEP cannot produce a null pointer, as doing so would + // inherently violate the inbounds contract within address space zero. + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + // Struct types are easy -- they must always be indexed by a constant. + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = DL->getStructLayout(STy); + uint64_t ElementOffset = SL->getElementOffset(ElementIdx); + if (ElementOffset > 0) + return true; + continue; + } + + // If we have a zero-sized type, the index doesn't matter. Keep looping. + if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0) + continue; + + // Fast path the constant operand case both for efficiency and so we don't + // increment Depth when just zipping down an all-constant GEP. + if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) { + if (!OpC->isZero()) + return true; + continue; + } + + // We post-increment Depth here because while isKnownNonZero increments it + // as well, when we pop back up that increment won't persist. We don't want + // to recurse 10k times just because we have 10k GEP operands. 
We don't + // bail completely out because we want to handle constant GEPs regardless + // of depth. + if (Depth++ >= MaxDepth) + continue; + + if (isKnownNonZero(GTI.getOperand(), DL, Depth)) + return true; + } + + return false; +} + +/// isKnownNonZero - Return true if the given value is known to be non-zero +/// when defined. For vectors return true if every element is known to be +/// non-zero when defined. Supports values with integer or pointer type and +/// vectors of integers. +bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { + if (Constant *C = dyn_cast<Constant>(V)) { + if (C->isNullValue()) + return false; + if (isa<ConstantInt>(C)) + // Must be non-zero due to null test above. + return true; + // TODO: Handle vectors + return false; + } + + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth++ >= MaxDepth) + return false; + + // Check for pointer simplifications. + if (V->getType()->isPointerTy()) { + if (isKnownNonNull(V)) + return true; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + if (isGEPKnownNonNull(GEP, TD, Depth)) + return true; + } + + unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD); + + // X | Y != 0 if X != 0 or Y != 0. + Value *X = 0, *Y = 0; + if (match(V, m_Or(m_Value(X), m_Value(Y)))) + return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth); + + // ext X != 0 if X != 0. + if (isa<SExtInst>(V) || isa<ZExtInst>(V)) + return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth); + + // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined + // if the lowest bit is shifted off the end. + if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { + // shl nuw can't remove any non-zero bits. 
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); + if (BO->hasNoUnsignedWrap()) + return isKnownNonZero(X, TD, Depth); + + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); + if (KnownOne[0]) + return true; + } + // shr X, Y != 0 if X is negative. Note that the value of the shift is not + // defined if the sign bit is shifted off the end. + else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { + // shr exact can only shift out zero bits. + PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); + if (BO->isExact()) + return isKnownNonZero(X, TD, Depth); + + bool XKnownNonNegative, XKnownNegative; + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); + if (XKnownNegative) + return true; + } + // div exact can only produce a zero if the dividend is zero. + else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { + return isKnownNonZero(X, TD, Depth); + } + // X + Y. + else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { + bool XKnownNonNegative, XKnownNegative; + bool YKnownNonNegative, YKnownNegative; + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth); + + // If X and Y are both non-negative (as signed values) then their sum is not + // zero unless both X and Y are zero. + if (XKnownNonNegative && YKnownNonNegative) + if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth)) + return true; + + // If X and Y are both negative (as signed values) then their sum is not + // zero unless both X and Y equal INT_MIN. + if (BitWidth && XKnownNegative && YKnownNegative) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + APInt Mask = APInt::getSignedMaxValue(BitWidth); + // The sign bit of X is set. If some other bit is set then X is not equal + // to INT_MIN. 
+ ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); + if ((KnownOne & Mask) != 0) + return true; + // The sign bit of Y is set. If some other bit is set then Y is not equal + // to INT_MIN. + ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth); + if ((KnownOne & Mask) != 0) + return true; + } + + // The sum of a non-negative number and a power of two is not zero. + if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth)) + return true; + if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth)) + return true; + } + // X * Y. + else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { + OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); + // If X and Y are non-zero then so is X * Y as long as the multiplication + // does not overflow. + if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && + isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth)) + return true; + } + // (C ? X : Y) != 0 if X != 0 and Y != 0. + else if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + if (isKnownNonZero(SI->getTrueValue(), TD, Depth) && + isKnownNonZero(SI->getFalseValue(), TD, Depth)) + return true; + } + + if (!BitWidth) return false; + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + return KnownOne != 0; +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. 
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, + const DataLayout *TD, unsigned Depth) { + APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + + + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "ashr X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +/// +/// 'Op' must have a scalar integer type. +/// +unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, + unsigned Depth) { + assert((TD || V->getType()->isIntOrIntVectorTy()) && + "ComputeNumSignBits requires a DataLayout object to operate " + "on non-integer values!"); + Type *Ty = V->getType(); + unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) : + Ty->getScalarSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + // Note that ConstantInt is handled by the general ComputeMaskedBits case + // below. + + if (Depth == 6) + return 1; // Limit search depth. + + Operator *U = dyn_cast<Operator>(V); + switch (Operator::getOpcode(V)) { + default: break; + case Instruction::SExt: + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); + return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; + + case Instruction::AShr: { + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + // ashr X, C -> adds C sign bits. Vectors too. 
+ const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + Tmp += ShAmt->getZExtValue(); + if (Tmp > TyBits) Tmp = TyBits; + } + return Tmp; + } + case Instruction::Shl: { + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + Tmp2 = ShAmt->getZExtValue(); + if (Tmp2 >= TyBits || // Bad shift. + Tmp2 >= Tmp) break; // Shifted all sign bits out. + return Tmp - Tmp2; + } + break; + } + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case Instruction::Select: + Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1); + return std::min(Tmp, Tmp2); + + case Instruction::Add: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. 
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + + case Instruction::Sub: + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(U); + // Don't analyze large in-degree PHIs. + if (PN->getNumIncomingValues() > 4) break; + + // Take the minimum of all incoming values. This can't infinitely loop + // because of our depth threshold. 
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1); + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + if (Tmp == 1) return Tmp; + Tmp = std::min(Tmp, + ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1)); + } + return Tmp; + } + + case Instruction::Trunc: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask; + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-TyBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros())); +} + +/// ComputeMultiple - This function computes the integer multiple of Base that +/// equals V. If successful, it returns true and returns the multiple in +/// Multiple. If unsuccessful, it returns false. It looks +/// through SExt instructions only if LookThroughSExt is true. 
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, + bool LookThroughSExt, unsigned Depth) { + const unsigned MaxDepth = 6; + + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); + + Type *T = V->getType(); + + ConstantInt *CI = dyn_cast<ConstantInt>(V); + + if (Base == 0) + return false; + + if (Base == 1) { + Multiple = V; + return true; + } + + ConstantExpr *CO = dyn_cast<ConstantExpr>(V); + Constant *BaseVal = ConstantInt::get(T, Base); + if (CO && CO == BaseVal) { + // Multiple is 1. + Multiple = ConstantInt::get(T, 1); + return true; + } + + if (CI && CI->getZExtValue() % Base == 0) { + Multiple = ConstantInt::get(T, CI->getZExtValue() / Base); + return true; + } + + if (Depth == MaxDepth) return false; // Limit search depth. + + Operator *I = dyn_cast<Operator>(V); + if (!I) return false; + + switch (I->getOpcode()) { + default: break; + case Instruction::SExt: + if (!LookThroughSExt) return false; + // otherwise fall through to ZExt + case Instruction::ZExt: + return ComputeMultiple(I->getOperand(0), Base, Multiple, + LookThroughSExt, Depth+1); + case Instruction::Shl: + case Instruction::Mul: { + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + + if (I->getOpcode() == Instruction::Shl) { + ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1); + if (!Op1CI) return false; + // Turn Op0 << Op1 into Op0 * 2^Op1 + APInt Op1Int = Op1CI->getValue(); + uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1); + APInt API(Op1Int.getBitWidth(), 0); + API.setBit(BitToSet); + Op1 = ConstantInt::get(V->getContext(), API); + } + + Value *Mul0 = NULL; + if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { + if (Constant *Op1C = dyn_cast<Constant>(Op1)) + if (Constant *MulC = dyn_cast<Constant>(Mul0)) { + if (Op1C->getType()->getPrimitiveSizeInBits() < + MulC->getType()->getPrimitiveSizeInBits()) + Op1C = 
ConstantExpr::getZExt(Op1C, MulC->getType()); + if (Op1C->getType()->getPrimitiveSizeInBits() > + MulC->getType()->getPrimitiveSizeInBits()) + MulC = ConstantExpr::getZExt(MulC, Op1C->getType()); + + // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) + Multiple = ConstantExpr::getMul(MulC, Op1C); + return true; + } + + if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0)) + if (Mul0CI->getValue() == 1) { + // V == Base * Op1, so return Op1 + Multiple = Op1; + return true; + } + } + + Value *Mul1 = NULL; + if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { + if (Constant *Op0C = dyn_cast<Constant>(Op0)) + if (Constant *MulC = dyn_cast<Constant>(Mul1)) { + if (Op0C->getType()->getPrimitiveSizeInBits() < + MulC->getType()->getPrimitiveSizeInBits()) + Op0C = ConstantExpr::getZExt(Op0C, MulC->getType()); + if (Op0C->getType()->getPrimitiveSizeInBits() > + MulC->getType()->getPrimitiveSizeInBits()) + MulC = ConstantExpr::getZExt(MulC, Op0C->getType()); + + // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) + Multiple = ConstantExpr::getMul(MulC, Op0C); + return true; + } + + if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1)) + if (Mul1CI->getValue() == 1) { + // V == Base * Op0, so return Op0 + Multiple = Op0; + return true; + } + } + } + } + + // We could not determine if V is a multiple of Base. + return false; +} + +/// CannotBeNegativeZero - Return true if we can prove that the specified FP +/// value is never equal to -0.0. +/// +/// NOTE: this function will need to be revisited when we support non-default +/// rounding modes! +/// +bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) + return !CFP->getValueAPF().isNegZero(); + + if (Depth == 6) + return 1; // Limit search depth. 
+ + const Operator *I = dyn_cast<Operator>(V); + if (I == 0) return false; + + // Check if the nsz fast-math flag is set + if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I)) + if (FPO->hasNoSignedZeros()) + return true; + + // (add x, 0.0) is guaranteed to return +0.0, not -0.0. + if (I->getOpcode() == Instruction::FAdd) + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1))) + if (CFP->isNullValue()) + return true; + + // sitofp and uitofp turn into +0.0 for zero. + if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + // sqrt(-0.0) = -0.0, no other negative results are possible. + if (II->getIntrinsicID() == Intrinsic::sqrt) + return CannotBeNegativeZero(II->getArgOperand(0), Depth+1); + + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) { + if (F->isDeclaration()) { + // abs(x) != -0.0 + if (F->getName() == "abs") return true; + // fabs[lf](x) != -0.0 + if (F->getName() == "fabs") return true; + if (F->getName() == "fabsf") return true; + if (F->getName() == "fabsl") return true; + if (F->getName() == "sqrt" || F->getName() == "sqrtf" || + F->getName() == "sqrtl") + return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1); + } + } + + return false; +} + +/// isBytewiseValue - If the specified value can be set by repeating the same +/// byte in memory, return the i8 value that it is represented with. This is +/// true for all i8 values obviously, but is also true for i32 0, i32 -1, +/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated +/// byte store (e.g. i16 0x1234), return null. +Value *llvm::isBytewiseValue(Value *V) { + // All byte-wide stores are splatable, even of arbitrary variables. + if (V->getType()->isIntegerTy(8)) return V; + + // Handle 'null' ConstantArrayZero etc. 
+ if (Constant *C = dyn_cast<Constant>(V)) + if (C->isNullValue()) + return Constant::getNullValue(Type::getInt8Ty(V->getContext())); + + // Constant float and double values can be handled as integer values if the + // corresponding integer value is "byteable". An important case is 0.0. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { + if (CFP->getType()->isFloatTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext())); + if (CFP->getType()->isDoubleTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext())); + // Don't handle long double formats, which have strange constraints. + } + + // We can handle constant integers that are power of two in size and a + // multiple of 8 bits. + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + unsigned Width = CI->getBitWidth(); + if (isPowerOf2_32(Width) && Width > 8) { + // We can handle this value if the recursive binary decomposition is the + // same at all levels. + APInt Val = CI->getValue(); + APInt Val2; + while (Val.getBitWidth() != 8) { + unsigned NextWidth = Val.getBitWidth()/2; + Val2 = Val.lshr(NextWidth); + Val2 = Val2.trunc(Val.getBitWidth()/2); + Val = Val.trunc(Val.getBitWidth()/2); + + // If the top/bottom halves aren't the same, reject it. + if (Val != Val2) + return 0; + } + return ConstantInt::get(V->getContext(), Val); + } + } + + // A ConstantDataArray/Vector is splatable if all its members are equal and + // also splatable. + if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) { + Value *Elt = CA->getElementAsConstant(0); + Value *Val = isBytewiseValue(Elt); + if (!Val) + return 0; + + for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) + if (CA->getElementAsConstant(I) != Elt) + return 0; + + return Val; + } + + // Conceptually, we could handle things like: + // %a = zext i8 %X to i16 + // %b = shl i16 %a, 8 + // %c = or i16 %a, %b + // but until there is an example that actually needs this, it doesn't seem + // worth worrying about. 
+ return 0; +} + + +// This is the recursive version of BuildSubAggregate. It takes a few different +// arguments. Idxs is the index within the nested struct From that we are +// looking at now (which is of type IndexedType). IdxSkip is the number of +// indices from Idxs that should be left out when inserting into the resulting +// struct. To is the result struct built so far, new insertvalue instructions +// build on that. +static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, + SmallVectorImpl<unsigned> &Idxs, + unsigned IdxSkip, + Instruction *InsertBefore) { + llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType); + if (STy) { + // Save the original To argument so we can modify it + Value *OrigTo = To; + // General case, the type indexed by Idxs is a struct + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + // Process each struct element recursively + Idxs.push_back(i); + Value *PrevTo = To; + To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, + InsertBefore); + Idxs.pop_back(); + if (!To) { + // Couldn't find any inserted value for this index? Cleanup + while (PrevTo != OrigTo) { + InsertValueInst* Del = cast<InsertValueInst>(PrevTo); + PrevTo = Del->getAggregateOperand(); + Del->eraseFromParent(); + } + // Stop processing elements + break; + } + } + // If we successfully found a value for each of our subaggregates + if (To) + return To; + } + // Base case, the type indexed by SourceIdxs is not a struct, or not all of + // the struct's elements had a value that was inserted directly. In the latter + // case, perhaps we can't determine each of the subelements individually, but + // we might be able to find the complete struct somewhere. 
+ + // Find the value that is at that particular spot + Value *V = FindInsertedValue(From, Idxs); + + if (!V) + return NULL; + + // Insert the value in the new (sub) aggregrate + return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), + "tmp", InsertBefore); +} + +// This helper takes a nested struct and extracts a part of it (which is again a +// struct) into a new value. For example, given the struct: +// { a, { b, { c, d }, e } } +// and the indices "1, 1" this returns +// { c, d }. +// +// It does this by inserting an insertvalue for each element in the resulting +// struct, as opposed to just inserting a single struct. This will only work if +// each of the elements of the substruct are known (ie, inserted into From by an +// insertvalue instruction somewhere). +// +// All inserted insertvalue instructions are inserted before InsertBefore +static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, + Instruction *InsertBefore) { + assert(InsertBefore && "Must have someplace to insert!"); + Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), + idx_range); + Value *To = UndefValue::get(IndexedType); + SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end()); + unsigned IdxSkip = Idxs.size(); + + return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); +} + +/// FindInsertedValue - Given an aggregrate and an sequence of indices, see if +/// the scalar value indexed is already around as a register, for example if it +/// were inserted directly into the aggregrate. +/// +/// If InsertBefore is not null, this function will duplicate (modified) +/// insertvalues when a part of a nested struct is extracted. +Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, + Instruction *InsertBefore) { + // Nothing to index? Just return V then (this is useful at the end of our + // recursion). 
+ if (idx_range.empty()) + return V; + // We have indices, so V should have an indexable type. + assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && + "Not looking at a struct or array?"); + assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && + "Invalid indices for type?"); + + if (Constant *C = dyn_cast<Constant>(V)) { + C = C->getAggregateElement(idx_range[0]); + if (C == 0) return 0; + return FindInsertedValue(C, idx_range.slice(1), InsertBefore); + } + + if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { + // Loop the indices for the insertvalue instruction in parallel with the + // requested indices + const unsigned *req_idx = idx_range.begin(); + for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); + i != e; ++i, ++req_idx) { + if (req_idx == idx_range.end()) { + // We can't handle this without inserting insertvalues + if (!InsertBefore) + return 0; + + // The requested index identifies a part of a nested aggregate. Handle + // this specially. For example, + // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 + // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 + // %C = extractvalue {i32, { i32, i32 } } %B, 1 + // This can be changed into + // %A = insertvalue {i32, i32 } undef, i32 10, 0 + // %C = insertvalue {i32, i32 } %A, i32 11, 1 + // which allows the unused 0,0 element from the nested struct to be + // removed. + return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), + InsertBefore); + } + + // This insert value inserts something else than what we are looking for. + // See if the (aggregrate) value inserted into has the value we are + // looking for, then. + if (*req_idx != *i) + return FindInsertedValue(I->getAggregateOperand(), idx_range, + InsertBefore); + } + // If we end up here, the indices of the insertvalue match with those + // requested (though possibly only partially). Now we recursively look at + // the inserted value, passing any remaining indices. 
+ return FindInsertedValue(I->getInsertedValueOperand(), + makeArrayRef(req_idx, idx_range.end()), + InsertBefore); + } + + if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) { + // If we're extracting a value from an aggregrate that was extracted from + // something else, we can extract from that something else directly instead. + // However, we will need to chain I's indices with the requested indices. + + // Calculate the number of indices required + unsigned size = I->getNumIndices() + idx_range.size(); + // Allocate some space to put the new indices in + SmallVector<unsigned, 5> Idxs; + Idxs.reserve(size); + // Add indices from the extract value instruction + Idxs.append(I->idx_begin(), I->idx_end()); + + // Add requested indices + Idxs.append(idx_range.begin(), idx_range.end()); + + assert(Idxs.size() == size + && "Number of indices added not correct?"); + + return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore); + } + // Otherwise, we don't know (such as, extracting from a function return value + // or load instruction) + return 0; +} + +/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if +/// it can be expressed as a base pointer plus a constant offset. Return the +/// base and offset to the caller. +Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const DataLayout *DL) { + // Without DataLayout, conservatively assume 64-bit offsets, which is + // the widest we support. + unsigned BitWidth = DL ? 
DL->getPointerTypeSizeInBits(Ptr->getType()) : 64; + APInt ByteOffset(BitWidth, 0); + while (1) { + if (Ptr->getType()->isVectorTy()) + break; + + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { + if (DL) { + APInt GEPOffset(BitWidth, 0); + if (!GEP->accumulateConstantOffset(*DL, GEPOffset)) + break; + + ByteOffset += GEPOffset; + } + + Ptr = GEP->getPointerOperand(); + } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) { + Ptr = cast<Operator>(Ptr)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { + if (GA->mayBeOverridden()) + break; + Ptr = GA->getAliasee(); + } else { + break; + } + } + Offset = ByteOffset.getSExtValue(); + return Ptr; +} + + +/// getConstantStringInfo - This function computes the length of a +/// null-terminated C string pointed to by V. If successful, it returns true +/// and returns the string in Str. If unsuccessful, it returns false. +bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, + uint64_t Offset, bool TrimAtNul) { + assert(V); + + // Look through bitcast instructions and geps. + V = V->stripPointerCasts(); + + // If the value is a GEP instructionor constant expression, treat it as an + // offset. + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return false; + + // Make sure the index-ee is a pointer to array of i8. + PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); + ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); + if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) + return false; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. 
+ const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); + if (FirstIdx == 0 || !FirstIdx->isZero()) + return false; + + // If the second index isn't a ConstantInt, then this is a variable index + // into the array. If this occurs, we can't say anything meaningful about + // the string. + uint64_t StartIdx = 0; + if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) + StartIdx = CI->getZExtValue(); + else + return false; + return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset); + } + + // The GEP instruction, constant or instruction, must reference a global + // variable that is a constant and is initialized. The referenced constant + // initializer is the array that we'll use for optimization. + const GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) + return false; + + // Handle the all-zeros case + if (GV->getInitializer()->isNullValue()) { + // This is a degenerate case. The initializer is constant zero so the + // length of the string must be zero. + Str = ""; + return true; + } + + // Must be a Constant Array + const ConstantDataArray *Array = + dyn_cast<ConstantDataArray>(GV->getInitializer()); + if (Array == 0 || !Array->isString()) + return false; + + // Get the number of elements in the array + uint64_t NumElts = Array->getType()->getArrayNumElements(); + + // Start out with the entire array in the StringRef. + Str = Array->getAsString(); + + if (Offset > NumElts) + return false; + + // Skip over 'offset' bytes. + Str = Str.substr(Offset); + + if (TrimAtNul) { + // Trim off the \0 and anything after it. If the array is not nul + // terminated, we just return the whole end of string. The client may know + // some other way that the string is length-bound. + Str = Str.substr(0, Str.find('\0')); + } + return true; +} + +// These next two are very similar to the above, but also look through PHI +// nodes. 
+// TODO: See if we can integrate these two together. + +/// GetStringLengthH - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { + // Look through noop bitcast instructions. + V = V->stripPointerCasts(); + + // If this is a PHI node, there are two cases: either we have already seen it + // or we haven't. + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (!PHIs.insert(PN)) + return ~0ULL; // already in the set. + + // If it was new, see if all the input strings are the same length. + uint64_t LenSoFar = ~0ULL; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + if (Len == 0) return 0; // Unknown length -> unknown. + + if (Len == ~0ULL) continue; + + if (Len != LenSoFar && LenSoFar != ~0ULL) + return 0; // Disagree -> unknown. + LenSoFar = Len; + } + + // Success, all agree. + return LenSoFar; + } + + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) + if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); + if (Len1 == 0) return 0; + uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); + if (Len2 == 0) return 0; + if (Len1 == ~0ULL) return Len2; + if (Len2 == ~0ULL) return Len1; + if (Len1 != Len2) return 0; + return Len1; + } + + // Otherwise, see if we can read the string. + StringRef StrData; + if (!getConstantStringInfo(V, StrData)) + return 0; + + return StrData.size()+1; +} + +/// GetStringLength - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. 
+uint64_t llvm::GetStringLength(Value *V) { + if (!V->getType()->isPointerTy()) return 0; + + SmallPtrSet<PHINode*, 32> PHIs; + uint64_t Len = GetStringLengthH(V, PHIs); + // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return + // an empty string as a length. + return Len == ~0ULL ? 1 : Len; +} + +Value * +llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { + if (!V->getType()->isPointerTy()) + return V; + for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + return V; + V = GA->getAliasee(); + } else { + // See if InstructionSimplify knows any relevant tricks. + if (Instruction *I = dyn_cast<Instruction>(V)) + // TODO: Acquire a DominatorTree and use it. 
+ if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { + V = Simplified; + continue; + } + + return V; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } + return V; +} + +void +llvm::GetUnderlyingObjects(Value *V, + SmallVectorImpl<Value *> &Objects, + const DataLayout *TD, + unsigned MaxLookup) { + SmallPtrSet<Value *, 4> Visited; + SmallVector<Value *, 4> Worklist; + Worklist.push_back(V); + do { + Value *P = Worklist.pop_back_val(); + P = GetUnderlyingObject(P, TD, MaxLookup); + + if (!Visited.insert(P)) + continue; + + if (SelectInst *SI = dyn_cast<SelectInst>(P)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + if (PHINode *PN = dyn_cast<PHINode>(P)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + + Objects.push_back(P); + } while (!Worklist.empty()); +} + +/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer +/// are lifetime markers. +/// +bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI); + if (!II) return false; + + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } + return true; +} + +bool llvm::isSafeToSpeculativelyExecute(const Value *V, + const DataLayout *TD) { + const Operator *Inst = dyn_cast<Operator>(V); + if (!Inst) + return false; + + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i))) + if (C->canTrap()) + return false; + + switch (Inst->getOpcode()) { + default: + return true; + case Instruction::UDiv: + case Instruction::URem: + // x / y is undefined if y == 0, but calcuations like x / 3 are safe. 
+ return isKnownNonZero(Inst->getOperand(1), TD); + case Instruction::SDiv: + case Instruction::SRem: { + Value *Op = Inst->getOperand(1); + // x / y is undefined if y == 0 + if (!isKnownNonZero(Op, TD)) + return false; + // x / y might be undefined if y == -1 + unsigned BitWidth = getBitWidth(Op->getType(), TD); + if (BitWidth == 0) + return false; + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(Op, KnownZero, KnownOne, TD); + return !!KnownZero; + } + case Instruction::Load: { + const LoadInst *LI = cast<LoadInst>(Inst); + if (!LI->isUnordered()) + return false; + return LI->getPointerOperand()->isDereferenceablePointer(); + } + case Instruction::Call: { + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + switch (II->getIntrinsicID()) { + // These synthetic intrinsics have no side-effects, and just mark + // information about their operands. + // FIXME: There are other no-op synthetic instructions that potentially + // should be considered at least *safe* to speculate... + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + return true; + + case Intrinsic::bswap: + case Intrinsic::ctlz: + case Intrinsic::ctpop: + case Intrinsic::cttz: + case Intrinsic::objectsize: + case Intrinsic::sadd_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::usub_with_overflow: + return true; + // TODO: some fp intrinsics are marked as having the same error handling + // as libm. They're safe to speculate when they won't error. + // TODO: are convert_{from,to}_fp16 safe? + // TODO: can we list target-specific intrinsics here? + default: break; + } + } + return false; // The called function could have undefined behavior or + // side-effects, even if marked readnone nounwind. 
+ } + case Instruction::VAArg: + case Instruction::Alloca: + case Instruction::Invoke: + case Instruction::PHI: + case Instruction::Store: + case Instruction::Ret: + case Instruction::Br: + case Instruction::IndirectBr: + case Instruction::Switch: + case Instruction::Unreachable: + case Instruction::Fence: + case Instruction::LandingPad: + case Instruction::AtomicRMW: + case Instruction::AtomicCmpXchg: + case Instruction::Resume: + return false; // Misc instructions which have effects + } +} + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { + // Alloca never returns null, malloc might. + if (isa<AllocaInst>(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return !GV->hasExternalWeakLinkage(); + + // operator new never returns null. + if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true)) + return true; + + return false; +} |