aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Analysis
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Analysis')
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp402
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp173
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp304
-rw-r--r--contrib/llvm/lib/Analysis/AliasDebugger.cpp138
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp652
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp102
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp1190
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp165
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp148
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp1406
-rw-r--r--contrib/llvm/lib/Analysis/DIBuilder.cpp835
-rw-r--r--contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp224
-rw-r--r--contrib/llvm/lib/Analysis/DebugInfo.cpp948
-rw-r--r--contrib/llvm/lib/Analysis/DomPrinter.cpp232
-rw-r--r--contrib/llvm/lib/Analysis/DominanceFrontier.cpp137
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraph.cpp340
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp608
-rw-r--r--contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp103
-rw-r--r--contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp609
-rw-r--r--contrib/llvm/lib/Analysis/IPA/IPA.cpp29
-rw-r--r--contrib/llvm/lib/Analysis/IVUsers.cpp268
-rw-r--r--contrib/llvm/lib/Analysis/InlineCost.cpp648
-rw-r--r--contrib/llvm/lib/Analysis/InstCount.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp2314
-rw-r--r--contrib/llvm/lib/Analysis/Interval.cpp58
-rw-r--r--contrib/llvm/lib/Analysis/IntervalPartition.cpp114
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp1126
-rw-r--r--contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp137
-rw-r--r--contrib/llvm/lib/Analysis/LibCallSemantics.cpp63
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp658
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp236
-rw-r--r--contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp358
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp419
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp422
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp167
-rw-r--r--contrib/llvm/lib/Analysis/MemoryBuiltins.cpp218
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp1408
-rw-r--r--contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp88
-rw-r--r--contrib/llvm/lib/Analysis/PHITransAddr.cpp442
-rw-r--r--contrib/llvm/lib/Analysis/PathNumbering.cpp522
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileInfo.cpp434
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileVerifier.cpp207
-rw-r--r--contrib/llvm/lib/Analysis/PostDominators.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp426
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfo.cpp1105
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp157
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp267
-rw-r--r--contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp382
-rw-r--r--contrib/llvm/lib/Analysis/RegionInfo.cpp851
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp275
-rw-r--r--contrib/llvm/lib/Analysis/RegionPrinter.cpp220
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp6361
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp173
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp1390
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp184
-rw-r--r--contrib/llvm/lib/Analysis/SparsePropagation.cpp347
-rw-r--r--contrib/llvm/lib/Analysis/Trace.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp300
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp1771
60 files changed, 33537 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
new file mode 100644
index 000000000000..c189a0042928
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -0,0 +1,402 @@
+//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic AliasAnalysis interface which is used as the
+// common interface used by all clients and implementations of alias analysis.
+//
+// This file also implements the default version of the AliasAnalysis interface
+// that is to be used when no other implementation is specified. This does some
+// simple tests that detect obvious cases: two different global pointers cannot
+// alias, a global cannot alias a malloc, two different mallocs cannot alias,
+// etc.
+//
+// This alias analysis implementation really isn't very good for anything, but
+// it is very fast, and makes a nice clean default implementation. Because it
+// handles lots of little corner cases, other, more complex, alias analysis
+// implementations may choose to rely on this pass to resolve these simple and
+// easy cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+// Register the AliasAnalysis interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
+char AliasAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Default chaining methods
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult
+AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->alias(LocA, LocB);
+}
+
+bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->pointsToConstantMemory(Loc, OrLocal);
+}
+
+void AliasAnalysis::deleteValue(Value *V) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->deleteValue(V);
+}
+
+void AliasAnalysis::copyValue(Value *From, Value *To) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->copyValue(From, To);
+}
+
+void AliasAnalysis::addEscapingUse(Use &U) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->addEscapingUse(U);
+}
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ ModRefBehavior MRB = getModRefBehavior(CS);
+ if (MRB == DoesNotAccessMemory)
+ return NoModRef;
+
+ ModRefResult Mask = ModRef;
+ if (onlyReadsMemory(MRB))
+ Mask = Ref;
+
+ if (onlyAccessesArgPointees(MRB)) {
+ bool doesAlias = false;
+ if (doesAccessArgPointees(MRB)) {
+ MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ const Value *Arg = *AI;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CSLoc(Arg, UnknownSize, CSTag);
+ if (!isNoAlias(CSLoc, Loc)) {
+ doesAlias = true;
+ break;
+ }
+ }
+ }
+ if (!doesAlias)
+ return NoModRef;
+ }
+
+ // If Loc is a constant memory location, the call definitely could not
+ // modify the memory location.
+ if ((Mask & Mod) && pointsToConstantMemory(Loc))
+ Mask = ModRefResult(Mask & ~Mod);
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ // If CS1 or CS2 are readnone, they don't interact.
+ ModRefBehavior CS1B = getModRefBehavior(CS1);
+ if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+ ModRefBehavior CS2B = getModRefBehavior(CS2);
+ if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+ // If they both only read from memory, there is no dependence.
+ if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
+ return NoModRef;
+
+ AliasAnalysis::ModRefResult Mask = ModRef;
+
+ // If CS1 only reads memory, the only dependence on CS2 can be
+ // from CS1 reading memory written by CS2.
+ if (onlyReadsMemory(CS1B))
+ Mask = ModRefResult(Mask & Ref);
+
+ // If CS2 only access memory through arguments, accumulate the mod/ref
+ // information from CS1's references to the memory referenced by
+ // CS2's arguments.
+ if (onlyAccessesArgPointees(CS2B)) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ if (doesAccessArgPointees(CS2B)) {
+ MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator
+ I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CS2Loc(Arg, UnknownSize, CS2Tag);
+ R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
+ if (R == Mask)
+ break;
+ }
+ }
+ return R;
+ }
+
+ // If CS1 only accesses memory through arguments, check if CS2 references
+ // any of the memory referenced by CS1's arguments. If not, return NoModRef.
+ if (onlyAccessesArgPointees(CS1B)) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ if (doesAccessArgPointees(CS1B)) {
+ MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator
+ I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CS1Loc(Arg, UnknownSize, CS1Tag);
+ if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
+ R = Mask;
+ break;
+ }
+ }
+ }
+ if (R == NoModRef)
+ return R;
+ }
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // Call back into the alias analysis with the other form of getModRefBehavior
+ // to see if it can give a better response.
+ if (const Function *F = CS.getCalledFunction())
+ Min = getModRefBehavior(F);
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Min;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any result we've managed to compute.
+ return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(const Function *F) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->getModRefBehavior(F);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasAnalysis non-virtual helper method implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+ return Location(LI->getPointerOperand(),
+ getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+ return Location(SI->getPointerOperand(),
+ getTypeStoreSize(SI->getValueOperand()->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
+ return Location(VI->getPointerOperand(),
+ UnknownSize,
+ VI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawSource(), Size, TBAATag);
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawDest(), Size, TBAATag);
+}
+
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
+ // Be conservative in the face of volatile.
+ if (L->isVolatile())
+ return ModRef;
+
+ // If the load address doesn't alias the given address, it doesn't read
+ // or write the specified memory.
+ if (!alias(getLocation(L), Loc))
+ return NoModRef;
+
+ // Otherwise, a load just reads.
+ return Ref;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
+ // Be conservative in the face of volatile.
+ if (S->isVolatile())
+ return ModRef;
+
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(getLocation(S), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ // Otherwise, a store just writes.
+ return Mod;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(getLocation(V), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this va_arg.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ // Otherwise, a va_arg reads and writes.
+ return ModRef;
+}
+
+// AliasAnalysis destructor: DO NOT move this to the header file for
+// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
+// the AliasAnalysis.o file in the current .a file, causing alias analysis
+// support to not be included in the tool correctly!
+//
+AliasAnalysis::~AliasAnalysis() {}
+
+/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
+/// AliasAnalysis interface before any other methods are called.
+///
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
+ TD = P->getAnalysisIfAvailable<TargetData>();
+ AA = &P->getAnalysis<AliasAnalysis>();
+}
+
+// getAnalysisUsage - All alias analysis implementations should invoke this
+// directly (using AliasAnalysis::getAnalysisUsage(AU)).
+void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>(); // All AA's chain
+}
+
+/// getTypeStoreSize - Return the TargetData store size for the given type,
+/// if known, or a conservative value otherwise.
+///
+uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+ return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
+}
+
+/// canBasicBlockModify - Return true if it is possible for execution of the
+/// specified basic block to modify the value pointed to by Ptr.
+///
+bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
+ const Location &Loc) {
+ return canInstructionRangeModify(BB.front(), BB.back(), Loc);
+}
+
+/// canInstructionRangeModify - Return true if it is possible for the execution
+/// of the specified instructions to modify the value pointed to by Ptr. The
+/// instructions to consider are all of the instructions in the range of [I1,I2]
+/// INCLUSIVE. I1 and I2 must be in the same basic block.
+///
+bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+ const Instruction &I2,
+ const Location &Loc) {
+ assert(I1.getParent() == I2.getParent() &&
+ "Instructions not in same basic block!");
+ BasicBlock::const_iterator I = &I1;
+ BasicBlock::const_iterator E = &I2;
+ ++E; // Convert from inclusive to exclusive range.
+
+ for (; I != E; ++I) // Check every instruction in range
+ if (getModRefInfo(I, Loc) & Mod)
+ return true;
+ return false;
+}
+
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool llvm::isNoAliasCall(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ return ImmutableCallSite(cast<Instruction>(V))
+ .paramHasAttr(0, Attribute::NoAlias);
+ return false;
+}
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object. This returns true for:
+/// Global Variables and Functions (but not Global Aliases)
+/// Allocas and Mallocs
+/// ByVal and NoAlias Arguments
+/// NoAlias returns
+///
+bool llvm::isIdentifiedObject(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
+ return true;
+ if (isNoAliasCall(V))
+ return true;
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasNoAliasAttr() || A->hasByValAttr();
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
new file mode 100644
index 000000000000..d947220e078d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -0,0 +1,173 @@
+//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass which can be used to count how many alias queries
+// are being made and how the alias analysis implementation being used responds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true));
+static cl::opt<bool>
+PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
+
+namespace {
+ class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
+ unsigned No, May, Partial, Must;
+ unsigned NoMR, JustRef, JustMod, MR;
+ Module *M;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasAnalysisCounter() : ModulePass(ID) {
+ initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
+ No = May = Partial = Must = 0;
+ NoMR = JustRef = JustMod = MR = 0;
+ }
+
+ void printLine(const char *Desc, unsigned Val, unsigned Sum) {
+ errs() << " " << Val << " " << Desc << " responses ("
+ << Val*100/Sum << "%)\n";
+ }
+ ~AliasAnalysisCounter() {
+ unsigned AASum = No+May+Partial+Must;
+ unsigned MRSum = NoMR+JustRef+JustMod+MR;
+ if (AASum + MRSum) { // Print a report if any counted queries occurred...
+ errs() << "\n===== Alias Analysis Counter Report =====\n"
+ << " Analysis counted:\n"
+ << " " << AASum << " Total Alias Queries Performed\n";
+ if (AASum) {
+ printLine("no alias", No, AASum);
+ printLine("may alias", May, AASum);
+ printLine("partial alias", Partial, AASum);
+ printLine("must alias", Must, AASum);
+ errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
+ << May*100/AASum << "%/"
+ << Partial*100/AASum << "%/"
+ << Must*100/AASum<<"%\n\n";
+ }
+
+ errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
+ if (MRSum) {
+ printLine("no mod/ref", NoMR, MRSum);
+ printLine("ref", JustRef, MRSum);
+ printLine("mod", JustMod, MRSum);
+ printLine("mod/ref", MR, MRSum);
+ errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum
+ << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
+ << "%/" << MR*100/MRSum <<"%\n\n";
+ }
+ }
+ }
+
+ bool runOnModule(Module &M) {
+ this->M = &M;
+ InitializeAliasAnalysis(this);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ // FIXME: We could count these too...
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // Forwarding functions: just delegate to a real AA implementation, counting
+ // the number of responses...
+ AliasResult alias(const Location &LocA, const Location &LocB);
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+ };
+}
+
+char AliasAnalysisCounter::ID = 0;
+INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
+ "Count Alias Analysis Query Responses", false, true, false)
+
+ModulePass *llvm::createAliasAnalysisCounterPass() {
+ return new AliasAnalysisCounter();
+}
+
+AliasAnalysis::AliasResult
+AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+ AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
+
+ const char *AliasString;
+ switch (R) {
+ default: llvm_unreachable("Unknown alias type!");
+ case NoAlias: No++; AliasString = "No alias"; break;
+ case MayAlias: May++; AliasString = "May alias"; break;
+ case PartialAlias: Partial++; AliasString = "Partial alias"; break;
+ case MustAlias: Must++; AliasString = "Must alias"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == MayAlias)) {
+ errs() << AliasString << ":\t";
+ errs() << "[" << LocA.Size << "B] ";
+ WriteAsOperand(errs(), LocA.Ptr, true, M);
+ errs() << ", ";
+ errs() << "[" << LocB.Size << "B] ";
+ WriteAsOperand(errs(), LocB.Ptr, true, M);
+ errs() << "\n";
+ }
+
+ return R;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
+
+ const char *MRString;
+ switch (R) {
+ default: llvm_unreachable("Unknown mod/ref type!");
+ case NoModRef: NoMR++; MRString = "NoModRef"; break;
+ case Ref: JustRef++; MRString = "JustRef"; break;
+ case Mod: JustMod++; MRString = "JustMod"; break;
+ case ModRef: MR++; MRString = "ModRef"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == ModRef)) {
+ errs() << MRString << ": Ptr: ";
+ errs() << "[" << Loc.Size << "B] ";
+ WriteAsOperand(errs(), Loc.Ptr, true, M);
+ errs() << "\t<->" << *CS.getInstruction() << '\n';
+ }
+ return R;
+}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
new file mode 100644
index 000000000000..1afc1b71d93e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -0,0 +1,304 @@
+//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple N^2 alias analysis accuracy evaluator.
+// Basically, for each function in the program, it simply queries to see how the
+// alias analysis implementation answers alias queries between each pair of
+// pointers in the function.
+//
+// This is inspired and adapted from code by: Naveen Neelakantam, Francesco
+// Spadini, and Wojciech Stryjewski.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
+static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
+static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
+static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+
+namespace {
+ class AAEval : public FunctionPass {
+ unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
+ unsigned NoModRef, Mod, Ref, ModRef;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AAEval() : FunctionPass(ID) {
+ initializeAAEvalPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M) {
+ NoAlias = MayAlias = PartialAlias = MustAlias = 0;
+ NoModRef = Mod = Ref = ModRef = 0;
+
+ if (PrintAll) {
+ PrintNoAlias = PrintMayAlias = true;
+ PrintPartialAlias = PrintMustAlias = true;
+ PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
+ }
+ return false;
+ }
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+}
+
+char AAEval::ID = 0;
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+
+FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
+
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ std::string o1, o2;
+ {
+ raw_string_ostream os1(o1), os2(o2);
+ WriteAsOperand(os1, V1, true, M);
+ WriteAsOperand(os2, V2, true, M);
+ }
+
+ if (o2 < o1)
+ std::swap(o1, o2);
+ errs() << " " << Msg << ":\t"
+ << o1 << ", "
+ << o2 << "\n";
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": Ptr: ";
+ WriteAsOperand(errs(), Ptr, true, M);
+ errs() << "\t<->" << *I << '\n';
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *CSA.getInstruction()
+ << " <-> " << *CSB.getInstruction() << '\n';
+ }
+}
+
+static inline bool isInterestingPointer(Value *V) {
+ return V->getType()->isPointerTy()
+ && !isa<ConstantPointerNull>(V);
+}
+
+bool AAEval::runOnFunction(Function &F) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ SetVector<Value *> Pointers;
+ SetVector<CallSite> CallSites;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ if (I->getType()->isPointerTy()) // Add all pointer arguments.
+ Pointers.insert(I);
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (I->getType()->isPointerTy()) // Add all pointer instructions.
+ Pointers.insert(&*I);
+ Instruction &Inst = *I;
+ if (CallSite CS = cast<Value>(&Inst)) {
+ Value *Callee = CS.getCalledValue();
+ // Skip actual functions for direct function calls.
+ if (!isa<Function>(Callee) && isInterestingPointer(Callee))
+ Pointers.insert(Callee);
+ // Consider formals.
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (isInterestingPointer(*AI))
+ Pointers.insert(*AI);
+ CallSites.insert(CS);
+ } else {
+ // Consider all operands.
+ for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
+ OI != OE; ++OI)
+ if (isInterestingPointer(*OI))
+ Pointers.insert(*OI);
+ }
+ }
+
+ if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
+ PrintNoModRef || PrintMod || PrintRef || PrintModRef)
+ errs() << "Function: " << F.getName() << ": " << Pointers.size()
+ << " pointers, " << CallSites.size() << " call sites\n";
+
+ // iterate over the worklist, and run the full (n^2)/2 disambiguations
+ for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+ I1 != E; ++I1) {
+ uint64_t I1Size = AliasAnalysis::UnknownSize;
+ const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+ if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+
+ for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+ uint64_t I2Size = AliasAnalysis::UnknownSize;
+ const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+
+ switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
+ case AliasAnalysis::NoAlias:
+ PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
+ ++MustAlias; break;
+ default:
+ errs() << "Unknown alias query result!\n";
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls and values
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ Instruction *I = C->getInstruction();
+
+ for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+ V != Ve; ++V) {
+ uint64_t Size = AliasAnalysis::UnknownSize;
+ const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+ if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+
+ switch (AA.getModRefInfo(*C, *V, Size)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
+ ++ModRef; break;
+ default:
+ errs() << "Unknown alias query result!\n";
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+ if (D == C)
+ continue;
+ switch (AA.getModRefInfo(*C, *D)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ ++ModRef; break;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void PrintPercent(unsigned Num, unsigned Sum) {
+ errs() << "(" << Num*100ULL/Sum << "."
+ << ((Num*1000ULL/Sum) % 10) << "%)\n";
+}
+
+bool AAEval::doFinalization(Module &M) {
+ unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
+ errs() << "===== Alias Analysis Evaluator Report =====\n";
+ if (AliasSum == 0) {
+ errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
+ } else {
+ errs() << " " << AliasSum << " Total Alias Queries Performed\n";
+ errs() << " " << NoAlias << " no alias responses ";
+ PrintPercent(NoAlias, AliasSum);
+ errs() << " " << MayAlias << " may alias responses ";
+ PrintPercent(MayAlias, AliasSum);
+ errs() << " " << PartialAlias << " partial alias responses ";
+ PrintPercent(PartialAlias, AliasSum);
+ errs() << " " << MustAlias << " must alias responses ";
+ PrintPercent(MustAlias, AliasSum);
+ errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
+ << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+ << PartialAlias*100/AliasSum << "%/"
+ << MustAlias*100/AliasSum << "%\n";
+ }
+
+ // Display the summary for mod/ref analysis
+ unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
+ if (ModRefSum == 0) {
+ errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+ } else {
+ errs() << " " << ModRefSum << " Total ModRef Queries Performed\n";
+ errs() << " " << NoModRef << " no mod/ref responses ";
+ PrintPercent(NoModRef, ModRefSum);
+ errs() << " " << Mod << " mod responses ";
+ PrintPercent(Mod, ModRefSum);
+ errs() << " " << Ref << " ref responses ";
+ PrintPercent(Ref, ModRefSum);
+ errs() << " " << ModRef << " mod & ref responses ";
+ PrintPercent(ModRef, ModRefSum);
+ errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
+ << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
+ << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
new file mode 100644
index 000000000000..f15c05153e10
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -0,0 +1,138 @@
+//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass checks alias analysis users to ensure that if they
+// create a new value, they do not query AA without informing it of the value.
+// It acts as a shim over any other AA pass you want.
+//
+// Yes keeping track of every value in the program is expensive, but this is
+// a debugging pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+
+ class AliasDebugger : public ModulePass, public AliasAnalysis {
+
+ //What we do is simple. Keep track of every value the AA could
+ //know about, and verify that queries are one of those.
+ //A query to a value that didn't exist when the AA was created
+ //means someone forgot to update the AA when creating new values
+
+ std::set<const Value*> Vals;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasDebugger() : ModulePass(ID) {
+ initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+
+ for(Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ Vals.insert(&*I);
+ for (User::const_op_iterator OI = I->op_begin(),
+ OE = I->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
+
+ for(Module::iterator I = M.begin(),
+ E = M.end(); I != E; ++I){
+ Vals.insert(&*I);
+ if(!I->isDeclaration()) {
+ for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end();
+ AI != AE; ++AI)
+ Vals.insert(&*AI);
+ for (Function::const_iterator FI = I->begin(), FE = I->end();
+ FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ Vals.insert(&*BI);
+ for (User::const_op_iterator OI = BI->op_begin(),
+ OE = BI->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
+ }
+
+ }
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Location &LocA, const Location &LocB) {
+ assert(Vals.find(LocA.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ assert(Vals.find(LocB.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ return AliasAnalysis::alias(LocA, LocB);
+ }
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+ }
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ virtual void deleteValue(Value *V) {
+ assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
+ AliasAnalysis::deleteValue(V);
+ }
+ virtual void copyValue(Value *From, Value *To) {
+ Vals.insert(To);
+ AliasAnalysis::copyValue(From, To);
+ }
+
+ };
+}
+
+char AliasDebugger::ID = 0;
+INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
+ "AA use debugger", false, true, false)
+
+Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
+
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
new file mode 100644
index 000000000000..2ed694941212
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -0,0 +1,652 @@
+//===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AliasSetTracker and AliasSet classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// mergeSetIn - Merge the specified alias set into this alias set.
+///
+void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
+ assert(!AS.Forward && "Alias set is already forwarding!");
+ assert(!Forward && "This set is a forwarding set!!");
+
+ // Update the alias and access types of this set...
+ AccessTy |= AS.AccessTy;
+ AliasTy |= AS.AliasTy;
+ Volatile |= AS.Volatile;
+
+ if (AliasTy == MustAlias) {
+ // Check that these two merged sets really are must aliases. Since both
+ // used to be must-alias sets, we can just check any pointer from each set
+ // for aliasing.
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ PointerRec *L = getSomePointer();
+ PointerRec *R = AS.getSomePointer();
+
+ // If the pointers are not a must-alias pair, this set becomes a may alias.
+ if (AA.alias(AliasAnalysis::Location(L->getValue(),
+ L->getSize(),
+ L->getTBAAInfo()),
+ AliasAnalysis::Location(R->getValue(),
+ R->getSize(),
+ R->getTBAAInfo()))
+ != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ }
+
+ if (CallSites.empty()) { // Merge call sites...
+ if (!AS.CallSites.empty())
+ std::swap(CallSites, AS.CallSites);
+ } else if (!AS.CallSites.empty()) {
+ CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end());
+ AS.CallSites.clear();
+ }
+
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
+
+ // Merge the list of constituent pointers...
+ if (AS.PtrList) {
+ *PtrListEnd = AS.PtrList;
+ AS.PtrList->setPrevInList(PtrListEnd);
+ PtrListEnd = AS.PtrListEnd;
+
+ AS.PtrList = 0;
+ AS.PtrListEnd = &AS.PtrList;
+ assert(*AS.PtrListEnd == 0 && "End of list is not null?");
+ }
+}
+
+void AliasSetTracker::removeAliasSet(AliasSet *AS) {
+ if (AliasSet *Fwd = AS->Forward) {
+ Fwd->dropRef(*this);
+ AS->Forward = 0;
+ }
+ AliasSets.erase(AS);
+}
+
+void AliasSet::removeFromTracker(AliasSetTracker &AST) {
+ assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!");
+ AST.removeAliasSet(this);
+}
+
+void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
+ uint64_t Size, const MDNode *TBAAInfo,
+ bool KnownMustAlias) {
+ assert(!Entry.hasAliasSet() && "Entry already in set!");
+
+ // Check to see if we have to downgrade to _may_ alias.
+ if (isMustAlias() && !KnownMustAlias)
+ if (PointerRec *P = getSomePointer()) {
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ AliasAnalysis::AliasResult Result =
+ AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
+ P->getTBAAInfo()),
+ AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+ if (Result != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ else // First entry of must alias must have maximum size!
+ P->updateSizeAndTBAAInfo(Size, TBAAInfo);
+ assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
+ }
+
+ Entry.setAliasSet(this);
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+
+ // Add it to the end of the list...
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ *PtrListEnd = &Entry;
+ PtrListEnd = Entry.setPrevInList(PtrListEnd);
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ addRef(); // Entry points to alias set.
+}
+
+void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
+ CallSites.push_back(CS.getInstruction());
+
+ AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return;
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ AliasTy = MayAlias;
+ AccessTy |= Refs;
+ return;
+ }
+
+ // FIXME: This should use mod/ref information to make this not suck so bad
+ AliasTy = MayAlias;
+ AccessTy = ModRef;
+}
+
+/// aliasesPointer - Return true if the specified pointer "may" (or must)
+/// alias one of the members in the set.
+///
+bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo,
+ AliasAnalysis &AA) const {
+ if (AliasTy == MustAlias) {
+ assert(CallSites.empty() && "Illegal must alias set!");
+
+ // If this is a set of MustAliases, only check to see if the pointer aliases
+ // SOME value in the set.
+ PointerRec *SomePtr = getSomePointer();
+ assert(SomePtr && "Empty must-alias set??");
+ return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
+ SomePtr->getSize(),
+ SomePtr->getTBAAInfo()),
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo));
+ }
+
+ // If this is a may-alias set, we have to check all of the pointers in the set
+ // to be sure it doesn't alias the set...
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+ AliasAnalysis::Location(I.getPointer(), I.getSize(),
+ I.getTBAAInfo())))
+ return true;
+
+ // Check the call sites list and invoke list...
+ if (!CallSites.empty()) {
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
+ if (AA.getModRefInfo(CallSites[i],
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+ AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ return false;
+}
+
+bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
+ if (AA.doesNotAccessMemory(CS))
+ return false;
+
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef ||
+ AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
+ AliasAnalysis::NoModRef)
+ return true;
+
+ return false;
+}
+
+void AliasSetTracker::clear() {
+ // Delete all the PointerRec entries.
+ for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
+ I != E; ++I)
+ I->second->eraseFromList();
+
+ PointerMap.clear();
+
+ // The alias sets should all be clear now.
+ AliasSets.clear();
+}
+
+
+/// findAliasSetForPointer - Given a pointer, find the one alias set to put the
+/// instruction referring to the pointer into. If there are multiple alias sets
+/// that may alias the pointer, merge them together and return the unified set.
+///
+AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
+ uint64_t Size,
+ const MDNode *TBAAInfo) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
+
+ if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ } else { // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ }
+
+ return FoundSet;
+}
+
+/// containsPointer - Return true if the specified location is represented by
+/// this alias set, false otherwise. This does not modify the AST object or
+/// alias sets.
+bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
+ return true;
+ return false;
+}
+
+
+
+AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesCallSite(CS, AA))
+ continue;
+
+ if (FoundSet == 0) // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ else if (!I->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ return FoundSet;
+}
+
+
+
+
+/// getAliasSetForPointer - Return the alias set that the specified pointer
+/// lives in.
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+ const MDNode *TBAAInfo,
+ bool *New) {
+ AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+
+ // Check to see if the pointer is already known.
+ if (Entry.hasAliasSet()) {
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+ // Return the set!
+ return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
+ }
+
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
+ // Add it to the alias set it aliases.
+ AS->addPointer(*this, Entry, Size, TBAAInfo);
+ return *AS;
+ }
+
+ if (New) *New = true;
+ // Otherwise create a new alias set to hold the loaded pointer.
+ AliasSets.push_back(new AliasSet());
+ AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
+ return AliasSets.back();
+}
+
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ bool NewPtr;
+ addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(LoadInst *LI) {
+ bool NewPtr;
+ AliasSet &AS = addPointer(LI->getOperand(0),
+ AA.getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::Refs, NewPtr);
+ if (LI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(StoreInst *SI) {
+ bool NewPtr;
+ Value *Val = SI->getOperand(0);
+ AliasSet &AS = addPointer(SI->getOperand(1),
+ AA.getTypeStoreSize(Val->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::Mods, NewPtr);
+ if (SI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(VAArgInst *VAAI) {
+ bool NewPtr;
+ addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::ModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(CallSite CS) {
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction()))
+ return true; // Ignore DbgInfo Intrinsics.
+ if (AA.doesNotAccessMemory(CS))
+ return true; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForCallSite(CS);
+ if (AS) {
+ AS->addCallSite(CS, AA);
+ return false;
+ }
+ AliasSets.push_back(new AliasSet());
+ AS = &AliasSets.back();
+ AS->addCallSite(CS, AA);
+ return true;
+}
+
+bool AliasSetTracker::add(Instruction *I) {
+ // Dispatch to one of the other add methods.
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return add(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return add(SI);
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return add(CI);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ return add(II);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return add(VAAI);
+ return true;
+}
+
+void AliasSetTracker::add(BasicBlock &BB) {
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ add(I);
+}
+
+void AliasSetTracker::add(const AliasSetTracker &AST) {
+ assert(&AA == &AST.AA &&
+ "Merging AliasSetTracker objects with different Alias Analyses!");
+
+ // Loop over all of the alias sets in AST, adding the pointers contained
+ // therein into the current alias sets. This can cause alias sets to be
+ // merged together in the current AST.
+ for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) {
+ if (I->Forward) continue; // Ignore forwarding alias sets
+
+ AliasSet &AS = const_cast<AliasSet&>(*I);
+
+ // If there are any call sites in the alias set, add them to this AST.
+ for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
+ add(AS.CallSites[i]);
+
+ // Loop over all of the pointers in this alias set.
+ bool X;
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+ ASI.getTBAAInfo(),
+ (AliasSet::AccessType)AS.AccessTy, X);
+ if (AS.isVolatile()) NewAS.setVolatile();
+ }
+ }
+}
+
+/// remove - Remove the specified (potentially non-empty) alias set from the
+/// tracker.
+void AliasSetTracker::remove(AliasSet &AS) {
+ // Drop all call sites.
+ AS.CallSites.clear();
+
+ // Clear the alias set.
+ unsigned NumRefs = 0;
+ while (!AS.empty()) {
+ AliasSet::PointerRec *P = AS.PtrList;
+
+ Value *ValToRemove = P->getValue();
+
+ // Unlink and delete entry from the list of values.
+ P->eraseFromList();
+
+ // Remember how many references need to be dropped.
+ ++NumRefs;
+
+ // Finally, remove the entry.
+ PointerMap.erase(ValToRemove);
+ }
+
+ // Stop using the alias set, removing it.
+ AS.RefCount -= NumRefs;
+ if (AS.RefCount == 0)
+ AS.removeFromTracker(*this);
+}
+
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(LoadInst *LI) {
+ uint64_t Size = AA.getTypeStoreSize(LI->getType());
+ const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(StoreInst *SI) {
+ uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+ const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(VAArgInst *VAAI) {
+ AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+ AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa));
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(CallSite CS) {
+ if (AA.doesNotAccessMemory(CS))
+ return false; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForCallSite(CS);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(Instruction *I) {
+ // Dispatch to one of the other remove methods...
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return remove(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return remove(SI);
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return remove(CI);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return remove(VAAI);
+ return true;
+}
+
+
+// deleteValue method - This method is used to remove a pointer value from the
+// AliasSetTracker entirely. It should be used when an instruction is deleted
+// from the program to update the AST. If you don't use this, you would have
+// dangling pointers to deleted instructions.
+//
+void AliasSetTracker::deleteValue(Value *PtrVal) {
+ // Notify the alias analysis implementation that this value is gone.
+ AA.deleteValue(PtrVal);
+
+ // If this is a call instruction, remove the callsite from the appropriate
+ // AliasSet (if present).
+ if (CallSite CS = PtrVal) {
+ if (!AA.doesNotAccessMemory(CS)) {
+ // Scan all the alias sets to see if this call site is contained.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward) continue;
+
+ I->removeCallSite(CS);
+ }
+ }
+ }
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find(PtrVal);
+ if (I == PointerMap.end()) return; // Noop
+
+ // If we found one, remove the pointer from the alias set it is in.
+ AliasSet::PointerRec *PtrValEnt = I->second;
+ AliasSet *AS = PtrValEnt->getAliasSet(*this);
+
+ // Unlink and delete from the list of values.
+ PtrValEnt->eraseFromList();
+
+ // Stop using the alias set.
+ AS->dropRef(*this);
+
+ PointerMap.erase(I);
+}
+
+// copyValue - This method should be used whenever a preexisting value in the
+// program is copied or cloned, introducing a new value. Note that it is ok for
+// clients that use this method to introduce the same value multiple times: if
+// the tracker already knows about a value, it will ignore the request.
+//
+void AliasSetTracker::copyValue(Value *From, Value *To) {
+ // Notify the alias analysis implementation that this value is copied.
+ AA.copyValue(From, To);
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find(From);
+ if (I == PointerMap.end())
+ return; // Noop
+ assert(I->second->hasAliasSet() && "Dead entry?");
+
+ AliasSet::PointerRec &Entry = getEntryFor(To);
+ if (Entry.hasAliasSet()) return; // Already in the tracker!
+
+ // Add it to the alias set it aliases...
+ I = PointerMap.find(From);
+ AliasSet *AS = I->second->getAliasSet(*this);
+ AS->addPointer(*this, Entry, I->second->getSize(),
+ I->second->getTBAAInfo(),
+ true);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// AliasSet/AliasSetTracker Printing Support
+//===----------------------------------------------------------------------===//
+
+void AliasSet::print(raw_ostream &OS) const {
+ OS << " AliasSet[" << (void*)this << ", " << RefCount << "] ";
+ OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
+ switch (AccessTy) {
+ case NoModRef: OS << "No access "; break;
+ case Refs : OS << "Ref "; break;
+ case Mods : OS << "Mod "; break;
+ case ModRef : OS << "Mod/Ref "; break;
+ default: llvm_unreachable("Bad value for AccessTy!");
+ }
+ if (isVolatile()) OS << "[volatile] ";
+ if (Forward)
+ OS << " forwarding to " << (void*)Forward;
+
+
+ if (!empty()) {
+ OS << "Pointers: ";
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I != begin()) OS << ", ";
+ WriteAsOperand(OS << "(", I.getPointer());
+ OS << ", " << I.getSize() << ")";
+ }
+ }
+ if (!CallSites.empty()) {
+ OS << "\n " << CallSites.size() << " Call Sites: ";
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ WriteAsOperand(OS, CallSites[i]);
+ }
+ }
+ OS << "\n";
+}
+
+void AliasSetTracker::print(raw_ostream &OS) const {
+ OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
+ << PointerMap.size() << " pointer values.\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ I->print(OS);
+ OS << "\n";
+}
+
+void AliasSet::dump() const { print(dbgs()); }
+void AliasSetTracker::dump() const { print(dbgs()); }
+
+//===----------------------------------------------------------------------===//
+// ASTCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void AliasSetTracker::ASTCallbackVH::deleted() {
+ assert(AST && "ASTCallbackVH called with a null AliasSetTracker!");
+ AST->deleteValue(getValPtr());
+ // this now dangles!
+}
+
+void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) {
+ AST->copyValue(getValPtr(), V);
+}
+
+AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
+ : CallbackVH(V), AST(ast) {}
+
+AliasSetTracker::ASTCallbackVH &
+AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
+ return *this = ASTCallbackVH(V, AST);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasSetPrinter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class AliasSetPrinter : public FunctionPass {
+ AliasSetTracker *Tracker;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AliasSetPrinter() : FunctionPass(ID) {
+ initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ Tracker->add(&*I);
+ Tracker->print(errs());
+ delete Tracker;
+ return false;
+ }
+ };
+}
+
+char AliasSetPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
new file mode 100644
index 000000000000..6ebe100b1330
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -0,0 +1,102 @@
+//===-- Analysis.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Analysis.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/Verifier.h"
+#include <cstring>
+
+using namespace llvm;
+
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void llvm::initializeAnalysis(PassRegistry &Registry) {
+ initializeAliasAnalysisAnalysisGroup(Registry);
+ initializeAliasAnalysisCounterPass(Registry);
+ initializeAAEvalPass(Registry);
+ initializeAliasDebuggerPass(Registry);
+ initializeAliasSetPrinterPass(Registry);
+ initializeNoAAPass(Registry);
+ initializeBasicAliasAnalysisPass(Registry);
+ initializeCFGViewerPass(Registry);
+ initializeCFGPrinterPass(Registry);
+ initializeCFGOnlyViewerPass(Registry);
+ initializeCFGOnlyPrinterPass(Registry);
+ initializePrintDbgInfoPass(Registry);
+ initializeDominanceFrontierPass(Registry);
+ initializeDomViewerPass(Registry);
+ initializeDomPrinterPass(Registry);
+ initializeDomOnlyViewerPass(Registry);
+ initializePostDomViewerPass(Registry);
+ initializeDomOnlyPrinterPass(Registry);
+ initializePostDomPrinterPass(Registry);
+ initializePostDomOnlyViewerPass(Registry);
+ initializePostDomOnlyPrinterPass(Registry);
+ initializeIVUsersPass(Registry);
+ initializeInstCountPass(Registry);
+ initializeIntervalPartitionPass(Registry);
+ initializeLazyValueInfoPass(Registry);
+ initializeLibCallAliasAnalysisPass(Registry);
+ initializeLintPass(Registry);
+ initializeLoopDependenceAnalysisPass(Registry);
+ initializeLoopInfoPass(Registry);
+ initializeMemDepPrinterPass(Registry);
+ initializeMemoryDependenceAnalysisPass(Registry);
+ initializeModuleDebugInfoPrinterPass(Registry);
+ initializePostDominatorTreePass(Registry);
+ initializeProfileEstimatorPassPass(Registry);
+ initializeNoProfileInfoPass(Registry);
+ initializeNoPathProfileInfoPass(Registry);
+ initializeProfileInfoAnalysisGroup(Registry);
+ initializePathProfileInfoAnalysisGroup(Registry);
+ initializeLoaderPassPass(Registry);
+ initializePathProfileLoaderPassPass(Registry);
+ initializeProfileVerifierPassPass(Registry);
+ initializePathProfileVerifierPass(Registry);
+ initializeRegionInfoPass(Registry);
+ initializeRegionViewerPass(Registry);
+ initializeRegionPrinterPass(Registry);
+ initializeRegionOnlyViewerPass(Registry);
+ initializeRegionOnlyPrinterPass(Registry);
+ initializeScalarEvolutionPass(Registry);
+ initializeScalarEvolutionAliasAnalysisPass(Registry);
+ initializeTypeBasedAliasAnalysisPass(Registry);
+}
+
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+ char **OutMessages) {
+ std::string Messages;
+
+ LLVMBool Result = verifyModule(*unwrap(M),
+ static_cast<VerifierFailureAction>(Action),
+ OutMessages? &Messages : 0);
+
+ if (OutMessages)
+ *OutMessages = strdup(Messages.c_str());
+
+ return Result;
+}
+
+LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+ return verifyFunction(*unwrap<Function>(Fn),
+ static_cast<VerifierFailureAction>(Action));
+}
+
+void LLVMViewFunctionCFG(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFG();
+}
+
+void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFGOnly();
+}
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
new file mode 100644
index 000000000000..f1bb8a38f090
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -0,0 +1,1190 @@
+//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the primary stateless implementation of the
+// Alias Analysis interface that implements identities (two different
+// globals cannot alias, etc), but does no stateful analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Useful predicates
+//===----------------------------------------------------------------------===//
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+static bool isKnownNonNull(const Value *V) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // Global values are not null unless extern weak.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage();
+ return false;
+}
+
+/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
+/// object that never escapes from the function.
+static bool isNonEscapingLocalObject(const Value *V) {
+ // If this is a local allocation, check to see if it escapes.
+ if (isa<AllocaInst>(V) || isNoAliasCall(V))
+ // Set StoreCaptures to True so that we can assume in our callers that the
+ // pointer is not the result of a load instruction. Currently
+ // PointerMayBeCaptured doesn't have any special analysis for the
+ // StoreCaptures=false case; if it did, our callers could be refined to be
+ // more precise.
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+
+ // If this is an argument that corresponds to a byval or noalias argument,
+ // then it has not escaped before entering the function. Check if it escapes
+ // inside the function.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr() || A->hasNoAliasAttr()) {
+ // Don't bother analyzing arguments already known not to escape.
+ if (A->hasNoCaptureAttr())
+ return true;
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ }
+ return false;
+}
+
+/// isEscapeSource - Return true if the pointer is one which would have
+/// been considered an escape by isNonEscapingLocalObject.
+static bool isEscapeSource(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
+ return true;
+
+ // The load case works because isNonEscapingLocalObject considers all
+ // stores to be escapes (it passes true for the StoreCaptures argument
+ // to PointerMayBeCaptured).
+ if (isa<LoadInst>(V))
+ return true;
+
+ return false;
+}
+
+/// getObjectSize - Return the size of the object specified by V, or
+/// UnknownSize if unknown.
+static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
+ const Type *AccessTy;
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (!GV->hasDefinitiveInitializer())
+ return AliasAnalysis::UnknownSize;
+ AccessTy = GV->getType()->getElementType();
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ if (!AI->isArrayAllocation())
+ AccessTy = AI->getType()->getElementType();
+ else
+ return AliasAnalysis::UnknownSize;
+ } else if (const CallInst* CI = extractMallocCall(V)) {
+ if (!isArrayMalloc(V, &TD))
+ // The size is the argument to the malloc call.
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
+ return C->getZExtValue();
+ return AliasAnalysis::UnknownSize;
+ } else if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (A->hasByValAttr())
+ AccessTy = cast<PointerType>(A->getType())->getElementType();
+ else
+ return AliasAnalysis::UnknownSize;
+ } else {
+ return AliasAnalysis::UnknownSize;
+ }
+
+ if (AccessTy->isSized())
+ return TD.getTypeAllocSize(AccessTy);
+ return AliasAnalysis::UnknownSize;
+}
+
+/// isObjectSmallerThan - Return true if we can prove that the object specified
+/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+ const TargetData &TD) {
+ uint64_t ObjectSize = getObjectSize(V, TD);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
+}
+
+/// isObjectSize - Return true if we can prove that the object specified
+/// by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size,
+ const TargetData &TD) {
+ uint64_t ObjectSize = getObjectSize(V, TD);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtr Instruction Decomposition and Analysis
+//===----------------------------------------------------------------------===//
+
+namespace {
+ enum ExtensionKind {
+ EK_NotExtended,
+ EK_SignExt,
+ EK_ZeroExt
+ };
+
+ struct VariableGEPIndex {
+ const Value *V;
+ ExtensionKind Extension;
+ int64_t Scale;
+ };
+}
+
+
+/// GetLinearExpression - Analyze the specified value as a linear expression:
+/// "A*V + B", where A and B are constant integers. Return the scale and offset
+/// values as APInts and return V as a Value*, and return whether we looked
+/// through any sign or zero extends. The incoming Value is known to have
+/// IntegerType and it may already be sign or zero extended.
+///
+/// Note that this looks through extends, so the high bits may not be
+/// represented in the result.
+static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
+ ExtensionKind &Extension,
+ const TargetData &TD, unsigned Depth) {
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
+
+ // Limit our recursion depth.
+ if (Depth == 6) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ switch (BOp->getOpcode()) {
+ default: break;
+ case Instruction::Or:
+ // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
+ // analyze it.
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD))
+ break;
+ // FALL THROUGH.
+ case Instruction::Add:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset += RHSC->getValue();
+ return V;
+ case Instruction::Mul:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset *= RHSC->getValue();
+ Scale *= RHSC->getValue();
+ return V;
+ case Instruction::Shl:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset <<= RHSC->getValue().getLimitedValue();
+ Scale <<= RHSC->getValue().getLimitedValue();
+ return V;
+ }
+ }
+ }
+
+ // Since GEP indices are sign extended anyway, we don't care about the high
+ // bits of a sign or zero extended value - just scales and offsets. The
+ // extensions have to be consistent though.
+ if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
+ (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+ Value *CastOp = cast<CastInst>(V)->getOperand(0);
+ unsigned OldWidth = Scale.getBitWidth();
+ unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+ Scale = Scale.trunc(SmallWidth);
+ Offset = Offset.trunc(SmallWidth);
+ Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
+ TD, Depth+1);
+ Scale = Scale.zext(OldWidth);
+ Offset = Offset.zext(OldWidth);
+
+ return Result;
+ }
+
+ Scale = 1;
+ Offset = 0;
+ return V;
+}
+
+/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
+/// into a base pointer with a constant offset and a number of scaled symbolic
+/// offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
+/// the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As such,
+/// the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When TargetData is around, this function is capable of analyzing everything
+/// that GetUnderlyingObject can look through. When not, it just looks
+/// through pointer casts.
+///
+static const Value *
+DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ const TargetData *TD) {
+ // Limit recursion depth to limit compile time in crazy cases.
+ unsigned MaxLookup = 6;
+
+ BaseOffs = 0;
+ do {
+ // See if this is a bitcast or GEP.
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (Op == 0) {
+ // The only non-operator case we can handle are GlobalAliases.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden()) {
+ V = GA->getAliasee();
+ continue;
+ }
+ }
+ return V;
+ }
+
+ if (Op->getOpcode() == Instruction::BitCast) {
+ V = Op->getOperand(0);
+ continue;
+ }
+
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Get a DominatorTree and use it here.
+ if (const Value *Simplified =
+ SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+ V = Simplified;
+ continue;
+ }
+
+ const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+ if (GEPOp == 0)
+ return V;
+
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return V;
+
+ // If we are lacking TargetData information, we can't compute the offets of
+ // elements computed by GEPs. However, we can handle bitcast equivalent
+ // GEPs.
+ if (TD == 0) {
+ if (!GEPOp->hasAllZeroIndices())
+ return V;
+ V = GEPOp->getOperand(0);
+ continue;
+ }
+
+ // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp);
+ for (User::const_op_iterator I = GEPOp->op_begin()+1,
+ E = GEPOp->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ if (FieldNo == 0) continue;
+
+ BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+ continue;
+ }
+
+ // For an array/pointer, add the element offset, explicitly scaled.
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero()) continue;
+ BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ continue;
+ }
+
+ uint64_t Scale = TD->getTypeAllocSize(*GTI);
+ ExtensionKind Extension = EK_NotExtended;
+
+ // If the integer type is smaller than the pointer size, it is implicitly
+ // sign extended to pointer size.
+ unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
+ if (TD->getPointerSizeInBits() > Width)
+ Extension = EK_SignExt;
+
+ // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
+ *TD, 0);
+
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+ BaseOffs += IndexOffset.getSExtValue()*Scale;
+ Scale *= IndexScale.getSExtValue();
+
+
+ // If we already had an occurrence of this index variable, merge this
+ // scale into it. For example, we want to handle:
+ // A[x][x] -> x*16 + x*4 -> x*20
+ // This also ensures that 'x' only appears in the index list once.
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+ if (VarIndices[i].V == Index &&
+ VarIndices[i].Extension == Extension) {
+ Scale += VarIndices[i].Scale;
+ VarIndices.erase(VarIndices.begin()+i);
+ break;
+ }
+ }
+
+ // Make sure that we have a scale that makes sense for this target's
+ // pointer size.
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ Scale <<= ShiftBits;
+ Scale = (int64_t)Scale >> ShiftBits;
+ }
+
+ if (Scale) {
+ VariableGEPIndex Entry = {Index, Extension, Scale};
+ VarIndices.push_back(Entry);
+ }
+ }
+
+ // Analyze the base pointer next.
+ V = GEPOp->getOperand(0);
+ } while (--MaxLookup);
+
+ // If the chain of expressions is too deep, just return early.
+ return V;
+}
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty()) return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ ExtensionKind Extension = Src[i].Extension;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin()+j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = { V, Extension, -Scale };
+ Dest.push_back(Entry);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+ if (const Instruction *inst = dyn_cast<Instruction>(V))
+ return inst->getParent()->getParent();
+
+ if (const Argument *arg = dyn_cast<Argument>(V))
+ return arg->getParent();
+
+ return NULL;
+}
+
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+ const Function *F1 = getParent(O1);
+ const Function *F2 = getParent(O2);
+
+ return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+namespace {
+ /// BasicAliasAnalysis - This is the primary alias analysis implementation.
+ struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ BasicAliasAnalysis() : ImmutablePass(ID) {
+ initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual AliasResult alias(const Location &LocA,
+ const Location &LocB) {
+ assert(Visited.empty() && "Visited must be cleared after use!");
+ assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+ "BasicAliasAnalysis doesn't support interprocedural queries.");
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
+ LocB.Ptr, LocB.Size, LocB.TBAATag);
+ Visited.clear();
+ return Alias;
+ }
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
+
+ /// pointsToConstantMemory - Chase pointers until we find a (constant
+ /// global) or not.
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+
+ /// getModRefBehavior - Return the behavior when calling the given
+ /// call site.
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+ /// getModRefBehavior - Return the behavior when calling the given function.
+ /// For use when the call site is not known.
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ // Visited - Track instructions visited by a aliasPHI, aliasSelect(), and aliasGEP().
+ SmallPtrSet<const Value*, 16> Visited;
+
+ // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
+ // instruction against another.
+ AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
+ const Value *UnderlyingV1, const Value *UnderlyingV2);
+
+ // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
+ // instruction against another.
+ AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ /// aliasSelect - Disambiguate a Select instruction against another value.
+ AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAATag,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAATag);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char BasicAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)",
+ false, true, false)
+
+ImmutablePass *llvm::createBasicAliasAnalysisPass() {
+ return new BasicAliasAnalysis();
+}
+
+/// pointsToConstantMemory - Returns whether the given pointer value
+/// points to memory that is local to the function, with global constants being
+/// considered local to all functions.
+bool
+BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Visited.empty() && "Visited must be cleared after use!");
+
+ unsigned MaxLookup = 8;
+ SmallVector<const Value *, 16> Worklist;
+ Worklist.push_back(Loc.Ptr);
+ do {
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
+ if (!Visited.insert(V)) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // An alloca instruction defines local memory.
+ if (OrLocal && isa<AllocaInst>(V))
+ continue;
+
+ // A global constant counts as local memory for our purposes.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+ // global to be marked constant in some modules and non-constant in
+ // others. GV may even be a declaration, not a definition.
+ if (!GV->isConstant()) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ continue;
+ }
+
+ // If both select values point to local memory, then so does the select.
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // If all values incoming to a phi node point to local memory, then so does
+ // the phi.
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Don't bother inspecting phi nodes with many operands.
+ if (PN->getNumIncomingValues() > MaxLookup) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+
+ // Otherwise be conservative.
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ } while (!Worklist.empty() && --MaxLookup);
+
+ Visited.clear();
+ return Worklist.empty();
+}
+
+/// getModRefBehavior - Return the behavior when calling the given call site.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (CS.doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the callsite knows it only reads memory, don't return worse
+ // than that.
+ if (CS.onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+/// getModRefBehavior - Return the behavior when calling the given function.
+/// For use when the call site is not known.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+ // If the function declares it doesn't access memory, we can't do better.
+ if (F->doesNotAccessMemory())
+ return DoesNotAccessMemory;
+
+ // For intrinsics, we can check the table.
+ if (unsigned iid = F->getIntrinsicID()) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ }
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the function declares it only reads memory, go with that.
+ if (F->onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+ // Otherwise be conservative.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+}
+
+/// getModRefInfo - Check to see if the specified callsite can clobber the
+/// specified memory object. Since we only look at local properties of this
+/// function, we really can't say much about this query. We do, however, use
+/// simple "address taken" analysis on local objects.
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
+ "AliasAnalysis query involving multiple functions!");
+
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
+
+ // If this is a tail call and Loc.Ptr points to a stack location, we know that
+ // the tail call cannot access or modify the local stack.
+ // We cannot exclude byval arguments here; these belong to the caller of
+ // the current function not to the current function, and a tail callee
+ // may reference them.
+ if (isa<AllocaInst>(Object))
+ if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (CI->isTailCall())
+ return NoModRef;
+
+ // If the pointer is to a locally allocated object that does not escape,
+ // then the call can not mod/ref the pointer unless the call takes the pointer
+ // as an argument, and itself doesn't capture it.
+ if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ isNonEscapingLocalObject(Object)) {
+ bool PassedAsArg = false;
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture pointer arguments.
+ if (!(*CI)->getType()->isPointerTy() ||
+ !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!isNoAlias(Location(cast<Value>(CI)), Loc)) {
+ PassedAsArg = true;
+ break;
+ }
+ }
+
+ if (!PassedAsArg)
+ return NoModRef;
+ }
+
+ ModRefResult Min = ModRef;
+
+ // Finally, handle specific knowledge of intrinsics.
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II != 0)
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ uint64_t Len = UnknownSize;
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ Value *Src = II->getArgOperand(1);
+ // If it can't overlap the source dest, then it doesn't modref the loc.
+ if (isNoAlias(Location(Dest, Len), Loc)) {
+ if (isNoAlias(Location(Src, Len), Loc))
+ return NoModRef;
+ // If it can't overlap the dest, then worst case it reads the loc.
+ Min = Ref;
+ } else if (isNoAlias(Location(Src, Len), Loc)) {
+ // If it can't overlap the source, then worst case it mutates the loc.
+ Min = Mod;
+ }
+ break;
+ }
+ case Intrinsic::memset:
+ // Since memset is 'accesses arguments' only, the AliasAnalysis base class
+ // will handle it for the variable length case.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ uint64_t Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ if (isNoAlias(Location(Dest, Len), Loc))
+ return NoModRef;
+ }
+ // We know that memset doesn't load anything.
+ Min = Mod;
+ break;
+ case Intrinsic::atomic_cmp_swap:
+ case Intrinsic::atomic_swap:
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin:
+ if (TD) {
+ Value *Op1 = II->getArgOperand(0);
+ uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType());
+ MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa);
+ if (isNoAlias(Location(Op1, Op1Size, Tag), Loc))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ uint64_t PtrSize =
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ if (isNoAlias(Location(II->getArgOperand(1),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::invariant_end: {
+ uint64_t PtrSize =
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+ if (isNoAlias(Location(II->getArgOperand(2),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::arm_neon_vld1: {
+ // LLVM's vld1 and vst1 intrinsics currently only support a single
+ // vector register.
+ uint64_t Size =
+ TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
+ if (isNoAlias(Location(II->getArgOperand(0), Size,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::arm_neon_vst1: {
+ uint64_t Size =
+ TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
+ if (isNoAlias(Location(II->getArgOperand(0), Size,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ }
+
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
+}
+
+/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+/// against another pointer. We know that V1 is a GEP, but we don't know
+/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
+/// UnderlyingV2 is the same for V2.
+///
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
+ const Value *UnderlyingV1,
+ const Value *UnderlyingV2) {
+ // If this GEP has been visited before, we're on a use-def cycle.
+ // Such cycles are only valid when PHI nodes are involved or in unreachable
+ // code. The visitPHI function catches cycles containing PHIs, but there
+ // could still be a cycle without PHIs in unreachable code.
+ if (!Visited.insert(GEP1))
+ return MayAlias;
+
+ int64_t GEP1BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
+
+ // If we have two gep instructions with must-alias'ing base pointers, figure
+ // out if the indexes to the GEP tell us anything about the derived pointer.
+ if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+ // Do the base pointers alias?
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ UnderlyingV2, UnknownSize, 0);
+
+ // If we get a No or May, then return it immediately, no amount of analysis
+ // will improve this situation.
+ if (BaseAlias != MustAlias) return BaseAlias;
+
+ // Otherwise, we have a MustAlias. Since the base pointers alias each other
+ // exactly, see if the computed offset from the common pointer tells us
+ // about the relation of the resulting pointer.
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ int64_t GEP2BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+ const Value *GEP2BasePtr =
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+
+ // If DecomposeGEPExpression isn't able to look all the way through the
+ // addressing operation, we must not have TD and this is too complex for us
+ // to handle without it.
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+
+ // Subtract the GEP2 pointer from the GEP1 pointer to find out their
+ // symbolic difference.
+ GEP1BaseOffset -= GEP2BaseOffset;
+ GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
+
+ } else {
+ // Check to see if these two pointers are related by the getelementptr
+ // instruction. If one pointer is a GEP with a non-zero index of the other
+ // pointer, we know they cannot alias.
+
+ // If both accesses are unknown size, we can't do anything useful here.
+ if (V1Size == UnknownSize && V2Size == UnknownSize)
+ return MayAlias;
+
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ V2, V2Size, V2TBAAInfo);
+ if (R != MustAlias)
+ // If V2 may alias GEP base pointer, conservatively returns MayAlias.
+ // If V2 is known not to alias GEP base pointer, then the two values
+ // cannot alias per GEP semantics: "A pointer value formed from a
+ // getelementptr instruction is associated with the addresses associated
+ // with the first operand of the getelementptr".
+ return R;
+
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ // If DecomposeGEPExpression isn't able to look all the way through the
+ // addressing operation, we must not have TD and this is too complex for us
+ // to handle without it.
+ if (GEP1BasePtr != UnderlyingV1) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+ }
+
+ // In the two GEP Case, if there is no difference in the offsets of the
+ // computed pointers, the resultant pointers are a must alias. This
+ // hapens when we have two lexically identical GEP's (for example).
+ //
+ // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
+ // must aliases the GEP, the end result is a must alias also.
+ if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
+ return MustAlias;
+
+ // If there is a difference between the pointers, but the difference is
+ // less than the size of the associated memory object, then we know
+ // that the objects are partially overlapping.
+ if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
+ if (GEP1BaseOffset >= 0 ?
+ (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) :
+ (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size &&
+ GEP1BaseOffset != INT64_MIN))
+ return PartialAlias;
+ }
+
+ // If we have a known constant offset, see if this offset is larger than the
+ // access size being queried. If so, and if no variable indices can remove
+ // pieces of this constant, then we know we have a no-alias. For example,
+ // &A[100] != &A.
+
+ // In order to handle cases like &A[100][i] where i is an out of range
+ // subscript, we have to ignore all constant offset pieces that are a multiple
+ // of a scaled index. Do this by removing constant offsets that are a
+ // multiple of any of our variable indices. This allows us to transform
+ // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1
+ // provides an offset of 4 bytes (assuming a <= 4 byte access).
+ for (unsigned i = 0, e = GEP1VariableIndices.size();
+ i != e && GEP1BaseOffset;++i)
+ if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
+ GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
+
+ // If our known offset is bigger than the access size, we know we don't have
+ // an alias.
+ if (GEP1BaseOffset) {
+ if (GEP1BaseOffset >= 0 ?
+ (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) :
+ (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size &&
+ GEP1BaseOffset != INT64_MIN))
+ return NoAlias;
+ }
+
+ return MayAlias;
+}
+
+/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
+/// instruction against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If this select has been visited before, we're on a use-def cycle.
+ // Such cycles are only valid when PHI nodes are involved or in unreachable
+ // code. The visitPHI function catches cycles containing PHIs, but there
+ // could still be a cycle without PHIs in unreachable code.
+ if (!Visited.insert(SI))
+ return MayAlias;
+
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for aliases between the values on corresponding arms.
+ if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
+ if (SI->getCondition() == SI2->getCondition()) {
+ AliasResult Alias =
+ aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
+ SI2->getTrueValue(), V2Size, V2TBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+ AliasResult ThisAlias =
+ aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
+ SI2->getFalseValue(), V2Size, V2TBAAInfo);
+ if (ThisAlias != Alias)
+ return MayAlias;
+ return Alias;
+ }
+
+ // If both arms of the Select node NoAlias or MustAlias V2, then returns
+ // NoAlias / MustAlias. Otherwise, returns MayAlias.
+ AliasResult Alias =
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ // If V2 is visited, the recursive case will have been caught in the
+ // above aliasCheck call, so these subsequent calls to aliasCheck
+ // don't need to assume that V2 is being visited recursively.
+ Visited.erase(V2);
+
+ AliasResult ThisAlias =
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
+ if (ThisAlias != Alias)
+ return MayAlias;
+ return Alias;
+}
+
+// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+// against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // The PHI node has already been visited, avoid recursion any further.
+ if (!Visited.insert(PN))
+ return MayAlias;
+
+ // If the values are PHIs in the same block, we can do a more precise
+ // as well as efficient check: just check for aliases between the values
+ // on corresponding edges.
+ if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
+ if (PN2->getParent() == PN->getParent()) {
+ AliasResult Alias =
+ aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
+ V2Size, V2TBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ AliasResult ThisAlias =
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+ V2Size, V2TBAAInfo);
+ if (ThisAlias != Alias)
+ return MayAlias;
+ }
+ return Alias;
+ }
+
+ SmallPtrSet<Value*, 4> UniqueSrc;
+ SmallVector<Value*, 4> V1Srcs;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *PV1 = PN->getIncomingValue(i);
+ if (isa<PHINode>(PV1))
+ // If any of the source itself is a PHI, return MayAlias conservatively
+ // to avoid compile time explosion. The worst possible case is if both
+ // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
+ // and 'n' are the number of PHI sources.
+ return MayAlias;
+ if (UniqueSrc.insert(PV1))
+ V1Srcs.push_back(PV1);
+ }
+
+ AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V1Srcs[0], PNSize, PNTBAAInfo);
+ // Early exit if the check of the first PHI source against V2 is MayAlias.
+ // Other results are not possible.
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ // If all sources of the PHI node NoAlias or MustAlias V2, then returns
+ // NoAlias / MustAlias. Otherwise, returns MayAlias.
+ for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
+ Value *V = V1Srcs[i];
+
+ // If V2 is visited, the recursive case will have been caught in the
+ // above aliasCheck call, so these subsequent calls to aliasCheck
+ // don't need to assume that V2 is being visited recursively.
+ Visited.erase(V2);
+
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V, PNSize, PNTBAAInfo);
+ if (ThisAlias != Alias || ThisAlias == MayAlias)
+ return MayAlias;
+ }
+
+ return Alias;
+}
+
+// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
+// such as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are.
+ if (V1Size == 0 || V2Size == 0)
+ return NoAlias;
+
+ // Strip off any casts if they exist.
+ V1 = V1->stripPointerCasts();
+ V2 = V2->stripPointerCasts();
+
+ // Are we checking for alias of the same value?
+ if (V1 == V2) return MustAlias;
+
+ if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
+ return NoAlias; // Scalars cannot alias each other
+
+ // Figure out what objects these things are pointing to if we can.
+ const Value *O1 = GetUnderlyingObject(V1, TD);
+ const Value *O2 = GetUnderlyingObject(V2, TD);
+
+ // Null values in the default address space don't point to any object, so they
+ // don't alias any other pointer.
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+
+ if (O1 != O2) {
+ // If V1/V2 point to two different objects we know that we have no alias.
+ if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+ return NoAlias;
+
+ // Constant pointers can't alias with non-const isIdentifiedObject objects.
+ if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+ (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+ return NoAlias;
+
+ // Arguments can't alias with local allocations or noalias calls
+ // in the same function.
+ if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
+ return NoAlias;
+
+ // Most objects can't alias null.
+ if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
+ return NoAlias;
+
+ // If one pointer is the result of a call/invoke or load and the other is a
+ // non-escaping local object within the same function, then we know the
+ // object couldn't escape to a point where the call could return it.
+ //
+ // Note that if the pointers are in different functions, there are a
+ // variety of complications. A call with a nocapture argument may still
+ // temporary store the nocapture argument's value in a temporary memory
+ // location if that memory location doesn't escape. Or it may pass a
+ // nocapture value to other functions as long as they don't capture it.
+ if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+ return NoAlias;
+ if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+ return NoAlias;
+ }
+
+ // If the size of one access is larger than the entire object on the other
+ // side, then we know such behavior is undefined and can assume no alias.
+ if (TD)
+ if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
+ return NoAlias;
+
+ // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
+ // GEP can't simplify, we don't even look at the PHI cases.
+ if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(O1, O2);
+ }
+ if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
+ AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
+ if (Result != MayAlias) return Result;
+ }
+
+ if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
+ AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return Result;
+ }
+
+ if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
+ AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return Result;
+ }
+
+ // If both pointers are pointing into the same object and one of them
+ // accesses is accessing the entire object, then the accesses must
+ // overlap in some way.
+ if (TD && O1 == O2)
+ if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
+ return PartialAlias;
+
+ return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+}
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 000000000000..7bb063fbbbcf
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,165 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a '-dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
+// The other main feature of this file is that it implements the
+// Function::viewCFG method, which is useful for debugging passes which operate
+// on the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFGPrinter.h"
+
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ struct CFGViewer : public FunctionPass {
+ static char ID; // Pass identifcation, replacement for typeid
+ CFGViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFG();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGViewer::ID = 0;
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
+
+namespace {
+ struct CFGOnlyViewer : public FunctionPass {
+ static char ID; // Pass identifcation, replacement for typeid
+ CFGOnlyViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFGOnly();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyViewer::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+ "View CFG of function (with no function bodies)", false, true)
+
+namespace {
+ struct CFGPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGPrinter() : FunctionPass(ID) {
+ initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, (const Function*)&F);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGPrinter::ID = 0;
+INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+ false, true)
+
+namespace {
+ struct CFGOnlyPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyPrinter() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, (const Function*)&F, true);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyPrinter::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+ "Print CFG of function to 'dot' file (with no function bodies)",
+ false, true)
+
+/// viewCFG - This function is meant for use from the debugger. You can just
+/// say 'call F->viewCFG()' and a ghostview window should pop up from the
+/// program, displaying the CFG of the current function. This depends on there
+/// being a 'dot' and 'gv' program in your path.
+///
+void Function::viewCFG() const {
+ ViewGraph(this, "cfg" + getNameStr());
+}
+
+/// viewCFGOnly - This function is meant for use from the debugger. It works
+/// just like viewCFG, but it does not include the contents of basic blocks
+/// into the nodes, just the label. If you are only interested in the CFG t
+/// his can make the graph smaller.
+///
+void Function::viewCFGOnly() const {
+ ViewGraph(this, "cfg" + getNameStr(), true);
+}
+
+FunctionPass *llvm::createCFGPrinterPass () {
+ return new CFGPrinter();
+}
+
+FunctionPass *llvm::createCFGOnlyPrinterPass () {
+ return new CFGOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 000000000000..b2c27d1dfc4b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,148 @@
+//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+// A pointer value is captured if the function makes a copy of any part of the
+// pointer that outlives the call. Not being captured means, more or less, that
+// the pointer is only dereferenced and not stored in a global. Returning part
+// of the pointer as the function return value may or may not count as capturing
+// the pointer, depending on the context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+/// As its comment mentions, PointerMayBeCaptured can be expensive.
+/// However, it's not easy for BasicAA to cache the result, because
+/// it's an ImmutablePass. To work around this, bound queries at a
+/// fixed number of uses.
+///
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
+/// PointerMayBeCaptured - Return true if this pointer value may be captured
+/// by the enclosing function (which is required to exist). This routine can
+/// be expensive, so consider caching the results. The boolean ReturnCaptures
+/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not. The boolean StoreCaptures specified whether
+/// storing the value (or part of it) into memory anywhere automatically
+/// counts as capturing it or not.
+bool llvm::PointerMayBeCaptured(const Value *V,
+ bool ReturnCaptures, bool StoreCaptures) {
+ assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
+ SmallVector<Use*, Threshold> Worklist;
+ SmallSet<Use*, Threshold> Visited;
+ int Count = 0;
+
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return true;
+
+ Use *U = &UI.getUse();
+ Visited.insert(U);
+ Worklist.push_back(U);
+ }
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ V = U->get();
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ // Not captured if the callee is readonly, doesn't return a copy through
+ // its return value and doesn't unwind (a readonly function can leak bits
+ // by throwing an exception or not depending on the input value).
+ if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+ break;
+
+ // Not captured if only passed via 'nocapture' arguments. Note that
+ // calling a function pointer does not in itself cause the pointer to
+ // be captured. This is a subtle point considering that (for example)
+ // the callee might return its own address. It is analogous to saying
+ // that loading a value from a pointer does not cause the pointer to be
+ // captured, even though the loaded value might be the pointer itself
+ // (think of self-referential objects).
+ CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (CallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture))
+ // The parameter is not marked 'nocapture' - captured.
+ return true;
+ // Only passed via 'nocapture' arguments, or is the called function - not
+ // captured.
+ break;
+ }
+ case Instruction::Load:
+ // Loading from a pointer does not cause it to be captured.
+ break;
+ case Instruction::VAArg:
+ // "va-arg" from a pointer does not cause it to be captured.
+ break;
+ case Instruction::Ret:
+ if (ReturnCaptures)
+ return true;
+ break;
+ case Instruction::Store:
+ if (V == I->getOperand(0))
+ // Stored the pointer - conservatively assume it may be captured.
+ // TODO: If StoreCaptures is not true, we could do Fancy analysis
+ // to determine whether this store is not actually an escape point.
+ // In that case, BasicAliasAnalysis should be updated as well to
+ // take advantage of this.
+ return true;
+ // Storing to the pointee does not cause the pointer to be captured.
+ break;
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ // The original value is not captured via this if the new value isn't.
+ for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Use *U = &UI.getUse();
+ if (Visited.insert(U))
+ Worklist.push_back(U);
+ }
+ break;
+ case Instruction::ICmp:
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
+ if (isNoAliasCall(V->stripPointerCasts()))
+ if (ConstantPointerNull *CPN =
+ dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (CPN->getType()->getAddressSpace() == 0)
+ break;
+ // Otherwise, be conservative. There are crazy ways to capture pointers
+ // using comparisons.
+ return true;
+ default:
+ // Something else - be conservative and say it is captured.
+ return true;
+ }
+ }
+
+ // All uses examined - not captured.
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
new file mode 100644
index 000000000000..5de2b04e80dd
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -0,0 +1,1406 @@
+//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines routines for folding instructions into constants.
+//
+// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// this file defines some additional folding routines that can make use of
+// TargetData information. These functions cannot go in VMCore due to library
+// dependency issues.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/FEnv.h"
+#include <cerrno>
+#include <cmath>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Folding internal helper functions
+//===----------------------------------------------------------------------===//
+
+/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
+/// TargetData. This always returns a non-null constant, but it may be a
+/// ConstantExpr if unfoldable.
+static Constant *FoldBitCast(Constant *C, const Type *DestTy,
+ const TargetData &TD) {
+
+ // This only handles casts to vectors currently.
+ const VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
+ if (DestVTy == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
+ // vector so the code below can handle it uniformly.
+ if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
+ Constant *Ops = C; // don't take the address of C!
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
+ }
+
+ // If this is a bitcast from constant vector -> vector, fold it.
+ ConstantVector *CV = dyn_cast<ConstantVector>(C);
+ if (CV == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If the element types match, VMCore can fold it.
+ unsigned NumDstElt = DestVTy->getNumElements();
+ unsigned NumSrcElt = CV->getNumOperands();
+ if (NumDstElt == NumSrcElt)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ const Type *SrcEltTy = CV->getType()->getElementType();
+ const Type *DstEltTy = DestVTy->getElementType();
+
+ // Otherwise, we're changing the number of elements in a vector, which
+ // requires endianness information to do the right thing. For example,
+ // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ // folds to (little endian):
+ // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+ // and to (big endian):
+ // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+ // First thing is first. We only want to think about integer here, so if
+ // we have something in FP form, recast it as integer.
+ if (DstEltTy->isFloatingPointTy()) {
+ // Fold to an vector of integers with same size as our FP type.
+ unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+ const Type *DestIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+ // Recursively handle this integer conversion, if possible.
+ C = FoldBitCast(C, DestIVTy, TD);
+ if (!C) return ConstantExpr::getBitCast(C, DestTy);
+
+ // Finally, VMCore can handle this now that #elts line up.
+ return ConstantExpr::getBitCast(C, DestTy);
+ }
+
+ // Okay, we know the destination is integer, if the input is FP, convert
+ // it to integer first.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ const Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+ // Ask VMCore to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ CV = dyn_cast<ConstantVector>(C);
+ if (!CV) // If VMCore wasn't able to fold it, bail out.
+ return C;
+ }
+
+ // Now we know that the input and output vectors are both integer vectors
+ // of the same size, and that their #elements is not the same. Do the
+ // conversion here, which depends on whether the input or output has
+ // more elements.
+ bool isLittleEndian = TD.isLittleEndian();
+
+ SmallVector<Constant*, 32> Result;
+ if (NumDstElt < NumSrcElt) {
+ // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+ Constant *Zero = Constant::getNullValue(DstEltTy);
+ unsigned Ratio = NumSrcElt/NumDstElt;
+ unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned SrcElt = 0;
+ for (unsigned i = 0; i != NumDstElt; ++i) {
+ // Build each element of the result.
+ Constant *Elt = Zero;
+ unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // Zero extend the element to the right size.
+ Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+ // Shift it to the right place, depending on endianness.
+ Src = ConstantExpr::getShl(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+ // Mix it in.
+ Elt = ConstantExpr::getOr(Elt, Src);
+ }
+ Result.push_back(Elt);
+ }
+ } else {
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+ }
+ }
+ }
+
+ return ConstantVector::get(Result);
+}
+
+
+/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
+/// from a global, return the global and the constant. Because of
+/// constantexprs, this function is recursive.
+static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
+ int64_t &Offset, const TargetData &TD) {
+ // Trivial case, constant is the global.
+ if ((GV = dyn_cast<GlobalValue>(C))) {
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, if this isn't a constant expr, bail out.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return false;
+
+ // Look through ptr->int and ptr->ptr casts.
+ if (CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::BitCast)
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+
+ // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // Cannot compute this if the element type of the pointer is missing size
+ // info.
+ if (!cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return false;
+
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ gep_type_iterator GTI = gep_type_begin(CE);
+ for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end();
+ i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*i);
+ if (!CI) return false; // Index isn't a simple constant?
+ if (CI->isZero()) continue; // Not adding anything.
+
+ if (const StructType *ST = dyn_cast<StructType>(*GTI)) {
+ // N = N + Offset
+ Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
+ } else {
+ const SequentialType *SQT = cast<SequentialType>(*GTI);
+ Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue();
+ }
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
+/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the
+/// pointer to copy results into and BytesLeft is the number of bytes left in
+/// the CurPtr buffer. TD is the target data.
+static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
+ unsigned char *CurPtr, unsigned BytesLeft,
+ const TargetData &TD) {
+ assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
+ "Out of range access");
+
+ // If this element is zero or undefined, we can just return since *CurPtr is
+ // zero initialized.
+ if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
+ return true;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getBitWidth() > 64 ||
+ (CI->getBitWidth() & 7) != 0)
+ return false;
+
+ uint64_t Val = CI->getZExtValue();
+ unsigned IntBytes = unsigned(CI->getBitWidth()/8);
+
+ for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
+ CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8));
+ ++ByteOffset;
+ }
+ return true;
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isDoubleTy()) {
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ if (CFP->getType()->isFloatTy()){
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ return false;
+ }
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ const StructLayout *SL = TD.getStructLayout(CS->getType());
+ unsigned Index = SL->getElementContainingOffset(ByteOffset);
+ uint64_t CurEltOffset = SL->getElementOffset(Index);
+ ByteOffset -= CurEltOffset;
+
+ while (1) {
+ // If the element access is to the element itself and not to tail padding,
+ // read the bytes from the element.
+ uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType());
+
+ if (ByteOffset < EltSize &&
+ !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
+ BytesLeft, TD))
+ return false;
+
+ ++Index;
+
+ // Check to see if we read from the last struct element, if so we're done.
+ if (Index == CS->getType()->getNumElements())
+ return true;
+
+ // If we read all of the bytes we needed from this element we're done.
+ uint64_t NextEltOffset = SL->getElementOffset(Index);
+
+ if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset)
+ return true;
+
+ // Move to the next element of the struct.
+ CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
+ BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
+ ByteOffset = 0;
+ CurEltOffset = NextEltOffset;
+ }
+ // not reached.
+ }
+
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType());
+ uint64_t Index = ByteOffset / EltSize;
+ uint64_t Offset = ByteOffset - Index * EltSize;
+ for (; Index != CA->getType()->getNumElements(); ++Index) {
+ if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr,
+ BytesLeft, TD))
+ return false;
+ if (EltSize >= BytesLeft)
+ return true;
+
+ Offset = 0;
+ BytesLeft -= EltSize;
+ CurPtr += EltSize;
+ }
+ return true;
+ }
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType());
+ uint64_t Index = ByteOffset / EltSize;
+ uint64_t Offset = ByteOffset - Index * EltSize;
+ for (; Index != CV->getType()->getNumElements(); ++Index) {
+ if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr,
+ BytesLeft, TD))
+ return false;
+ if (EltSize >= BytesLeft)
+ return true;
+
+ Offset = 0;
+ BytesLeft -= EltSize;
+ CurPtr += EltSize;
+ }
+ return true;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::IntToPtr &&
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ BytesLeft, TD);
+ }
+
+ // Otherwise, unknown initializer type.
+ return false;
+}
+
+static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
+ const TargetData &TD) {
+ const Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
+
+ // If this isn't an integer load we can't fold it directly.
+ if (!IntType) {
+ // If this is a float/double load, we can try folding it as an int32/64 load
+ // and then bitcast the result. This can be useful for union cases. Note
+ // that address spaces don't matter here since we're not going to result in
+ // an actual new load.
+ const Type *MapTy;
+ if (LoadTy->isFloatTy())
+ MapTy = Type::getInt32PtrTy(C->getContext());
+ else if (LoadTy->isDoubleTy())
+ MapTy = Type::getInt64PtrTy(C->getContext());
+ else if (LoadTy->isVectorTy()) {
+ MapTy = IntegerType::get(C->getContext(),
+ TD.getTypeAllocSizeInBits(LoadTy));
+ MapTy = PointerType::getUnqual(MapTy);
+ } else
+ return 0;
+
+ C = FoldBitCast(C, MapTy, TD);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
+ return FoldBitCast(Res, LoadTy, TD);
+ return 0;
+ }
+
+ unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
+ if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
+
+ GlobalValue *GVal;
+ int64_t Offset;
+ if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
+ return 0;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ !GV->getInitializer()->getType()->isSized())
+ return 0;
+
+ // If we're loading off the beginning of the global, some bytes may be valid,
+ // but we don't try to handle this.
+ if (Offset < 0) return 0;
+
+ // If we're not accessing anything in this constant, the result is undefined.
+ if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType()))
+ return UndefValue::get(IntType);
+
+ unsigned char RawBytes[32] = {0};
+ if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes,
+ BytesLoaded, TD))
+ return 0;
+
+ APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]);
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[BytesLoaded-1-i];
+ }
+
+ return ConstantInt::get(IntType->getContext(), ResultVal);
+}
+
+/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
+/// produce if it is constant and determinable. If this is not determinable,
+/// return null.
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
+ const TargetData *TD) {
+ // First, try the easy cases:
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+
+ // If the loaded value isn't a constant expr, we can't handle it.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return 0;
+
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Constant *V =
+ ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
+ return V;
+ }
+
+ // Instead of loading constant c string, use corresponding integer value
+ // directly if string length is small enough.
+ std::string Str;
+ if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
+ unsigned StrLen = Str.length();
+ const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+ unsigned NumBits = Ty->getPrimitiveSizeInBits();
+ // Replace load with immediate integer if the result is an integer or fp
+ // value.
+ if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
+ (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
+ APInt StrVal(NumBits, 0);
+ APInt SingleChar(NumBits, 0);
+ if (TD->isLittleEndian()) {
+ for (signed i = StrLen-1; i >= 0; i--) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ } else {
+ for (unsigned i = 0; i < StrLen; i++) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ // Append NULL at the end.
+ SingleChar = 0;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+
+ Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
+ if (Ty->isFloatingPointTy())
+ Res = ConstantExpr::getBitCast(Res, Ty);
+ return Res;
+ }
+ }
+
+ // If this load comes from anywhere in a constant global, and if the global
+ // is all undef or zero, we know what it loads.
+ if (GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+ const Type *ResTy = cast<PointerType>(C->getType())->getElementType();
+ if (GV->getInitializer()->isNullValue())
+ return Constant::getNullValue(ResTy);
+ if (isa<UndefValue>(GV->getInitializer()))
+ return UndefValue::get(ResTy);
+ }
+ }
+
+ // Try hard to fold loads from bitcasted strange and non-type-safe things. We
+ // currently don't do any of this for big endian systems. It can be
+ // generalized in the future if someone is interested.
+ if (TD && TD->isLittleEndian())
+ return FoldReinterpretLoadFromConstPtr(CE, *TD);
+ return 0;
+}
+
+static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){
+ if (LI->isVolatile()) return 0;
+
+ if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
+ return ConstantFoldLoadFromConstPtr(C, TD);
+
+ return 0;
+}
+
+/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// Attempt to symbolically evaluate the result of a binary operator merging
+/// these together. If target data info is available, it is provided as TD,
+/// otherwise TD is null.
+static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
+ Constant *Op1, const TargetData *TD){
+ // SROA
+
+ // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
+ // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
+ // bits.
+
+
+ // If the constant expr is something like &A[123] - &A[4].f, fold this into a
+ // constant. This happens frequently when iterating over a global array.
+ if (Opc == Instruction::Sub && TD) {
+ GlobalValue *GV1, *GV2;
+ int64_t Offs1, Offs2;
+
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
+ GV1 == GV2) {
+ // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
+ return ConstantInt::get(Op0->getType(), Offs1-Offs2);
+ }
+ }
+
+ return 0;
+}
+
+/// CastGEPIndices - If array indices are not pointer-sized integers,
+/// explicitly cast them so that they aren't implicitly casted by the
+/// getelementptr.
+static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
+ const Type *ResultTy,
+ const TargetData *TD) {
+ if (!TD) return 0;
+ const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+
+ bool Any = false;
+ SmallVector<Constant*, 32> NewIdxs;
+ for (unsigned i = 1; i != NumOps; ++i) {
+ if ((i == 1 ||
+ !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
+ reinterpret_cast<Value *const *>(Ops+1),
+ i-1))) &&
+ Ops[i]->getType() != IntPtrTy) {
+ Any = true;
+ NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+ true,
+ IntPtrTy,
+ true),
+ Ops[i], IntPtrTy));
+ } else
+ NewIdxs.push_back(Ops[i]);
+ }
+ if (!Any) return 0;
+
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size());
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ return C;
+}
+
+/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
+/// constant expression, do so.
+static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
+ const Type *ResultTy,
+ const TargetData *TD) {
+ Constant *Ptr = Ops[0];
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+ return 0;
+
+ const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+
+ // If this is a constant expr gep that is effectively computing an
+ // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
+ for (unsigned i = 1; i != NumOps; ++i)
+ if (!isa<ConstantInt>(Ops[i])) {
+
+ // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+ // "inttoptr (sub (ptrtoint Ptr), V)"
+ if (NumOps == 2 &&
+ cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
+ assert((CE == 0 || CE->getType() == IntPtrTy) &&
+ "CastGEPIndices didn't canonicalize index types!");
+ if (CE && CE->getOpcode() == Instruction::Sub &&
+ CE->getOperand(0)->isNullValue()) {
+ Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+ Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+ Res = ConstantExpr::getIntToPtr(Res, ResultTy);
+ if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
+ Res = ConstantFoldConstantExpression(ResCE, TD);
+ return Res;
+ }
+ }
+ return 0;
+ }
+
+ unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
+ APInt Offset = APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(),
+ (Value**)Ops+1, NumOps-1));
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
+
+ // If this is a GEP of a GEP, fold it all into a single GEP.
+ while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+
+ // Do not try the incorporate the sub-GEP if some index is not a number.
+ bool AllConstantInt = true;
+ for (unsigned i = 0, e = NestedOps.size(); i != e; ++i)
+ if (!isa<ConstantInt>(NestedOps[i])) {
+ AllConstantInt = false;
+ break;
+ }
+ if (!AllConstantInt)
+ break;
+
+ Ptr = cast<Constant>(GEP->getOperand(0));
+ Offset += APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(),
+ (Value**)NestedOps.data(),
+ NestedOps.size()));
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
+ }
+
+ // If the base value for this address is a literal integer value, fold the
+ // getelementptr to the resulting integer value casted to the pointer type.
+ APInt BasePtr(BitWidth, 0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+ BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ if (Ptr->isNullValue() || BasePtr != 0) {
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+ return ConstantExpr::getIntToPtr(C, ResultTy);
+ }
+
+ // Otherwise form a regular getelementptr. Recompute the indices so that
+ // we eliminate over-indexing of the notional static type array bounds.
+ // This makes it easy to determine if the getelementptr is "inbounds".
+ // Also, this helps GlobalOpt do SROA on GlobalVariables.
+ const Type *Ty = Ptr->getType();
+ SmallVector<Constant*, 32> NewIdxs;
+ do {
+ if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
+ if (ATy->isPointerTy()) {
+ // The only pointer indexing we'll do is on the first index of the GEP.
+ if (!NewIdxs.empty())
+ break;
+
+ // Only handle pointers to sized types, not pointers to functions.
+ if (!ATy->getElementType()->isSized())
+ return 0;
+ }
+
+ // Determine which element of the array the offset points into.
+ APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ if (ElemSize == 0)
+ // The element size is 0. This may be [0 x Ty]*, so just use a zero
+ // index for this level and proceed to the next level to see if it can
+ // accommodate the offset.
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+ else {
+ // The element size is non-zero divide the offset by the element
+ // size (rounding down), to compute the index at this level.
+ APInt NewIdx = Offset.udiv(ElemSize);
+ Offset -= NewIdx * ElemSize;
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+ }
+ Ty = ATy->getElementType();
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Determine which field of the struct the offset points into. The
+ // getZExtValue is at least as safe as the StructLayout API because we
+ // know the offset is within the struct at this point.
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ ElIdx));
+ Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
+ Ty = STy->getTypeAtIndex(ElIdx);
+ } else {
+ // We've reached some non-indexable type.
+ break;
+ }
+ } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+
+ // If we haven't used up the entire offset by descending the static
+ // type, then the offset is pointing into the middle of an indivisible
+ // member, so we can't simplify it.
+ if (Offset != 0)
+ return 0;
+
+ // Create a GEP.
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size());
+ assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ "Computed GetElementPtr has unexpected type!");
+
+ // If we ended up indexing a member with a type that doesn't match
+ // the type of what the original indices indexed, add a cast.
+ if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ C = FoldBitCast(C, ResultTy, *TD);
+
+ return C;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding public APIs
+//===----------------------------------------------------------------------===//
+
+/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// If successful, the constant result is returned, if not, null is returned.
+/// Note that this fails if not all of the operands are constant. Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+ // Handle PHI nodes quickly here...
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Constant *CommonValue = 0;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is undef then skip it. Note that while we could
+ // skip the value if it is equal to the phi node itself we choose not to
+ // because that would break the rule that constant folding only applies if
+ // all operands are constants.
+ if (isa<UndefValue>(Incoming))
+ continue;
+ // If the incoming value is not a constant, or is a different constant to
+ // the one we saw previously, then give up.
+ Constant *C = dyn_cast<Constant>(Incoming);
+ if (!C || (CommonValue && C != CommonValue))
+ return 0;
+ CommonValue = C;
+ }
+
+ // If we reach here, all incoming values are the same constant or undef.
+ return CommonValue ? CommonValue : UndefValue::get(PN->getType());
+ }
+
+ // Scan the operand list, checking to see if they are all constants, if so,
+ // hand off to ConstantFoldInstOperands.
+ SmallVector<Constant*, 8> Ops;
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (Constant *Op = dyn_cast<Constant>(*i))
+ Ops.push_back(Op);
+ else
+ return 0; // All operands not constant!
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return ConstantFoldLoadInst(LI, TD);
+
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ return ConstantExpr::getInsertValue(
+ cast<Constant>(IVI->getAggregateOperand()),
+ cast<Constant>(IVI->getInsertedValueOperand()),
+ IVI->idx_begin(), IVI->getNumIndices());
+
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ return ConstantExpr::getExtractValue(
+ cast<Constant>(EVI->getAggregateOperand()),
+ EVI->idx_begin(), EVI->getNumIndices());
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified TargetData. If successful, the constant result is
+/// result is returned, if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+ const TargetData *TD) {
+ SmallVector<Constant*, 8> Ops;
+ for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
+ i != e; ++i) {
+ Constant *NewC = cast<Constant>(*i);
+ // Recursively fold the ConstantExpr's operands.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
+ NewC = ConstantFoldConstantExpression(NewCE, TD);
+ Ops.push_back(NewC);
+ }
+
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ TD);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
+ Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// specified opcode and operands. If successful, the constant result is
+/// returned, if not, null is returned. Note that this function can fail when
+/// attempting to fold instructions like loads and stores, which have no
+/// constant expression form.
+///
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
+/// information, due to only being passed an opcode and operands. Constant
+/// folding using this function strips this information.
+///
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
+ Constant* const* Ops, unsigned NumOps,
+ const TargetData *TD) {
+ // Handle easy binops first.
+ if (Instruction::isBinaryOp(Opcode)) {
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ return C;
+
+ return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
+ }
+
+ switch (Opcode) {
+ default: return 0;
+ case Instruction::ICmp:
+ case Instruction::FCmp: assert(0 && "Invalid for compares");
+ case Instruction::Call:
+ if (Function *F = dyn_cast<Function>(Ops[NumOps - 1]))
+ if (canConstantFoldCallTo(F))
+ return ConstantFoldCall(F, Ops, NumOps - 1);
+ return 0;
+ case Instruction::PtrToInt:
+ // If the input is a inttoptr, eliminate the pair. This requires knowing
+ // the width of a pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+ Constant *Input = CE->getOperand(0);
+ unsigned InWidth = Input->getType()->getScalarSizeInBits();
+ if (TD->getPointerSizeInBits() < InWidth) {
+ Constant *Mask =
+ ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
+ TD->getPointerSizeInBits()));
+ Input = ConstantExpr::getAnd(Input, Mask);
+ }
+ // Do a zext or trunc to get to the dest size.
+ return ConstantExpr::getIntegerCast(Input, DestTy, false);
+ }
+ }
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::IntToPtr:
+ // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
+ // the int size is >= the ptr size. This requires knowing the width of a
+ // pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
+ if (TD &&
+ TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
+ CE->getOpcode() == Instruction::PtrToInt)
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::BitCast:
+ if (TD)
+ return FoldBitCast(Ops[0], DestTy, *TD);
+ return ConstantExpr::getBitCast(Ops[0], DestTy);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD))
+ return C;
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
+ return C;
+
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
+ }
+}
+
+/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// instruction (icmp/fcmp) with the specified operands. If it fails, it
+/// returns a constant expression of the specified operands.
+///
+Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
+ Constant *Ops0, Constant *Ops1,
+ const TargetData *TD) {
+ // fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
+ // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
+ //
+ // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+ // around to know if bit truncation is happening.
+ if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
+ if (TD && Ops1->isNullValue()) {
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ }
+
+ // Only do this transformation if the int is intptrty in size, otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if (CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ }
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
+ if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
+ IntPtrTy, false);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
+ }
+
+ // Only do this transformation if the int is intptrty in size, otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if ((CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
+ return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
+ CE1->getOperand(0), TD);
+ }
+ }
+
+ // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
+ // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
+ if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
+ CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
+ Constant *LHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+ Constant *RHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+ unsigned OpC =
+ Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ Constant *Ops[] = { LHS, RHS };
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD);
+ }
+ }
+
+ return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
+}
+
+
+/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
+/// getelementptr constantexpr, return the constant value being addressed by the
+/// constant expression, or null if something is funny and we can't decide.
+Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
+ ConstantExpr *CE) {
+ if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType()))
+ return 0; // Do not allow stepping over the value!
+
+ // Loop over all of the operands, tracking down which value we are
+ // addressing...
+ gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ for (++I; I != E; ++I)
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ ConstantInt *CU = cast<ConstantInt>(I.getOperand());
+ assert(CU->getZExtValue() < STy->getNumElements() &&
+ "Struct index out of range!");
+ unsigned El = (unsigned)CU->getZExtValue();
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ C = CS->getOperand(El);
+ } else if (isa<ConstantAggregateZero>(C)) {
+ C = Constant::getNullValue(STy->getElementType(El));
+ } else if (isa<UndefValue>(C)) {
+ C = UndefValue::get(STy->getElementType(El));
+ } else {
+ return 0;
+ }
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return 0;
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ C = CA->getOperand(CI->getZExtValue());
+ else if (isa<ConstantAggregateZero>(C))
+ C = Constant::getNullValue(ATy->getElementType());
+ else if (isa<UndefValue>(C))
+ C = UndefValue::get(ATy->getElementType());
+ else
+ return 0;
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) {
+ if (CI->getZExtValue() >= VTy->getNumElements())
+ return 0;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
+ C = CP->getOperand(CI->getZExtValue());
+ else if (isa<ConstantAggregateZero>(C))
+ C = Constant::getNullValue(VTy->getElementType());
+ else if (isa<UndefValue>(C))
+ C = UndefValue::get(VTy->getElementType());
+ else
+ return 0;
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+ return C;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding for Calls
+//
+
+/// canConstantFoldCallTo - Return true if its even possible to fold a call to
+/// the specified function.
+bool
+llvm::canConstantFoldCallTo(const Function *F) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::sqrt:
+ case Intrinsic::powi:
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::convert_from_fp16:
+ case Intrinsic::convert_to_fp16:
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ return true;
+ default:
+ return false;
+ case 0: break;
+ }
+
+ if (!F->hasName()) return false;
+ StringRef Name = F->getName();
+
+ // In these cases, the check of the length is required. We don't want to
+ // return true for a name like "cos\0blah" which strcmp would return equal to
+ // "cos", but has length 8.
+ switch (Name[0]) {
+ default: return false;
+ case 'a':
+ return Name == "acos" || Name == "asin" ||
+ Name == "atan" || Name == "atan2";
+ case 'c':
+ return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
+ case 'e':
+ return Name == "exp";
+ case 'f':
+ return Name == "fabs" || Name == "fmod" || Name == "floor";
+ case 'l':
+ return Name == "log" || Name == "log10";
+ case 'p':
+ return Name == "pow";
+ case 's':
+ return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
+ Name == "sinf" || Name == "sqrtf";
+ case 't':
+ return Name == "tan" || Name == "tanh";
+ }
+}
+
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+ const Type *Ty) {
+ sys::llvm_fenv_clearexcept();
+ V = NativeFP(V);
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
+ return 0;
+ }
+
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
+ return 0; // dummy return to suppress warning
+}
+
+static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
+ double V, double W, const Type *Ty) {
+ sys::llvm_fenv_clearexcept();
+ V = NativeFP(V, W);
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
+ return 0;
+ }
+
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
+ return 0; // dummy return to suppress warning
+}
+
+/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer
+/// conversion of a constant floating point. If roundTowardZero is false, the
+/// default IEEE rounding is used (toward nearest, ties to even). This matches
+/// the behavior of the non-truncating SSE instructions in the default rounding
+/// mode. The desired integer type Ty is used to select how many bits are
+/// available for the result. Returns null if the conversion cannot be
+/// performed, otherwise returns the Constant value resulting from the
+/// conversion.
+static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
+ const Type *Ty) {
+ assert(Op && "Called with NULL operand");
+ APFloat Val(Op->getValueAPF());
+
+ // All of these conversion intrinsics form an integer of at most 64bits.
+ unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ assert(ResultWidth <= 64 &&
+ "Can only constant fold conversions to 64 and 32 bit ints");
+
+ uint64_t UIntVal;
+ bool isExact = false;
+ APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
+ : APFloat::rmNearestTiesToEven;
+ APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
+ /*isSigned=*/true, mode,
+ &isExact);
+ if (status != APFloat::opOK && status != APFloat::opInexact)
+ return 0;
+ return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+}
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
+Constant *
+llvm::ConstantFoldCall(Function *F,
+ Constant *const *Operands, unsigned NumOperands) {
+ if (!F->hasName()) return 0;
+ StringRef Name = F->getName();
+
+ const Type *Ty = F->getReturnType();
+ if (NumOperands == 1) {
+ if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+ if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
+ APFloat Val(Op->getValueAPF());
+
+ bool lost = false;
+ Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+
+ return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
+ }
+
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+ return 0;
+
+ /// We only fold functions with finite arguments. Folding NaN and inf is
+ /// likely to be aborted with an exception anyway, and some host libms
+ /// have known errors raising exceptions.
+ if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ return 0;
+
+ /// Currently APFloat versions of these functions do not exist, so we use
+ /// the host native double versions. Float versions are not called
+ /// directly but for all these it is true (float)(f((double)arg)) ==
+ /// f(arg). Long double not supported yet.
+ double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() :
+ Op->getValueAPF().convertToDouble();
+ switch (Name[0]) {
+ case 'a':
+ if (Name == "acos")
+ return ConstantFoldFP(acos, V, Ty);
+ else if (Name == "asin")
+ return ConstantFoldFP(asin, V, Ty);
+ else if (Name == "atan")
+ return ConstantFoldFP(atan, V, Ty);
+ break;
+ case 'c':
+ if (Name == "ceil")
+ return ConstantFoldFP(ceil, V, Ty);
+ else if (Name == "cos")
+ return ConstantFoldFP(cos, V, Ty);
+ else if (Name == "cosh")
+ return ConstantFoldFP(cosh, V, Ty);
+ else if (Name == "cosf")
+ return ConstantFoldFP(cos, V, Ty);
+ break;
+ case 'e':
+ if (Name == "exp")
+ return ConstantFoldFP(exp, V, Ty);
+ break;
+ case 'f':
+ if (Name == "fabs")
+ return ConstantFoldFP(fabs, V, Ty);
+ else if (Name == "floor")
+ return ConstantFoldFP(floor, V, Ty);
+ break;
+ case 'l':
+ if (Name == "log" && V > 0)
+ return ConstantFoldFP(log, V, Ty);
+ else if (Name == "log10" && V > 0)
+ return ConstantFoldFP(log10, V, Ty);
+ else if (F->getIntrinsicID() == Intrinsic::sqrt &&
+ (Ty->isFloatTy() || Ty->isDoubleTy())) {
+ if (V >= -0.0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else // Undefined
+ return Constant::getNullValue(Ty);
+ }
+ break;
+ case 's':
+ if (Name == "sin")
+ return ConstantFoldFP(sin, V, Ty);
+ else if (Name == "sinh")
+ return ConstantFoldFP(sinh, V, Ty);
+ else if (Name == "sqrt" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Name == "sqrtf" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Name == "sinf")
+ return ConstantFoldFP(sin, V, Ty);
+ break;
+ case 't':
+ if (Name == "tan")
+ return ConstantFoldFP(tan, V, Ty);
+ else if (Name == "tanh")
+ return ConstantFoldFP(tanh, V, Ty);
+ break;
+ default:
+ break;
+ }
+ return 0;
+ }
+
+ if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
+ case Intrinsic::ctpop:
+ return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ case Intrinsic::cttz:
+ return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+ case Intrinsic::convert_from_fp16: {
+ APFloat Val(Op->getValue());
+
+ bool lost = false;
+ APFloat::opStatus status =
+ Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+
+ // Conversion is always precise.
+ (void)status;
+ assert(status == APFloat::opOK && !lost &&
+ "Precision lost during fp16 constfolding");
+
+ return ConstantFP::get(F->getContext(), Val);
+ }
+ default:
+ return 0;
+ }
+ }
+
+ if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+ }
+ }
+
+ if (isa<UndefValue>(Operands[0])) {
+ if (F->getIntrinsicID() == Intrinsic::bswap)
+ return Operands[0];
+ return 0;
+ }
+
+ return 0;
+ }
+
+ if (NumOperands == 2) {
+ if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+ return 0;
+ double Op1V = Ty->isFloatTy() ?
+ (double)Op1->getValueAPF().convertToFloat() :
+ Op1->getValueAPF().convertToDouble();
+ if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (Op2->getType() != Op1->getType())
+ return 0;
+
+ double Op2V = Ty->isFloatTy() ?
+ (double)Op2->getValueAPF().convertToFloat():
+ Op2->getValueAPF().convertToDouble();
+
+ if (Name == "pow")
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if (Name == "fmod")
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+ if (Name == "atan2")
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
+ }
+ return 0;
+ }
+
+
+ if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ APInt Res;
+ bool Overflow;
+ switch (F->getIntrinsicID()) {
+ default: assert(0 && "Invalid case");
+ case Intrinsic::sadd_with_overflow:
+ Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::ssub_with_overflow:
+ Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::usub_with_overflow:
+ Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::smul_with_overflow:
+ Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::umul_with_overflow:
+ Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
+ break;
+ }
+ Constant *Ops[] = {
+ ConstantInt::get(F->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
+ };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
+ }
+ }
+
+ return 0;
+ }
+ return 0;
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/DIBuilder.cpp b/contrib/llvm/lib/Analysis/DIBuilder.cpp
new file mode 100644
index 000000000000..dc98c9e67a80
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DIBuilder.cpp
@@ -0,0 +1,835 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+ assert((Tag & LLVMDebugVersionMask) == 0 &&
+ "Tag too large for debug encoding!");
+ return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+ : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
+
+/// createCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
+ StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags,
+ unsigned RunTimeVer) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ MDString::get(VMContext, Producer),
+ // Deprecate isMain field.
+ ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ MDString::get(VMContext, Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+ };
+ TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
+}
+
+/// createFile - Create a file descriptor to hold debugging information
+/// for a file.
+DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
+ assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ TheCU
+ };
+ return DIFile(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+ };
+ return DIEnumerator(MDNode::get(VMContext, Elts));
+}
+
+/// createBasicType - Create debugging information entry for a basic
+/// type, e.g 'char'.
+DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ unsigned Encoding) {
+ // Basic types are encoded in DIBasicType format. Line number, filename,
+ // offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+ TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createQaulifiedType - Create debugging information entry for a qualified
+/// type, e.g. 'const int'.
+DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
+ // Qualified types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ TheCU,
+ MDString::get(VMContext, StringRef()), // Empty name.
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FromTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createPointerType - Create debugging information entry for a pointer.
+DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
+ uint64_t AlignInBits, StringRef Name) {
+ // Pointer types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+ TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ PointeeTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createReferenceType - Create debugging information entry for a reference.
+DIType DIBuilder::createReferenceType(DIType RTy) {
+ // References are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
+ TheCU,
+ NULL, // Name
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ RTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createTypedef - Create debugging information entry for a typedef.
+DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
+ unsigned LineNo) {
+ // typedefs are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid typedef type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+ Ty.getContext(),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ Ty
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
+ // typedefs are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid type!");
+ assert(FriendTy.Verify() && "Invalid friend type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+ Ty,
+ NULL, // Name
+ Ty.getFile(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FriendTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createInheritance - Create debugging information entry to establish
+/// inheritnace relationship between two types.
+DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
+ uint64_t BaseOffset, unsigned Flags) {
+ // TAG_inheritance is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+ Ty,
+ NULL, // Name
+ Ty.getFile(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ BaseTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createMemberType - Create debugging information entry for a member.
+DIType DIBuilder::createMemberType(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File, // Or TheCU ? Ty ?
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, StringRef PropertyName,
+ StringRef GetterName, StringRef SetterName,
+ unsigned PropertyAttributes) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File, // Or TheCU ? Ty ?
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ MDString::get(VMContext, PropertyName),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createClassType - Create debugging information entry for a class.
+DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType DerivedFrom, DIArray Elements,
+ MDNode *VTableHoder, MDNode *TemplateParams) {
+ // TAG_class_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ VTableHoder,
+ TemplateParams
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateTypeParameter - Create debugging information for template
+/// type parameter.
+DITemplateTypeParameter
+DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
+ Context,
+ MDString::get(VMContext, Name),
+ Ty,
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateValueParameter - Create debugging information for template
+/// value parameter.
+DITemplateValueParameter
+DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, uint64_t Val,
+ MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
+ Context,
+ MDString::get(VMContext, Name),
+ Ty,
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateValueParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createStructType - Create debugging information entry for a struct.
+DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ unsigned Flags, DIArray Elements,
+ unsigned RunTimeLang) {
+ // TAG_structure_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createUnionType - Create debugging information entry for an union.
+DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
+ DIFile File,
+ unsigned LineNumber, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Flags,
+ DIArray Elements, unsigned RunTimeLang) {
+ // TAG_union_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createSubroutineType - Create subroutine type.
+DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
+ // TAG_subroutine_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
+ File,
+ MDString::get(VMContext, ""),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ParameterTypes,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerationType - Create debugging information entry for an
+/// enumeration.
+DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits, DIArray Elements) {
+ // TAG_enumeration_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+ NMD->addOperand(Node);
+ return DIType(Node);
+}
+
+/// createArrayType - Create debugging information entry for an array.
+DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_array_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ TheCU,
+ MDString::get(VMContext, ""),
+ TheCU,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_vector_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
+ TheCU,
+ MDString::get(VMContext, ""),
+ TheCU,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIBuilder::createArtificialType(DIType Ty) {
+ if (Ty.isArtificial())
+ return Ty;
+
+ SmallVector<Value *, 9> Elts;
+ MDNode *N = Ty;
+ assert (N && "Unexpected input DIType!");
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i))
+ Elts.push_back(V);
+ else
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ }
+
+ unsigned CurFlags = Ty.getFlags();
+ CurFlags = CurFlags | DIType::FlagArtificial;
+
+ // Flags are stored at this slot.
+ Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// retainType - Retain DIType in a module even if it is not referenced
+/// through debug info anchors.
+void DIBuilder::retainType(DIType T) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+ NMD->addOperand(T);
+}
+
+/// createUnspecifiedParameter - Create unspeicified type descriptor
+/// for the subroutine type.
+DIDescriptor DIBuilder::createUnspecifiedParameter() {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+ };
+ return DIDescriptor(MDNode::get(VMContext, Elts));
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType() {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ return DIType(Node);
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType(DIFile F) {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, DW_TAG_base_type),
+ F.getCompileUnit(),
+ NULL,
+ F
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ return DIType(Node);
+}
+
+/// getOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
+ if (Elements.empty()) {
+ Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+ return DIArray(MDNode::get(VMContext, Null));
+ }
+ return DIArray(MDNode::get(VMContext, Elements));
+}
+
+/// getOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+ };
+
+ return DISubrange(MDNode::get(VMContext, Elts));
+}
+
+/// createGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ TheCU,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// createStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+createStaticVariable(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// createVariable - Create a new descriptor for the specified variable.
+DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile File,
+ unsigned LineNo, DIType Ty,
+ bool AlwaysPreserve, unsigned Flags,
+ unsigned ArgNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ if (AlwaysPreserve) {
+ // The optimizer may remove local variable. If there is an interest
+ // to preserve variable info in such situation then stash it in a
+ // named mdnode.
+ DISubprogram Fn(getDISubprogram(Scope));
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+ FnLocals->addOperand(Node);
+ }
+ return DIVariable(Node);
+}
+
+/// createComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile F,
+ unsigned LineNo,
+ DIType Ty, ArrayRef<Value *> Addr,
+ unsigned ArgNo) {
+ SmallVector<Value *, 15> Elts;
+ Elts.push_back(GetTagConstant(VMContext, Tag));
+ Elts.push_back(Scope);
+ Elts.push_back(MDString::get(VMContext, Name));
+ Elts.push_back(F);
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))));
+ Elts.push_back(Ty);
+ Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ Elts.append(Addr.begin(), Addr.end());
+
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile File, unsigned LineNo,
+ DIType Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned Flags, bool isOptimized,
+ Function *Fn,
+ MDNode *TParams,
+ MDNode *Decl) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParams,
+ Decl
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
+}
+
+/// createMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::createMethod(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty,
+ bool isLocalToUnit,
+ bool isDefinition,
+ unsigned VK, unsigned VIndex,
+ MDNode *VTableHolder,
+ unsigned Flags,
+ bool isOptimized,
+ Function *Fn,
+ MDNode *TParam) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+ ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+ VTableHolder,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParam,
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
+}
+
+/// createNameSpace - This creates new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+ };
+ return DINameSpace(MDNode::get(VMContext, Elts));
+}
+
+DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
+ unsigned Line, unsigned Col) {
+ // Defeat MDNode uniqing for lexical blocks by using unique id.
+ static unsigned int unique_id = 0;
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ Scope,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+ };
+ return DILexicalBlock(MDNode::get(VMContext, Elts));
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+ return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+
+ // If this block already has a terminator then insert this intrinsic
+ // before the terminator.
+ if (TerminatorInst *T = InsertAtEnd->getTerminator())
+ return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+ else
+ return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+}
+
diff --git a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
new file mode 100644
index 000000000000..b23c3514d0bd
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
@@ -0,0 +1,224 @@
+//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints instructions, and associated debug
+// info:
+//
+// - source/line/col information
+// - original variable name
+// - original type name
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+PrintDirectory("print-fullpath",
+ cl::desc("Print fullpath when printing debug info"),
+ cl::Hidden);
+
+namespace {
+ class PrintDbgInfo : public FunctionPass {
+ raw_ostream &Out;
+ void printVariableDeclaration(const Value *V);
+ public:
+ static char ID; // Pass identification
+ PrintDbgInfo() : FunctionPass(ID), Out(errs()) {
+ initializePrintDbgInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+ char PrintDbgInfo::ID = 0;
+}
+
+INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+ "Print debug info in human readable form", false, false)
+
+FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
+
+/// Find the debug info descriptor corresponding to this global variable.
+static Value *findDbgGlobalDeclare(GlobalVariable *V) {
+ const Module *M = V->getParent();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+ if (!NMD)
+ return 0;
+
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (!DIG.isGlobalVariable())
+ continue;
+ if (DIGlobalVariable(DIG).getGlobal() == V)
+ return DIG;
+ }
+ return 0;
+}
+
+/// Find the debug info descriptor corresponding to this function.
+static Value *findDbgSubprogramDeclare(Function *V) {
+ const Module *M = V->getParent();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp");
+ if (!NMD)
+ return 0;
+
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (!DIG.isSubprogram())
+ continue;
+ if (DISubprogram(DIG).getFunction() == V)
+ return DIG;
+ }
+ return 0;
+}
+
+/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+/// It looks through pointer casts too.
+static const DbgDeclareInst *findDbgDeclare(const Value *V) {
+ V = V->stripPointerCasts();
+
+ if (!isa<Instruction>(V) && !isa<Argument>(V))
+ return 0;
+
+ const Function *F = NULL;
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ F = I->getParent()->getParent();
+ else if (const Argument *A = dyn_cast<Argument>(V))
+ F = A->getParent();
+
+ for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+ BI != BE; ++BI)
+ if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ if (DDI->getAddress() == V)
+ return DDI;
+
+ return 0;
+}
+
+static bool getLocationInfo(const Value *V, std::string &DisplayName,
+ std::string &Type, unsigned &LineNo,
+ std::string &File, std::string &Dir) {
+ DICompileUnit Unit;
+ DIType TypeD;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+ Value *DIGV = findDbgGlobalDeclare(GV);
+ if (!DIGV) return false;
+ DIGlobalVariable Var(cast<MDNode>(DIGV));
+
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){
+ Value *DIF = findDbgSubprogramDeclare(F);
+ if (!DIF) return false;
+ DISubprogram Var(cast<MDNode>(DIF));
+
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else {
+ const DbgDeclareInst *DDI = findDbgDeclare(V);
+ if (!DDI) return false;
+ DIVariable Var(cast<MDNode>(DDI->getVariable()));
+
+ StringRef D = Var.getName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ }
+
+ StringRef T = TypeD.getName();
+ if (!T.empty())
+ Type = T;
+ StringRef F = Unit.getFilename();
+ if (!F.empty())
+ File = F;
+ StringRef D = Unit.getDirectory();
+ if (!D.empty())
+ Dir = D;
+ return true;
+}
+
+void PrintDbgInfo::printVariableDeclaration(const Value *V) {
+ std::string DisplayName, File, Directory, Type;
+ unsigned LineNo;
+
+ if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
+ return;
+
+ Out << "; ";
+ WriteAsOperand(Out, V, false, 0);
+ if (isa<Function>(V))
+ Out << " is function " << DisplayName
+ << " of type " << Type << " declared at ";
+ else
+ Out << " is variable " << DisplayName
+ << " of type " << Type << " declared at ";
+
+ if (PrintDirectory)
+ Out << Directory << "/";
+
+ Out << File << ":" << LineNo << "\n";
+}
+
+bool PrintDbgInfo::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ Out << "function " << F.getName() << "\n\n";
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
+ // Skip dead blocks.
+ continue;
+
+ Out << BB->getName();
+ Out << ":";
+
+ Out << "\n";
+
+ for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
+ i != e; ++i) {
+
+ printVariableDeclaration(i);
+
+ if (const User *U = dyn_cast<User>(i)) {
+ for(unsigned i=0;i<U->getNumOperands();i++)
+ printVariableDeclaration(U->getOperand(i));
+ }
+ }
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/DebugInfo.cpp b/contrib/llvm/lib/Analysis/DebugInfo.cpp
new file mode 100644
index 000000000000..67f8147f4d61
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DebugInfo.cpp
@@ -0,0 +1,948 @@
+//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper classes used to build and interpret debug
+// information in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+StringRef
+DIDescriptor::getStringField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return StringRef();
+
+ if (Elt < DbgNode->getNumOperands())
+ if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
+ return MDS->getString();
+
+ return StringRef();
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt)))
+ return CI->getZExtValue();
+
+ return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return DIDescriptor();
+
+ if (Elt < DbgNode->getNumOperands())
+ return
+ DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
+ return DIDescriptor();
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+unsigned DIVariable::getNumAddrElements() const {
+ if (getVersion() <= llvm::LLVMDebugVersion8)
+ return DbgNode->getNumOperands()-6;
+ return DbgNode->getNumOperands()-7;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Predicates
+//===----------------------------------------------------------------------===//
+
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_base_type;
+}
+
+/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
+bool DIDescriptor::isDerivedType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_member:
+ case dwarf::DW_TAG_inheritance:
+ case dwarf::DW_TAG_friend:
+ return true;
+ default:
+ // CompositeTypes are currently modelled as DerivedTypes.
+ return isCompositeType();
+ }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIDescriptor::isCompositeType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_class_type:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIDescriptor::isVariable() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_auto_variable:
+ case dwarf::DW_TAG_arg_variable:
+ case dwarf::DW_TAG_return_variable:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+ return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+ return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+ getTag() == dwarf::DW_TAG_constant);
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+ return isGlobalVariable();
+}
+
+/// isUnspecifiedParmeter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
+/// isScope - Return true if the specified tag is one of the scope
+/// related tag.
+bool DIDescriptor::isScope() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_namespace:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
+}
+
+/// isFile - Return true if the specified tag is DW_TAG_file_type.
+bool DIDescriptor::isFile() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_file_type;
+}
+
+/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
+bool DIDescriptor::isNameSpace() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_namespace;
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block;
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(const MDNode *N) : DIScope(N) {
+ if (!N) return;
+ if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+ DbgNode = 0;
+ }
+}
+
+unsigned DIArray::getNumElements() const {
+ if (!DbgNode)
+ return 0;
+ return DbgNode->getNumOperands();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, its easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, its easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+ if (!DbgNode)
+ return false;
+ StringRef N = getFilename();
+ if (N.empty())
+ return false;
+ // It is possible that directory and produce string is empty.
+ return true;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (!getContext().Verify())
+ return false;
+ unsigned Tag = getTag();
+ if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+ Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type
+ && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
+ && Tag != dwarf::DW_TAG_enumeration_type
+ && getFilename().empty())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+ return isBasicType();
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+ return isDerivedType();
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+
+ DICompositeType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (getDisplayName().empty())
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ if (!getGlobal() && !getConstant())
+ return false;
+
+ return true;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ if (!getCompileUnit().Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ return true;
+}
+
+/// Verify - Verify that a location descriptor is well formed.
+bool DILocation::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ return DbgNode->getNumOperands() == 4;
+}
+
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (getName().empty())
+ return false;
+ if (!getCompileUnit().Verify())
+ return false;
+ return true;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+ unsigned Tag = getTag();
+ if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef ||
+ Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ DIType BaseType = getTypeDerivedFrom();
+ // If this type is not derived from any type then take conservative
+ // approach.
+ if (!BaseType.isValid())
+ return getSizeInBits();
+ if (BaseType.isDerivedType())
+ return DIDerivedType(BaseType).getOriginalTypeSize();
+ else
+ return BaseType.getSizeInBits();
+ }
+
+ return getSizeInBits();
+}
+
+/// isInlinedFnArgument - Return true if this variable provides debugging
+/// information for an inlined function arguments.
+bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
+ assert(CurFn && "Invalid function");
+ if (!getContext().isSubprogram())
+ return false;
+ // This variable is not inlined function argument if its scope
+ // does not describe current function.
+ return !(DISubprogram(getContext()).describes(CurFn));
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
+bool DISubprogram::describes(const Function *F) {
+ assert(F && "Invalid function");
+ if (F == getFunction())
+ return true;
+ StringRef Name = getLinkageName();
+ if (Name.empty())
+ Name = getName();
+ if (F->getName() == Name)
+ return true;
+ return false;
+}
+
+unsigned DISubprogram::isOptimized() const {
+ assert (DbgNode && "Invalid subprogram descriptor!");
+ if (DbgNode->getNumOperands() == 16)
+ return getUnsignedField(15);
+ return 0;
+}
+
+StringRef DIScope::getFilename() const {
+ if (!DbgNode)
+ return StringRef();
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getFilename();
+ if (isSubprogram())
+ return DISubprogram(DbgNode).getFilename();
+ if (isCompileUnit())
+ return DICompileUnit(DbgNode).getFilename();
+ if (isNameSpace())
+ return DINameSpace(DbgNode).getFilename();
+ if (isType())
+ return DIType(DbgNode).getFilename();
+ if (isFile())
+ return DIFile(DbgNode).getFilename();
+ assert(0 && "Invalid DIScope!");
+ return StringRef();
+}
+
+StringRef DIScope::getDirectory() const {
+ if (!DbgNode)
+ return StringRef();
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getDirectory();
+ if (isSubprogram())
+ return DISubprogram(DbgNode).getDirectory();
+ if (isCompileUnit())
+ return DICompileUnit(DbgNode).getDirectory();
+ if (isNameSpace())
+ return DINameSpace(DbgNode).getDirectory();
+ if (isType())
+ return DIType(DbgNode).getDirectory();
+ if (isFile())
+ return DIFile(DbgNode).getDirectory();
+ assert(0 && "Invalid DIScope!");
+ return StringRef();
+}
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor: dump routines for all descriptors.
+//===----------------------------------------------------------------------===//
+
+
+/// print - Print descriptor.
+void DIDescriptor::print(raw_ostream &OS) const {
+ OS << "[" << dwarf::TagString(getTag()) << "] ";
+ OS.write_hex((intptr_t) &*DbgNode) << ']';
+}
+
+/// print - Print compile unit.
+void DICompileUnit::print(raw_ostream &OS) const {
+ if (getLanguage())
+ OS << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+
+ OS << " [" << getDirectory() << "/" << getFilename() << "]";
+}
+
+/// print - Print type.
+void DIType::print(raw_ostream &OS) const {
+ if (!DbgNode) return;
+
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " ["
+ << "line " << getLineNumber() << ", "
+ << getSizeInBits() << " bits, "
+ << getAlignInBits() << " bit alignment, "
+ << getOffsetInBits() << " bit offset"
+ << "] ";
+
+ if (isPrivate())
+ OS << " [private] ";
+ else if (isProtected())
+ OS << " [protected] ";
+
+ if (isForwardDecl())
+ OS << " [fwd] ";
+
+ if (isBasicType())
+ DIBasicType(DbgNode).print(OS);
+ else if (isDerivedType())
+ DIDerivedType(DbgNode).print(OS);
+ else if (isCompositeType())
+ DICompositeType(DbgNode).print(OS);
+ else {
+ OS << "Invalid DIType\n";
+ return;
+ }
+
+ OS << "\n";
+}
+
+/// print - Print basic type.
+void DIBasicType::print(raw_ostream &OS) const {
+ OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+}
+
+/// print - Print derived type.
+void DIDerivedType::print(raw_ostream &OS) const {
+ OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS);
+}
+
+/// print - Print composite type.
+void DICompositeType::print(raw_ostream &OS) const {
+ DIArray A = getTypeArray();
+ OS << " [" << A.getNumElements() << " elements]";
+}
+
+/// print - Print subprogram.
+void DISubprogram::print(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ OS << " [local] ";
+
+ if (isDefinition())
+ OS << " [def] ";
+
+ OS << "\n";
+}
+
+/// print - Print global variable.
+void DIGlobalVariable::print(raw_ostream &OS) const {
+ OS << " [";
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ OS << " [local] ";
+
+ if (isDefinition())
+ OS << " [def] ";
+
+ if (isGlobalVariable())
+ DIGlobalVariable(DbgNode).print(OS);
+ OS << "]\n";
+}
+
+/// print - Print variable.
+void DIVariable::print(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+ getType().print(OS);
+ OS << "\n";
+
+ // FIXME: Dump complex addresses
+}
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print compile unit to dbgs() with a newline.
+void DICompileUnit::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print type to dbgs() with a newline.
+void DIType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print basic type to dbgs() with a newline.
+void DIBasicType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print derived type to dbgs() with a newline.
+void DIDerivedType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print composite type to dbgs() with a newline.
+void DICompositeType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print subprogram to dbgs() with a newline.
+void DISubprogram::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print global variable.
+void DIGlobalVariable::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print variable.
+void DIVariable::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// fixupObjcLikeName - Replace contains special characters used
+/// in a typical Objective-C names with '.' in a given string.
+static void fixupObjcLikeName(std::string &Str) {
+ for (size_t i = 0, e = Str.size(); i < e; ++i) {
+ char C = Str[i];
+ if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
+ C == '(' || C == ')')
+ Str[i] = '.';
+ }
+}
+
+/// getFnSpecificMDNode - Return a NameMDNode, if available, that is
+/// suitable to hold function specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+ if (FuncName.find('[') == StringRef::npos)
+ return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
+ std::string Name = FuncName;
+ fixupObjcLikeName(Name);
+ return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+ SmallString<32> Out;
+ if (FuncName.find('[') == StringRef::npos)
+ return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
+ .toStringRef(Out));
+
+ std::string Name = FuncName;
+ fixupObjcLikeName(Name);
+ return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
+ .toStringRef(Out));
+}
+
+
+//===----------------------------------------------------------------------===//
+// DebugInfoFinder implementations.
+//===----------------------------------------------------------------------===//
+
+/// processModule - Process entire module and collect debug info.
+void DebugInfoFinder::processModule(Module &M) {
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
+ ++BI) {
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ processDeclare(DDI);
+
+ DebugLoc Loc = BI->getDebugLoc();
+ if (Loc.isUnknown())
+ continue;
+
+ LLVMContext &Ctx = BI->getContext();
+ DIDescriptor Scope(Loc.getScope(Ctx));
+
+ if (Scope.isCompileUnit())
+ addCompileUnit(DICompileUnit(Scope));
+ else if (Scope.isSubprogram())
+ processSubprogram(DISubprogram(Scope));
+ else if (Scope.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(Scope));
+
+ if (MDNode *IA = Loc.getInlinedAt(Ctx))
+ processLocation(DILocation(IA));
+ }
+
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (addGlobalVariable(DIG)) {
+ addCompileUnit(DIG.getCompileUnit());
+ processType(DIG.getType());
+ }
+ }
+ }
+
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ processSubprogram(DISubprogram(NMD->getOperand(i)));
+}
+
+/// processLocation - Process DILocation.
+void DebugInfoFinder::processLocation(DILocation Loc) {
+ if (!Loc.Verify()) return;
+ DIDescriptor S(Loc.getScope());
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S));
+ processLocation(Loc.getOrigLocation());
+}
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+ if (!addType(DT))
+ return;
+
+ addCompileUnit(DT.getCompileUnit());
+ if (DT.isCompositeType()) {
+ DICompositeType DCT(DT);
+ processType(DCT.getTypeDerivedFrom());
+ DIArray DA = DCT.getTypeArray();
+ for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+ DIDescriptor D = DA.getElement(i);
+ if (D.isType())
+ processType(DIType(D));
+ else if (D.isSubprogram())
+ processSubprogram(DISubprogram(D));
+ }
+ } else if (DT.isDerivedType()) {
+ DIDerivedType DDT(DT);
+ processType(DDT.getTypeDerivedFrom());
+ }
+}
+
+/// processLexicalBlock
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+ DIScope Context = LB.getContext();
+ if (Context.isLexicalBlock())
+ return processLexicalBlock(DILexicalBlock(Context));
+ else
+ return processSubprogram(DISubprogram(Context));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+ if (!addSubprogram(SP))
+ return;
+ addCompileUnit(SP.getCompileUnit());
+ processType(SP.getType());
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+ MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+ if (!N) return;
+
+ DIDescriptor DV(N);
+ if (!DV.isVariable())
+ return;
+
+ if (!NodesSeen.insert(DV))
+ return;
+
+ addCompileUnit(DIVariable(N).getCompileUnit());
+ processType(DIVariable(N).getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+ if (!DT.isValid())
+ return false;
+
+ if (!NodesSeen.insert(DT))
+ return false;
+
+ TYs.push_back(DT);
+ return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+ if (!CU.Verify())
+ return false;
+
+ if (!NodesSeen.insert(CU))
+ return false;
+
+ CUs.push_back(CU);
+ return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+ if (!DIDescriptor(DIG).isGlobalVariable())
+ return false;
+
+ if (!NodesSeen.insert(DIG))
+ return false;
+
+ GVs.push_back(DIG);
+ return true;
+}
+
+// addSubprogram - Add subprgoram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+ if (!DIDescriptor(SP).isSubprogram())
+ return false;
+
+ if (!NodesSeen.insert(SP))
+ return false;
+
+ SPs.push_back(SP);
+ return true;
+}
+
+/// getDISubprogram - Find subprogram that is enclosing this scope.
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
+ DIDescriptor D(Scope);
+ if (D.isSubprogram())
+ return DISubprogram(Scope);
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(Scope).getContext());
+
+ return DISubprogram();
+}
+
+/// getDICompositeType - Find underlying composite type.
+DICompositeType llvm::getDICompositeType(DIType T) {
+ if (T.isCompositeType())
+ return DICompositeType(T);
+
+ if (T.isDerivedType())
+ return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+
+ return DICompositeType();
+}
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp
new file mode 100644
index 000000000000..cde431459d50
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp
@@ -0,0 +1,232 @@
+//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit
+// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the
+// program, with a graph of the dominance/postdominance tree of that
+// function.
+//
+// There are also passes available to directly call dotty ('-view-dom' or
+// '-view-postdom'). By appending '-only' like '-dot-dom-only' only the
+// names of the bbs are printed, but the content is hidden.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/PostDominators.h"
+
+using namespace llvm;
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
+
+ BasicBlock *BB = Node->getBlock();
+
+ if (!BB)
+ return "Post dominance root node";
+
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+};
+
+template<>
+struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(DominatorTree *DT) {
+ return "Dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+
+template<>
+struct DOTGraphTraits<PostDominatorTree*>
+ : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(PostDominatorTree *DT) {
+ return "Post dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+}
+
+namespace {
+struct DomViewer
+ : public DOTGraphTraitsViewer<DominatorTree, false> {
+ static char ID;
+ DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
+ initializeDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyViewer
+ : public DOTGraphTraitsViewer<DominatorTree, true> {
+ static char ID;
+ DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
+ initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, false> {
+ static char ID;
+ PostDomViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
+ initializePostDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
+ initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+char DomViewer::ID = 0;
+INITIALIZE_PASS(DomViewer, "view-dom",
+ "View dominance tree of function", false, false)
+
+char DomOnlyViewer::ID = 0;
+INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+ "View dominance tree of function (with no function bodies)",
+ false, false)
+
+char PostDomViewer::ID = 0;
+INITIALIZE_PASS(PostDomViewer, "view-postdom",
+ "View postdominance tree of function", false, false)
+
+char PostDomOnlyViewer::ID = 0;
+INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+ "View postdominance tree of function "
+ "(with no function bodies)",
+ false, false)
+
+namespace {
+struct DomPrinter
+ : public DOTGraphTraitsPrinter<DominatorTree, false> {
+ static char ID;
+ DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
+ initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyPrinter
+ : public DOTGraphTraitsPrinter<DominatorTree, true> {
+ static char ID;
+ DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
+ initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, false> {
+ static char ID;
+ PostDomPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
+ initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
+ initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+
+
+char DomPrinter::ID = 0;
+INITIALIZE_PASS(DomPrinter, "dot-dom",
+ "Print dominance tree of function to 'dot' file",
+ false, false)
+
+char DomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+ "Print dominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+char PostDomPrinter::ID = 0;
+INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
+ "Print postdominance tree of function to 'dot' file",
+ false, false)
+
+char PostDomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+ "Print postdominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+// Create methods available outside of this file, to use them
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+FunctionPass *llvm::createDomPrinterPass() {
+ return new DomPrinter();
+}
+
+FunctionPass *llvm::createDomOnlyPrinterPass() {
+ return new DomOnlyPrinter();
+}
+
+FunctionPass *llvm::createDomViewerPass() {
+ return new DomViewer();
+}
+
+FunctionPass *llvm::createDomOnlyViewerPass() {
+ return new DomOnlyViewer();
+}
+
+FunctionPass *llvm::createPostDomPrinterPass() {
+ return new PostDomPrinter();
+}
+
+FunctionPass *llvm::createPostDomOnlyPrinterPass() {
+ return new PostDomOnlyPrinter();
+}
+
+FunctionPass *llvm::createPostDomViewerPass() {
+ return new PostDomViewer();
+}
+
+FunctionPass *llvm::createPostDomOnlyViewerPass() {
+ return new PostDomOnlyViewer();
+}
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
new file mode 100644
index 000000000000..6de4e1e1d7de
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -0,0 +1,137 @@
+//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char DominanceFrontier::ID = 0;
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+
+namespace {
+ class DFCalculateWorkObject {
+ public:
+ DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
+ const DomTreeNode *N,
+ const DomTreeNode *PN)
+ : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+ BasicBlock *currentBB;
+ BasicBlock *parentBB;
+ const DomTreeNode *Node;
+ const DomTreeNode *parentNode;
+ };
+}
+
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+ const DomTreeNode *Node) {
+ BasicBlock *BB = Node->getBlock();
+ DomSetType *Result = NULL;
+
+ std::vector<DFCalculateWorkObject> workList;
+ SmallPtrSet<BasicBlock *, 32> visited;
+
+ workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+ do {
+ DFCalculateWorkObject *currentW = &workList.back();
+ assert (currentW && "Missing work object.");
+
+ BasicBlock *currentBB = currentW->currentBB;
+ BasicBlock *parentBB = currentW->parentBB;
+ const DomTreeNode *currentNode = currentW->Node;
+ const DomTreeNode *parentNode = currentW->parentNode;
+ assert (currentBB && "Invalid work object. Missing current Basic Block");
+ assert (currentNode && "Invalid work object. Missing current Node");
+ DomSetType &S = Frontiers[currentBB];
+
+ // Visit each block only once.
+ if (visited.count(currentBB) == 0) {
+ visited.insert(currentBB);
+
+ // Loop over CFG successors to calculate DFlocal[currentNode]
+ for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+ SI != SE; ++SI) {
+ // Does Node immediately dominate this successor?
+ if (DT[*SI]->getIDom() != currentNode)
+ S.insert(*SI);
+ }
+ }
+
+ // At this point, S is DFlocal. Now we union in DFup's of our children...
+ // Loop through and visit the nodes that Node immediately dominates (Node's
+ // children in the IDomTree)
+ bool visitChild = false;
+ for (DomTreeNode::const_iterator NI = currentNode->begin(),
+ NE = currentNode->end(); NI != NE; ++NI) {
+ DomTreeNode *IDominee = *NI;
+ BasicBlock *childBB = IDominee->getBlock();
+ if (visited.count(childBB) == 0) {
+ workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+ IDominee, currentNode));
+ visitChild = true;
+ }
+ }
+
+ // If all children are visited or there is any child then pop this block
+ // from the workList.
+ if (!visitChild) {
+
+ if (!parentBB) {
+ Result = &S;
+ break;
+ }
+
+ DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+ DomSetType &parentSet = Frontiers[parentBB];
+ for (; CDFI != CDFE; ++CDFI) {
+ if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+ parentSet.insert(*CDFI);
+ }
+ workList.pop_back();
+ }
+
+ } while (!workList.empty());
+
+ return *Result;
+}
+
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " DomFrontier for BB ";
+ if (I->first)
+ WriteAsOperand(OS, I->first, false);
+ else
+ OS << " <<exit node>>";
+ OS << " is:\t";
+
+ const std::set<BasicBlock*> &BBs = I->second;
+
+ for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+ I != E; ++I) {
+ OS << ' ';
+ if (*I)
+ WriteAsOperand(OS, *I, false);
+ else
+ OS << "<<exit node>>";
+ }
+ OS << "\n";
+ }
+}
+
+void DominanceFrontierBase::dump() const {
+ print(dbgs());
+}
+
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
new file mode 100644
index 000000000000..690c4b4b6f1a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
@@ -0,0 +1,340 @@
+//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraph class and provides the BasicCallGraph
+// default implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BasicCallGraph class definition
+//
+class BasicCallGraph : public ModulePass, public CallGraph {
+ // Root is root of the call graph, or the external node if a 'main' function
+ // couldn't be found.
+ //
+ CallGraphNode *Root;
+
+ // ExternalCallingNode - This node has edges to all external functions and
+ // those internal functions that have their address taken.
+ CallGraphNode *ExternalCallingNode;
+
+ // CallsExternalNode - This node has edges to it from all functions making
+ // indirect calls or calling an external function.
+ CallGraphNode *CallsExternalNode;
+
+public:
+ static char ID; // Class identification, replacement for typeinfo
+ BasicCallGraph() : ModulePass(ID), Root(0),
+ ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnModule - Compute the call graph for the specified module.
+ virtual bool runOnModule(Module &M) {
+ CallGraph::initialize(M);
+
+ ExternalCallingNode = getOrInsertFunction(0);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
+
+ // Add every function to the call graph.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
+
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0) Root = ExternalCallingNode;
+
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual void print(raw_ostream &OS, const Module *) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = getRoot()->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << getRoot() << ">>\n";
+ }
+
+ CallGraph::print(OS, 0);
+ }
+
+ virtual void releaseMemory() {
+ destroy();
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it should
+ /// override this to adjust the this pointer as needed for the specified pass
+ /// info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &CallGraph::ID)
+ return (CallGraph*)this;
+ return this;
+ }
+
+ CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
+ CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
+
+ // getRoot - Return the root of the call graph, which is either main, or if
+ // main cannot be found, the external node.
+ //
+ CallGraphNode *getRoot() { return Root; }
+ const CallGraphNode *getRoot() const { return Root; }
+
+private:
+ //===---------------------------------------------------------------------
+ // Implementation of CallGraph construction
+ //
+
+ // addToCallGraph - Add a function to the call graph, and link the node to all
+ // of the functions that it calls.
+ //
+ void addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
+
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
+ }
+ }
+
+ // Loop over all of the users of the function, looking for non-call uses.
+ for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){
+ User *U = *I;
+ if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ || !CallSite(cast<Instruction>(U)).isCallee(I)) {
+ // Not a call, or being used as a parameter rather than as the callee.
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+ break;
+ }
+ }
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS && !isa<DbgInfoIntrinsic>(II)) {
+ const Function *Callee = CS.getCalledFunction();
+ if (Callee)
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+ else
+ Node->addCalledFunction(CS, CallsExternalNode);
+ }
+ }
+ }
+
+ //
+ // destroy - Release memory for the call graph
+ virtual void destroy() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
+ CallGraph::destroy();
+ }
+};
+
+} //End anonymous namespace
+
+INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
+INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
+ "Basic CallGraph Construction", false, true, true)
+
+char CallGraph::ID = 0;
+char BasicCallGraph::ID = 0;
+
+void CallGraph::initialize(Module &M) {
+ Mod = &M;
+}
+
+void CallGraph::destroy() {
+ if (FunctionMap.empty()) return;
+
+ // Reset all node's use counts to zero before deleting them to prevent an
+ // assertion from firing.
+#ifndef NDEBUG
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ I->second->allReferencesDropped();
+#endif
+
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ delete I->second;
+ FunctionMap.clear();
+}
+
+void CallGraph::print(raw_ostream &OS, Module*) const {
+ for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
+ I->second->print(OS);
+}
+void CallGraph::dump() const {
+ print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementations of public modification methods
+//
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it. Because this removes the function from the module, the call graph node
+// is destroyed. This is only valid if the function does not call any other
+// functions (ie, there are no edges in it's CGN). The easiest way to do this
+// is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+ assert(CGN->empty() && "Cannot remove function from call "
+ "graph if it references other functions!");
+ Function *F = CGN->getFunction(); // Get the function for the call graph node
+ delete CGN; // Delete the call graph node for this func
+ FunctionMap.erase(F); // Remove the call graph node from the map
+
+ Mod->getFunctionList().remove(F);
+ return F;
+}
+
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when
+/// splicing the body of the old function to the new while also updating all
+/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+ assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+ assert(!FunctionMap.count(To) &&
+ "Pointing CallGraphNode at a function that already exists");
+ FunctionMapTy::iterator I = FunctionMap.find(From);
+ I->second->F = const_cast<Function*>(To);
+ FunctionMap[To] = I->second;
+ FunctionMap.erase(I);
+}
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+ CallGraphNode *&CGN = FunctionMap[F];
+ if (CGN) return CGN;
+
+ assert((!F || F->getParent() == Mod) && "Function not in current module!");
+ return CGN = new CallGraphNode(const_cast<Function*>(F));
+}
+
+void CallGraphNode::print(raw_ostream &OS) const {
+ if (Function *F = getFunction())
+ OS << "Call graph node for function: '" << F->getName() << "'";
+ else
+ OS << "Call graph node <<null function>>";
+
+ OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
+
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " CS<" << I->first << "> calls ";
+ if (Function *FI = I->second->getFunction())
+ OS << "function '" << FI->getName() <<"'\n";
+ else
+ OS << "external node\n";
+ }
+ OS << '\n';
+}
+
+void CallGraphNode::dump() const { print(dbgs()); }
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+// removeAnyCallEdgeTo - This method removes any call edges from this node to
+// the specified callee function. This takes more time to execute than
+// removeCallEdgeTo, so it should not be used unless necessary.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+ for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
+ if (CalledFunctions[i].second == Callee) {
+ Callee->DropRef();
+ CalledFunctions[i] = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ --i; --e;
+ }
+}
+
+/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+/// from this node to the specified callee function.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
+ CallRecord &CR = *I;
+ if (CR.second == Callee && CR.first == 0) {
+ Callee->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+ CallSite NewCS, CallGraphNode *NewNode){
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ I->first = NewCS.getInstruction();
+ I->second = NewNode;
+ NewNode->AddRef();
+ return;
+ }
+ }
+}
+
+// Enuse that users of CallGraph.h also link with this file
+DEFINING_FILE_FOR(CallGraph)
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 000000000000..725ab72f5595
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,608 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph. Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call-graph in SCC order: that is, they process function bottom-up, except for
+// recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cgscc-passmgr"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+public:
+ static char ID;
+ explicit CGPassManager(int Depth)
+ : ModulePass(ID), PMDataManager(Depth) { }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ bool doInitialization(CallGraph &CG);
+ bool doFinalization(CallGraph &CG);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ // CGPassManager walks SCC and it needs CallGraph.
+ Info.addRequired<CallGraph>();
+ Info.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "CallGraph Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+ }
+
+ Pass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<Pass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_CallGraphPassManager;
+ }
+
+private:
+ bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall);
+
+ bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall);
+ bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool IsCheckingMode);
+};
+
+} // end anonymous namespace.
+
+char CGPassManager::ID = 0;
+
+
+bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+ PMDataManager *PM = P->getAsPMDataManager();
+
+ if (PM == 0) {
+ CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
+ if (!CallGraphUpToDate) {
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ CallGraphUpToDate = true;
+ }
+
+ {
+ TimeRegion PassTimer(getPassTimer(CGSP));
+ Changed = CGSP->runOnSCC(CurSCC);
+ }
+
+ // After the CGSCCPass is done, when assertions are enabled, use
+ // RefreshCallGraph to verify that the callgraph was correctly updated.
+#ifndef NDEBUG
+ if (Changed)
+ RefreshCallGraph(CurSCC, CG, true);
+#endif
+
+ return Changed;
+ }
+
+
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ FPPassManager *FPP = (FPPassManager*)P;
+
+ // Run pass P on all functions in the current SCC.
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (Function *F = (*I)->getFunction()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
+ TimeRegion PassTimer(getPassTimer(FPP));
+ Changed |= FPP->runOnFunction(*F);
+ }
+ }
+
+ // The function pass(es) modified the IR, they may have clobbered the
+ // callgraph.
+ if (Changed && CallGraphUpToDate) {
+ DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: "
+ << P->getPassName() << '\n');
+ CallGraphUpToDate = false;
+ }
+ return Changed;
+}
+
+
+/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// callgraph with the call sites found in it. This is used after
+/// FunctionPasses have potentially munged the callgraph, and can be used after
+/// CallGraphSCC passes to verify that they correctly updated the callgraph.
+///
+/// This function returns true if it devirtualized an existing function call,
+/// meaning it turned an indirect call into a direct call. This happens when
+/// a function pass like GVN optimizes away stuff feeding the indirect call.
+/// This never happens in checking mode.
+///
+bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
+ CallGraph &CG, bool CheckingMode) {
+ DenseMap<Value*, CallGraphNode*> CallSites;
+
+ DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
+ << " nodes:\n";
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ );
+
+ bool MadeChange = false;
+ bool DevirtualizedCall = false;
+
+ // Scan all functions in the SCC.
+ unsigned FunctionNo = 0;
+ for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end();
+ SCCIdx != E; ++SCCIdx, ++FunctionNo) {
+ CallGraphNode *CGN = *SCCIdx;
+ Function *F = CGN->getFunction();
+ if (F == 0 || F->isDeclaration()) continue;
+
+ // Walk the function body looking for call sites. Sync up the call sites in
+ // CGN with those actually in the function.
+
+ // Keep track of the number of direct and indirect calls that were
+ // invalidated and removed.
+ unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
+
+ // Get the set of call sites currently in the function.
+ for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
+ // If this call site is null, then the function pass deleted the call
+ // entirely and the WeakVH nulled it out.
+ if (I->first == 0 ||
+ // If we've already seen this call site, then the FunctionPass RAUW'd
+ // one call with another, which resulted in two "uses" in the edge
+ // list of the same call.
+ CallSites.count(I->first) ||
+
+ // If the call edge is not from a call or invoke, then the function
+ // pass RAUW'd a call with another value. This can happen when
+ // constant folding happens of well known functions etc.
+ !CallSite(I->first)) {
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If this was an indirect call site, count it.
+ if (I->second->getFunction() == 0)
+ ++NumIndirectRemoved;
+ else
+ ++NumDirectRemoved;
+
+ // Just remove the edge from the set of callees, keep track of whether
+ // I points to the last element of the vector.
+ bool WasLast = I + 1 == E;
+ CGN->removeCallEdge(I);
+
+ // If I pointed to the last element of the vector, we have to bail out:
+ // iterator checking rejects comparisons of the resultant pointer with
+ // end.
+ if (WasLast)
+ break;
+ E = CGN->end();
+ continue;
+ }
+
+ assert(!CallSites.count(I->first) &&
+ "Call site occurs in node multiple times");
+ CallSites.insert(std::make_pair(I->first, I->second));
+ ++I;
+ }
+
+ // Loop over all of the instructions in the function, getting the callsites.
+ // Keep track of the number of direct/indirect calls added.
+ unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ if (!CS || isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this call site already existed in the callgraph, just verify it
+ // matches up to expectations and remove it from CallSites.
+ DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
+ CallSites.find(CS.getInstruction());
+ if (ExistingIt != CallSites.end()) {
+ CallGraphNode *ExistingNode = ExistingIt->second;
+
+ // Remove from CallSites since we have now seen it.
+ CallSites.erase(ExistingIt);
+
+ // Verify that the callee is right.
+ if (ExistingNode->getFunction() == CS.getCalledFunction())
+ continue;
+
+ // If we are in checking mode, we are not allowed to actually mutate
+ // the callgraph. If this is a case where we can infer that the
+ // callgraph is less precise than it could be (e.g. an indirect call
+ // site could be turned direct), don't reject it in checking mode, and
+ // don't tweak it to be more precise.
+ if (CheckingMode && CS.getCalledFunction() &&
+ ExistingNode->getFunction() == 0)
+ continue;
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If not, we either went from a direct call to indirect, indirect to
+ // direct, or direct to different direct.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ // Keep track of whether we turned an indirect call into a direct
+ // one.
+ if (ExistingNode->getFunction() == 0) {
+ DevirtualizedCall = true;
+ DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
+ << Callee->getName() << "'\n");
+ }
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ }
+
+ // Update the edge target in CGN.
+ CGN->replaceCallEdge(CS, CS, CalleeNode);
+ MadeChange = true;
+ continue;
+ }
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If the call site didn't exist in the CGN yet, add it.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ ++NumDirectAdded;
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ ++NumIndirectAdded;
+ }
+
+ CGN->addCalledFunction(CS, CalleeNode);
+ MadeChange = true;
+ }
+
+ // We scanned the old callgraph node, removing invalidated call sites and
+ // then added back newly found call sites. One thing that can happen is
+ // that an old indirect call site was deleted and replaced with a new direct
+ // call. In this case, we have devirtualized a call, and CGSCCPM would like
+ // to iteratively optimize the new code. Unfortunately, we don't really
+ // have a great way to detect when this happens. As an approximation, we
+ // just look at whether the number of indirect calls is reduced and the
+ // number of direct calls is increased. There are tons of ways to fool this
+ // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a
+ // direct call) but this is close enough.
+ if (NumIndirectRemoved > NumIndirectAdded &&
+ NumDirectRemoved < NumDirectAdded)
+ DevirtualizedCall = true;
+
+ // After scanning this function, if we still have entries in callsites, then
+ // they are dangling pointers. WeakVH should save us for this, so abort if
+ // this happens.
+ assert(CallSites.empty() && "Dangling pointers found in call sites map");
+
+ // Periodically do an explicit clear to remove tombstones when processing
+ // large scc's.
+ if ((FunctionNo & 15) == 15)
+ CallSites.clear();
+ }
+
+ DEBUG(if (MadeChange) {
+ dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ if (DevirtualizedCall)
+ dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
+
+ } else {
+ dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
+ }
+ );
+
+ return DevirtualizedCall;
+}
+
+/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
+/// specified SCC. This keeps track of whether a function pass devirtualizes
+/// any calls and returns it in DevirtualizedCall.
+bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+
+ // CallGraphUpToDate - Keep track of whether the callgraph is known to be
+ // up-to-date or not. The CGSSC pass manager runs two types of passes:
+ // CallGraphSCC Passes and other random function passes. Because other
+ // random function passes are not CallGraph aware, they may clobber the
+ // call graph by introducing new calls or deleting other ones. This flag
+ // is set to false when we run a function pass so that we know to clean up
+ // the callgraph when we need to run a CGSCCPass again.
+ bool CallGraphUpToDate = true;
+
+ // Run all passes on current SCC.
+ for (unsigned PassNo = 0, e = getNumContainedPasses();
+ PassNo != e; ++PassNo) {
+ Pass *P = getContainedPass(PassNo);
+
+ // If we're in -debug-pass=Executions mode, construct the SCC node list,
+ // otherwise avoid constructing this string as it is expensive.
+ if (isPassDebuggingExecutionsOrMore()) {
+ std::string Functions;
+ #ifndef NDEBUG
+ raw_string_ostream OS(Functions);
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (I != CurSCC.begin()) OS << ", ";
+ (*I)->print(OS);
+ }
+ OS.flush();
+ #endif
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+ }
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ // Actually run this pass on the current SCC.
+ Changed |= RunPassOnSCC(P, CurSCC, CG,
+ CallGraphUpToDate, DevirtualizedCall);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+ dumpPreservedSet(P);
+
+ verifyPreservedAnalysis(P);
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, "", ON_CG_MSG);
+ }
+
+ // If the callgraph was left out of date (because the last pass run was a
+ // functionpass), refresh it before we move on to the next SCC.
+ if (!CallGraphUpToDate)
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ return Changed;
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool Changed = doInitialization(CG);
+
+ // Walk the callgraph in bottom-up SCC order.
+ scc_iterator<CallGraph*> CGI = scc_begin(&CG);
+
+ CallGraphSCC CurSCC(&CGI);
+ while (!CGI.isAtEnd()) {
+ // Copy the current SCC and increment past it so that the pass can hack
+ // on the SCC if it wants to without invalidating our iterator.
+ std::vector<CallGraphNode*> &NodeVec = *CGI;
+ CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
+ ++CGI;
+
+ // At the top level, we run all the passes in this pass manager on the
+ // functions in this SCC. However, we support iterative compilation in the
+ // case where a function pass devirtualizes a call to a function. For
+ // example, it is very common for a function pass (often GVN or instcombine)
+ // to eliminate the addressing that feeds into a call. With that improved
+ // information, we would like the call to be an inline candidate, infer
+ // mod-ref information etc.
+ //
+ // Because of this, we allow iteration up to a specified iteration count.
+ // This only happens in the case of a devirtualized call, so we only burn
+ // compile time in the case that we're making progress. We also have a hard
+ // iteration count limit in case there is crazy code.
+ unsigned Iteration = 0;
+ bool DevirtualizedCall = false;
+ do {
+ DEBUG(if (Iteration)
+ dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #"
+ << Iteration << '\n');
+ DevirtualizedCall = false;
+ Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
+ } while (Iteration++ < MaxIterations && DevirtualizedCall);
+
+ if (DevirtualizedCall)
+ DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
+ << " times, due to -max-cg-scc-iterations\n");
+
+ if (Iteration > MaxSCCIterations)
+ MaxSCCIterations = Iteration;
+
+ }
+ Changed |= doFinalization(CG);
+ return Changed;
+}
+
+
+/// Initialize CG
+bool CGPassManager::doInitialization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG);
+ }
+ }
+ return Changed;
+}
+
+/// Finalize CG
+bool CGPassManager::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG);
+ }
+ }
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCC Implementation
+//===----------------------------------------------------------------------===//
+
+/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// Old node has been deleted, and New is to be used in its place.
+void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
+ assert(Old != New && "Should not replace node with self");
+ for (unsigned i = 0; ; ++i) {
+ assert(i != Nodes.size() && "Node not in SCC");
+ if (Nodes[i] != Old) continue;
+ Nodes[i] = New;
+ break;
+ }
+
+ // Update the active scc_iterator so that it doesn't contain dangling
+ // pointers to the old CallGraphNode.
+ scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context;
+ CGI->ReplaceNode(Old, New);
+}
+
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCCPass Implementation
+//===----------------------------------------------------------------------===//
+
+/// Assign pass manager to manage this pass.
+void CallGraphSCCPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find CGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
+ PMS.pop();
+
+ assert(!PMS.empty() && "Unable to handle Call Graph Pass");
+ CGPassManager *CGP;
+
+ if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager)
+ CGP = (CGPassManager*)PMS.top();
+ else {
+ // Create new Call Graph SCC Pass Manager if it does not exist.
+ assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Call Graph Pass Manager
+ CGP = new CGPassManager(PMD->getDepth() + 1);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(CGP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = CGP;
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(CGP);
+ }
+
+ CGP->add(this);
+}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<CallGraph>();
+ AU.addPreserved<CallGraph>();
+}
+
+
+//===----------------------------------------------------------------------===//
+// PrintCallGraphPass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// PrintCallGraphPass - Print a Module corresponding to a call graph.
+ ///
+ class PrintCallGraphPass : public CallGraphSCCPass {
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+ public:
+ static char ID;
+ PrintCallGraphPass(const std::string &B, raw_ostream &o)
+ : CallGraphSCCPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) {
+ Out << Banner;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ (*I)->getFunction()->print(Out);
+ return false;
+ }
+ };
+
+} // end anonymous namespace.
+
+char PrintCallGraphPass::ID = 0;
+
+Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintCallGraphPass(Banner, O);
+}
+
diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
new file mode 100644
index 000000000000..06ae34cfd989
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -0,0 +1,103 @@
+//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to seek out all of the types in use by the program. Note
+// that this analysis explicitly does not include types only used by the symbol
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char FindUsedTypes::ID = 0;
+INITIALIZE_PASS(FindUsedTypes, "print-used-types",
+ "Find Used Types", false, true)
+
+// IncorporateType - Incorporate one type and all of its subtypes into the
+// collection of used types.
+//
+void FindUsedTypes::IncorporateType(const Type *Ty) {
+ // If ty doesn't already exist in the used types map, add it now, otherwise
+ // return.
+ if (!UsedTypes.insert(Ty).second) return; // Already contain Ty.
+
+ // Make sure to add any types this type references now.
+ //
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ IncorporateType(*I);
+}
+
+void FindUsedTypes::IncorporateValue(const Value *V) {
+ IncorporateType(V->getType());
+
+ // If this is a constant, it could be using other types...
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (!isa<GlobalValue>(C))
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI);
+ }
+}
+
+
+// run - This incorporates all types used by the specified module
+//
+bool FindUsedTypes::runOnModule(Module &m) {
+ UsedTypes.clear(); // reset if run multiple times...
+
+ // Loop over global variables, incorporating their types
+ for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
+ I != E; ++I) {
+ IncorporateType(I->getType());
+ if (I->hasInitializer())
+ IncorporateValue(I->getInitializer());
+ }
+
+ for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
+ IncorporateType(MI->getType());
+ const Function &F = *MI;
+
+ // Loop over all of the instructions in the function, adding their return
+ // type as well as the types of their operands.
+ //
+ for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
+ II != IE; ++II) {
+ const Instruction &I = *II;
+
+ IncorporateType(I.getType()); // Incorporate the type of the instruction
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI); // Insert inst operand types as well
+ }
+ }
+
+ return false;
+}
+
+// Print the types found in the module. If the optional Module parameter is
+// passed in, then the types are printed symbolically if possible, using the
+// symbol table from the module.
+//
+void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
+ OS << "Types in use by this module:\n";
+ for (std::set<const Type *>::const_iterator I = UsedTypes.begin(),
+ E = UsedTypes.end(); I != E; ++I) {
+ OS << " ";
+ WriteTypeSymbolic(OS, *I, M);
+ OS << '\n';
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
new file mode 100644
index 000000000000..b226d66cd78a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -0,0 +1,609 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure"). For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalsmodref-aa"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SCCIterator.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+ "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+namespace {
+ /// FunctionRecord - One instance of this structure is stored for every
+ /// function in the program. Later, the entries for these functions are
+ /// removed if the function is found to call an external function (in which
+ /// case we know nothing about it.
+ struct FunctionRecord {
+ /// GlobalInfo - Maintain mod/ref info for all of the globals without
+ /// addresses taken that are read or written (transitively) by this
+ /// function.
+ std::map<const GlobalValue*, unsigned> GlobalInfo;
+
+ /// MayReadAnyGlobal - May read global variables, but it is not known which.
+ bool MayReadAnyGlobal;
+
+ unsigned getInfoForGlobal(const GlobalValue *GV) const {
+ unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+ std::map<const GlobalValue*, unsigned>::const_iterator I =
+ GlobalInfo.find(GV);
+ if (I != GlobalInfo.end())
+ Effect |= I->second;
+ return Effect;
+ }
+
+ /// FunctionEffect - Capture whether or not this function reads or writes to
+ /// ANY memory. If not, we can do a lot of aggressive analysis on it.
+ unsigned FunctionEffect;
+
+ FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {}
+ };
+
+ /// GlobalsModRef - The actual analysis pass.
+ class GlobalsModRef : public ModulePass, public AliasAnalysis {
+ /// NonAddressTakenGlobals - The globals that do not have their addresses
+ /// taken.
+ std::set<const GlobalValue*> NonAddressTakenGlobals;
+
+ /// IndirectGlobals - The memory pointed to by this global is known to be
+ /// 'owned' by the global.
+ std::set<const GlobalValue*> IndirectGlobals;
+
+ /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+ /// indirect global, this map indicates which one.
+ std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
+
+ /// FunctionInfo - For each function, keep track of what globals are
+ /// modified or read.
+ std::map<const Function*, FunctionRecord> FunctionInfo;
+
+ public:
+ static char ID;
+ GlobalsModRef() : ModulePass(ID) {
+ initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+ AnalyzeGlobals(M); // find non-addr taken globals
+ AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<CallGraph>();
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Location &LocA, const Location &LocB);
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(const Function *F) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (const Function* F = CS.getCalledFunction())
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ }
+
+ virtual void deleteValue(Value *V);
+ virtual void copyValue(Value *From, Value *To);
+ virtual void addEscapingUse(Use &U);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ /// getFunctionInfo - Return the function info for the function, or null if
+ /// we don't have anything useful to say about it.
+ FunctionRecord *getFunctionInfo(const Function *F) {
+ std::map<const Function*, FunctionRecord>::iterator I =
+ FunctionInfo.find(F);
+ if (I != FunctionInfo.end())
+ return &I->second;
+ return 0;
+ }
+
+ void AnalyzeGlobals(Module &M);
+ void AnalyzeCallGraph(CallGraph &CG, Module &M);
+ bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest = 0);
+ bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
+ };
+}
+
+char GlobalsModRef::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
+
+Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValue's in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsModRef::AnalyzeGlobals(Module &M) {
+ std::vector<Function*> Readers, Writers;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global.
+ NonAddressTakenGlobals.insert(I);
+ ++NumNonAddrTakenFunctions;
+ }
+ Readers.clear(); Writers.clear();
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global, and the mod/ref fns
+ NonAddressTakenGlobals.insert(I);
+
+ for (unsigned i = 0, e = Readers.size(); i != e; ++i)
+ FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
+
+ if (!I->isConstant()) // No need to keep track of writers to constants
+ for (unsigned i = 0, e = Writers.size(); i != e; ++i)
+ FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
+ ++NumNonAddrTakenGlobalVars;
+
+ // If this global holds a pointer type, see if it is an indirect global.
+ if (I->getType()->getElementType()->isPointerTy() &&
+ AnalyzeIndirectGlobalMemory(I))
+ ++NumIndirectGlobalVars;
+ }
+ Readers.clear(); Writers.clear();
+ }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
+ std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest) {
+ if (!V->getType()->isPointerTy()) return true;
+
+ for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ Readers.push_back(LI->getParent()->getParent());
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (V == SI->getOperand(1)) {
+ Writers.push_back(SI->getParent()->getParent());
+ } else if (SI->getOperand(1) != OkayStoreDest) {
+ return true; // Storing the pointer
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
+ return true;
+ } else if (isFreeCall(U)) {
+ Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ if (CI->getArgOperand(i) == V) return true;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
+ if (II->getArgOperand(i) == V) return true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr ||
+ CE->getOpcode() == Instruction::BitCast) {
+ if (AnalyzeUsesOfPointer(CE, Readers, Writers))
+ return true;
+ } else {
+ return true;
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
+ // Keep track of values related to the allocation of the memory, f.e. the
+ // value produced by the malloc call and any casts.
+ std::vector<Value*> AllocRelatedValues;
+
+ // Walk the user list of the global. If we find anything other than a direct
+ // load or store, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // The pointer loaded from the global can only be used in simple ways:
+ // we allow addressing of it and loading storing to it. We do *not* allow
+ // storing the loaded pointer somewhere else or passing to a function.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
+ return false; // Loaded pointer escapes.
+ // TODO: Could try some IP mod/ref of the loaded pointer.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Storing the global itself.
+ if (SI->getOperand(0) == GV) return false;
+
+ // If storing the null pointer, ignore it.
+ if (isa<ConstantPointerNull>(SI->getOperand(0)))
+ continue;
+
+ // Check the value being stored.
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
+
+ if (isMalloc(Ptr)) {
+ // Okay, easy case.
+ } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
+ Function *F = CI->getCalledFunction();
+ if (!F || !F->isDeclaration()) return false; // Too hard to analyze.
+ if (F->getName() != "calloc") return false; // Not calloc.
+ } else {
+ return false; // Too hard to analyze.
+ }
+
+ // Analyze all uses of the allocation. If any of them are used in a
+ // non-simple way (e.g. stored to another global) bail out.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
+ return false; // Loaded pointer escapes.
+
+ // Remember that this allocation is related to the indirect global.
+ AllocRelatedValues.push_back(Ptr);
+ } else {
+ // Something complex, bail out.
+ return false;
+ }
+ }
+
+ // Okay, this is an indirect global. Remember all of the allocations for
+ // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) {
+ AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+ AllocRelatedValues.pop_back();
+ }
+ IndirectGlobals.insert(GV);
+ return true;
+}
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E;
+ ++I) {
+ std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ if (!SCC[0]->getFunction()) {
+ // Calls externally - can't say anything useful. Remove any existing
+ // function records (may have been created when scanning globals).
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()];
+
+ bool KnowNothing = false;
+ unsigned FunctionEffect = 0;
+
+ // Collect the mod/ref properties due to called functions. We only compute
+ // one mod-ref set.
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) {
+ KnowNothing = true;
+ break;
+ }
+
+ if (F->isDeclaration()) {
+ // Try to get mod/ref behaviour from function attributes.
+ if (F->doesNotAccessMemory()) {
+ // Can't do better than that!
+ } else if (F->onlyReadsMemory()) {
+ FunctionEffect |= Ref;
+ if (!F->isIntrinsic())
+ // This function might call back into the module and read a global -
+ // consider every global as possibly being read by this function.
+ FR.MayReadAnyGlobal = true;
+ } else {
+ FunctionEffect |= ModRef;
+ // Can't say anything useful unless it's an intrinsic - they don't
+ // read or write global variables of the kind considered here.
+ KnowNothing = !F->isIntrinsic();
+ }
+ continue;
+ }
+
+ for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+ CI != E && !KnowNothing; ++CI)
+ if (Function *Callee = CI->second->getFunction()) {
+ if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) {
+ // Propagate function effect up.
+ FunctionEffect |= CalleeFR->FunctionEffect;
+
+ // Incorporate callee's effects on globals into our info.
+ for (std::map<const GlobalValue*, unsigned>::iterator GI =
+ CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
+ GI != E; ++GI)
+ FR.GlobalInfo[GI->first] |= GI->second;
+ FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
+ } else {
+ // Can't say anything about it. However, if it is inside our SCC,
+ // then nothing needs to be done.
+ CallGraphNode *CalleeNode = CG[Callee];
+ if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ KnowNothing = true;
+ }
+ } else {
+ KnowNothing = true;
+ }
+ }
+
+ // If we can't say anything useful about this SCC, remove all SCC functions
+ // from the FunctionInfo map.
+ if (KnowNothing) {
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ // Scan the function bodies for explicit loads or stores.
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i)
+ for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
+ E = inst_end(SCC[i]->getFunction());
+ II != E && FunctionEffect != ModRef; ++II)
+ if (isa<LoadInst>(*II)) {
+ FunctionEffect |= Ref;
+ if (cast<LoadInst>(*II).isVolatile())
+ // Volatile loads may have side-effects, so mark them as writing
+ // memory (for example, a flag inside the processor).
+ FunctionEffect |= Mod;
+ } else if (isa<StoreInst>(*II)) {
+ FunctionEffect |= Mod;
+ if (cast<StoreInst>(*II).isVolatile())
+ // Treat volatile stores as reading memory somewhere.
+ FunctionEffect |= Ref;
+ } else if (isMalloc(&cast<Instruction>(*II)) ||
+ isFreeCall(&cast<Instruction>(*II))) {
+ FunctionEffect |= ModRef;
+ }
+
+ if ((FunctionEffect & Mod) == 0)
+ ++NumReadMemFunctions;
+ if (FunctionEffect == 0)
+ ++NumNoMemFunctions;
+ FR.FunctionEffect = FunctionEffect;
+
+ // Finally, now that we know the full effect on this SCC, clone the
+ // information to each function in the SCC.
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+ FunctionInfo[SCC[i]->getFunction()] = FR;
+ }
+}
+
+
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasAnalysis::AliasResult
+GlobalsModRef::alias(const Location &LocA,
+ const Location &LocB) {
+ // Get the base object these pointers point to.
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
+
+ // If either of the underlying values is a global, they may be non-addr-taken
+ // globals, which we can answer queries about.
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ if (GV1 || GV2) {
+ // If the global's address is taken, pretend we don't know it's a pointer to
+ // the global.
+ if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;
+
+ // If the two pointers are derived from two different non-addr-taken
+ // globals, or if one is and the other isn't, we know these can't alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ // Otherwise if they are both derived from the same addr-taken global, we
+ // can't know the two accesses don't overlap.
+ }
+
+ // These pointers may be based on the memory owned by an indirect global. If
+ // so, we may be able to handle this. First check to see if the base pointer
+ // is a direct load from an indirect global.
+ GV1 = GV2 = 0;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV1 = GV;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV2 = GV;
+
+ // These pointers may also be from an allocation for the indirect global. If
+ // so, also handle them.
+ if (AllocsForIndirectGlobals.count(UV1))
+ GV1 = AllocsForIndirectGlobals[UV1];
+ if (AllocsForIndirectGlobals.count(UV2))
+ GV2 = AllocsForIndirectGlobals[UV2];
+
+ // Now that we know whether the two pointers are related to indirect globals,
+ // use this to disambiguate the pointers. If either pointer is based on an
+ // indirect global and if they are not both based on the same indirect global,
+ // they cannot alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ return AliasAnalysis::alias(LocA, LocB);
+}
+
+AliasAnalysis::ModRefResult
+GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ unsigned Known = ModRef;
+
+ // If we are asking for mod/ref info of a direct call with a pointer to a
+ // global we are tracking, return information if we have it.
+ if (const GlobalValue *GV =
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
+ if (GV->hasLocalLinkage())
+ if (const Function *F = CS.getCalledFunction())
+ if (NonAddressTakenGlobals.count(GV))
+ if (const FunctionRecord *FR = getFunctionInfo(F))
+ Known = FR->getInfoForGlobal(GV);
+
+ if (Known == NoModRef)
+ return NoModRef; // No need to query other mod/ref analyses
+ return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods to update the analysis as a result of the client transformation.
+//
+void GlobalsModRef::deleteValue(Value *V) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (NonAddressTakenGlobals.erase(GV)) {
+ // This global might be an indirect global. If so, remove it and remove
+ // any AllocRelatedValues for it.
+ if (IndirectGlobals.erase(GV)) {
+ // Remove any entries in AllocsForIndirectGlobals for this global.
+ for (std::map<const Value*, const GlobalValue*>::iterator
+ I = AllocsForIndirectGlobals.begin(),
+ E = AllocsForIndirectGlobals.end(); I != E; ) {
+ if (I->second == GV) {
+ AllocsForIndirectGlobals.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ }
+ }
+ }
+
+ // Otherwise, if this is an allocation related to an indirect global, remove
+ // it.
+ AllocsForIndirectGlobals.erase(V);
+
+ AliasAnalysis::deleteValue(V);
+}
+
+void GlobalsModRef::copyValue(Value *From, Value *To) {
+ AliasAnalysis::copyValue(From, To);
+}
+
+void GlobalsModRef::addEscapingUse(Use &U) {
+ // For the purposes of this analysis, it is conservatively correct to treat
+ // a newly escaping value equivalently to a deleted one. We could perhaps
+ // be more precise by processing the new use and attempting to update our
+ // saved analysis results to accommodate it.
+ deleteValue(U);
+
+ AliasAnalysis::addEscapingUse(U);
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
new file mode 100644
index 000000000000..0ba2e04c6302
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
@@ -0,0 +1,29 @@
+//===-- IPA.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the IPA library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void llvm::initializeIPA(PassRegistry &Registry) {
+ initializeBasicCallGraphPass(Registry);
+ initializeCallGraphAnalysisGroup(Registry);
+ initializeFindUsedTypesPass(Registry);
+ initializeGlobalsModRefPass(Registry);
+}
+
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+ initializeIPA(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
new file mode 100644
index 000000000000..2cda7913f024
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -0,0 +1,268 @@
+//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "iv-users"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+char IVUsers::ID = 0;
+INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+
+Pass *llvm::createIVUsersPass() {
+ return new IVUsers();
+}
+
+/// isInteresting - Test whether the given expression is "interesting" when
+/// used by the given expression, within the context of analyzing the
+/// given loop.
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
+ ScalarEvolution *SE) {
+ // An addrec is interesting if it's affine or if it has an interesting start.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Keep things simple. Don't touch loop-variant strides.
+ if (AR->getLoop() == L)
+ return AR->isAffine() || !L->contains(I);
+ // Otherwise recurse to see if the start value is interesting, and that
+ // the step value is not interesting, since we don't yet know how to
+ // do effective SCEV expansions for addrecs with interesting steps.
+ return isInteresting(AR->getStart(), I, L, SE) &&
+ !isInteresting(AR->getStepRecurrence(*SE), I, L, SE);
+ }
+
+ // An add is interesting if exactly one of its operands is interesting.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ bool AnyInterestingYet = false;
+ for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
+ OI != OE; ++OI)
+ if (isInteresting(*OI, I, L, SE)) {
+ if (AnyInterestingYet)
+ return false;
+ AnyInterestingYet = true;
+ }
+ return AnyInterestingYet;
+ }
+
+ // Nothing else is interesting here.
+ return false;
+}
+
+/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
+/// reducible SCEV, recursively add its users to the IVUsesByStride set and
+/// return true. Otherwise, return false.
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false; // Void and FP expressions cannot be reduced.
+
+ // LSR is not APInt clean, do not touch integers bigger than 64-bits.
+ // Also avoid creating IVs of non-native types. For example, we don't want a
+ // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
+ uint64_t Width = SE->getTypeSizeInBits(I->getType());
+ if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
+ return false;
+
+ if (!Processed.insert(I))
+ return true; // Instruction already handled.
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *ISE = SE->getSCEV(I);
+
+ // If we've come to an uninteresting expression, stop the traversal and
+ // call this a user.
+ if (!isInteresting(ISE, I, L, SE))
+ return false;
+
+ SmallPtrSet<Instruction *, 4> UniqueUsers;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!UniqueUsers.insert(User))
+ continue;
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && Processed.count(User))
+ continue;
+
+ // Descend recursively, but not into PHI nodes outside the current loop.
+ // It's important to see the entire expression outside the loop to get
+ // choices that depend on addressing mode use right, although we won't
+ // consider references outside the loop in all cases.
+ // If User is already in Processed, we don't want to recurse into it again,
+ // but do want to record a second reference in the same instruction.
+ bool AddUserToIVUsers = false;
+ if (LI->getLoopFor(User->getParent()) != L) {
+ if (isa<PHINode>(User) || Processed.count(User) ||
+ !AddUsersIfInteresting(User)) {
+ DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+ } else if (Processed.count(User) ||
+ !AddUsersIfInteresting(User)) {
+ DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+
+ if (AddUserToIVUsers) {
+ // Okay, we found a user that we cannot reduce.
+ IVUses.push_back(new IVStrideUse(this, User, I));
+ IVStrideUse &NewUse = IVUses.back();
+ // Transform the expression into a normalized form.
+ ISE = TransformForPostIncUse(NormalizeAutodetect,
+ ISE, User, I,
+ NewUse.PostIncLoops,
+ *SE, *DT);
+ DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n');
+ }
+ }
+ return true;
+}
+
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+ IVUses.push_back(new IVStrideUse(this, User, Operand));
+ return IVUses.back();
+}
+
+IVUsers::IVUsers()
+ : LoopPass(ID) {
+ initializeIVUsersPass(*PassRegistry::getPassRegistry());
+}
+
+void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.setPreservesAll();
+}
+
+bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
+
+ L = l;
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ // Find all uses of induction variables in this loop, and categorize
+ // them by stride. Start by finding all of the PHI nodes in the header for
+ // this loop. If they are induction variables, inspect their uses.
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+ (void)AddUsersIfInteresting(I);
+
+ return false;
+}
+
+void IVUsers::print(raw_ostream &OS, const Module *M) const {
+ OS << "IV Users for loop ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << " with backedge-taken count "
+ << *SE->getBackedgeTakenCount(L);
+ }
+ OS << ":\n";
+
+ for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+ E = IVUses.end(); UI != E; ++UI) {
+ OS << " ";
+ WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+ OS << " = " << *getReplacementExpr(*UI);
+ for (PostIncLoopSet::const_iterator
+ I = UI->PostIncLoops.begin(),
+ E = UI->PostIncLoops.end(); I != E; ++I) {
+ OS << " (post-inc with loop ";
+ WriteAsOperand(OS, (*I)->getHeader(), false);
+ OS << ")";
+ }
+ OS << " in ";
+ UI->getUser()->print(OS);
+ OS << '\n';
+ }
+}
+
+void IVUsers::dump() const {
+ print(dbgs());
+}
+
+void IVUsers::releaseMemory() {
+ Processed.clear();
+ IVUses.clear();
+}
+
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace.
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
+ return SE->getSCEV(IU.getOperandValToReplace());
+}
+
+/// getExpr - Return the expression for the use.
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
+ return
+ TransformForPostIncUse(Normalize, getReplacementExpr(IU),
+ IU.getUser(), IU.getOperandValToReplace(),
+ const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
+ *SE, *DT);
+}
+
+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (AR->getLoop() == L)
+ return AR;
+ return findAddRecForLoop(AR->getStart(), L);
+ }
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+ return AR;
+ return 0;
+ }
+
+ return 0;
+}
+
+const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
+ return AR->getStepRecurrence(*SE);
+ return 0;
+}
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+ PostIncLoops.insert(L);
+}
+
+void IVStrideUse::deleted() {
+ // Remove this user from the list.
+ Parent->IVUses.erase(this);
+ // this now dangles!
+}
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
new file mode 100644
index 000000000000..a820ecf0372a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -0,0 +1,648 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/CallingConv.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+/// callIsSmall - If a call is likely to lower to a single target instruction,
+/// or is otherwise deemed small return true.
+/// TODO: Perhaps calls like memcpy, strcpy, etc?
+bool llvm::callIsSmall(const Function *F) {
+ if (!F) return false;
+
+ if (F->hasLocalLinkage()) return false;
+
+ if (!F->hasName()) return false;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+ Name == "sin" || Name == "sinf" || Name == "sinl" ||
+ Name == "cos" || Name == "cosf" || Name == "cosl" ||
+ Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+ return true;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" ||
+ Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+ Name == "floor" || Name == "floorf" || Name == "ceil" ||
+ Name == "round" || Name == "ffs" || Name == "ffsl" ||
+ Name == "abs" || Name == "labs" || Name == "llabs")
+ return true;
+
+ return false;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
+ ++NumBlocks;
+ unsigned NumInstsBeforeThisBB = NumInsts;
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ if (isa<DbgInfoIntrinsic>(II))
+ continue; // Debug intrinsics don't count as size.
+
+ ImmutableCallSite CS(cast<Instruction>(II));
+
+ // If this function contains a call to setjmp or _setjmp, never inline
+ // it. This is a hack because we depend on the user marking their local
+ // variables as volatile if they are live across a setjmp call, and they
+ // probably won't do this in callers.
+ if (const Function *F = CS.getCalledFunction()) {
+ // If a function is both internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
+ if (F->isDeclaration() &&
+ (F->getName() == "setjmp" || F->getName() == "_setjmp"))
+ callsSetJmp = true;
+
+ // If this call is to function itself, then the function is recursive.
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ isRecursive = true;
+ }
+
+ if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+ // Each argument to a call takes on average one instruction to set up.
+ NumInsts += CS.arg_size();
+
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+ ++NumVectorInsts;
+
+ if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+ // Noop casts, including ptr <-> int, don't count.
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+ isa<PtrToIntInst>(CI))
+ continue;
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical or return instructions). These are usually
+ // nop on most sane targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ continue;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (GEPI->hasAllConstantIndices())
+ continue;
+ }
+
+ ++NumInsts;
+ }
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+ // We never want to inline functions that contain an indirectbr. This is
+ // incorrect because all the blockaddress's (in static global initializers
+ // for example) would be referring to the original function, and this indirect
+ // jump would jump from the inlined copy of the function into the original
+ // function which is extremely undefined behavior.
+ if (isa<IndirectBrInst>(BB->getTerminator()))
+ containsIndirectBr = true;
+
+ // Remember NumInsts for this BB.
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
+
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
+unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
+ // We will be able to eliminate all but one of the successors.
+ const TerminatorInst &TI = cast<TerminatorInst>(*U);
+ const unsigned NumSucc = TI.getNumSuccessors();
+ unsigned Instrs = 0;
+ for (unsigned I = 0; I != NumSucc; ++I)
+ Instrs += NumBBInsts[TI.getSuccessor(I)];
+ // We don't know which blocks will be eliminated, so use the average size.
+ Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
+ } else {
+ // Figure out if this instruction will be removed due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(*U);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocaInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant) {
+ // We will get to remove this instruction...
+ Reduction += InlineConstants::InstrCost;
+
+ // And any other instructions that use it which become constants
+ // themselves.
+ Reduction += CountCodeReductionForConstant(&Inst);
+ }
+ }
+ }
+ return Reduction;
+}
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
+unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0; // Not a pointer
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *I = cast<Instruction>(*UI);
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ Reduction += InlineConstants::InstrCost;
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (GEP->hasAllConstantIndices())
+ Reduction += CountCodeReductionForAlloca(GEP);
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ // Track pointer through bitcasts.
+ Reduction += CountCodeReductionForAlloca(BCI);
+ } else {
+ // If there is some other strange instruction, we're not going to be able
+ // to do much if we inline this.
+ return 0;
+ }
+ }
+
+ return Reduction;
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void CodeMetrics::analyzeFunction(Function *F) {
+ // Look at the size of the callee.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ analyzeBasicBlock(&*BB);
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
+ Metrics.analyzeFunction(F);
+
+ // A function with exactly one return has it removed during the inlining
+ // process (see InlineFunction), so don't count it.
+ // FIXME: This knowledge should really be encoded outside of FunctionInfo.
+ if (Metrics.NumRets==1)
+ --Metrics.NumInsts;
+
+ // Check out all of the arguments to the function, figuring out how much
+ // code can be eliminated if one of the arguments is a constant.
+ ArgumentWeights.reserve(F->arg_size());
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
+ Metrics.CountCodeReductionForAlloca(I)));
+}
+
+/// NeverInline - returns true if the function should never be inlined into
+/// any caller
+bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
+ return (Metrics.callsSetJmp || Metrics.isRecursive ||
+ Metrics.containsIndirectBr);
+}
+// getSpecializationBonus - The heuristic used to determine the per-call
+// performance boost for using a specialization of Callee with argument
+// specializedArgNo replaced by a constant.
+int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ if (Callee->mayBeOverridden())
+ return 0;
+
+ int Bonus = 0;
+ // If this function uses the coldcc calling convention, prefer not to
+ // specialize it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus -= InlineConstants::ColdccPenalty;
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ unsigned ArgNo = 0;
+ unsigned i = 0;
+ for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
+ I != E; ++I, ++ArgNo)
+ if (ArgNo == SpecializedArgNos[i]) {
+ ++i;
+ Bonus += CountBonusForConstant(I);
+ }
+
+ // Calls usually take a long time, so they make the specialization gain
+ // smaller.
+ Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ return Bonus;
+}
+
+// ConstantFunctionBonus - Figure out how much of a bonus we can get for
+// possibly devirtualizing a function. We'll subtract the size of the function
+// we may wish to inline from the indirect call bonus providing a limit on
+// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize
+// inlining because we decide we don't want to give a bonus for
+// devirtualizing.
+int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+
+ // This could just be NULL.
+ if (!C) return 0;
+
+ Function *F = dyn_cast<Function>(C);
+ if (!F) return 0;
+
+ int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
+ return (Bonus > 0) ? 0 : Bonus;
+}
+
+// CountBonusForConstant - Figure out an approximation for how much per-call
+// performance boost we can expect if the specified value is constant.
+int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
+ unsigned Bonus = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Turning an indirect call into a direct call is a BIG win
+ if (CI->getCalledValue() == V)
+ Bonus += ConstantFunctionBonus(CallSite(CI), C);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ // Turning an indirect call into a direct call is a BIG win
+ if (II->getCalledValue() == V)
+ Bonus += ConstantFunctionBonus(CallSite(II), C);
+ }
+ // FIXME: Eliminating conditional branches and switches should
+ // also yield a per-call performance boost.
+ else {
+ // Figure out the bonuses that wll accrue due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(*U);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocaInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant)
+ Bonus += CountBonusForConstant(&Inst);
+ }
+ }
+
+ return Bonus;
+}
+
+int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ // InlineCost - This value measures how good of an inline candidate this call
+ // site is to inline. A lower inline cost make is more likely for the call to
+ // be inlined. This value may go negative.
+ //
+ int InlineCost = 0;
+
+ // Compute any size reductions we can expect due to arguments being passed into
+ // the function.
+ //
+ unsigned ArgNo = 0;
+ CallSite::arg_iterator I = CS.arg_begin();
+ for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+ FI != FE; ++I, ++FI, ++ArgNo) {
+
+ // If an alloca is passed in, inlining this function is likely to allow
+ // significant future optimization possibilities (like scalar promotion, and
+ // scalarization), so encourage the inlining of the function.
+ //
+ if (isa<AllocaInst>(I))
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
+
+ // If this is a constant being passed into the function, use the argument
+ // weights calculated for the callee to determine how much will be folded
+ // away with this information.
+ else if (isa<Constant>(I))
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+ }
+
+ // Each argument passed in has a cost at both the caller and the callee
+ // sides. Measurements show that each argument costs about the same as an
+ // instruction.
+ InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
+
+ // Now that we have considered all of the factors that make the call site more
+ // likely to be inlined, look at factors that make us not want to inline it.
+
+ // Calls usually take a long time, so they make the inlining gain smaller.
+ InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ // Look at the size of the callee. Each instruction counts as 5.
+ InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+
+ return InlineCost;
+}
+
+int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ bool isDirectCall = CS.getCalledFunction() == Callee;
+ Instruction *TheCall = CS.getInstruction();
+ int Bonus = 0;
+
+ // If there is only one call of the function, and it has internal linkage,
+ // make it almost guaranteed to be inlined.
+ //
+ if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
+ Bonus += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction after the call, or if the normal destination of the
+ // invoke is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ Bonus += InlineConstants::NoreturnPenalty;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
+ Bonus += InlineConstants::NoreturnPenalty;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus += InlineConstants::ColdccPenalty;
+
+ // Add to the inline quality for properties that make the call valuable to
+ // inline. This includes factors that indicate that the result of inlining
+ // the function will be optimizable. Currently this just looks at arguments
+ // passed into the function.
+ //
+ CallSite::arg_iterator I = CS.arg_begin();
+ for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+ FI != FE; ++I, ++FI)
+ // Compute any constant bonus due to inlining we want to give here.
+ if (isa<Constant>(I))
+ Bonus += CountBonusForConstant(FI, cast<Constant>(I));
+
+ return Bonus;
+}
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ Function *Callee,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ Instruction *TheCall = CS.getInstruction();
+ Function *Caller = TheCall->getParent()->getParent();
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+ CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ // If we should never inline this, return a huge cost.
+ if (CalleeFI->NeverInline())
+ return InlineCost::getNever();
+
+ // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
+ // could move this up and avoid computing the FunctionInfo for
+ // things we are going to just return always inline for. This
+ // requires handling setjmp somewhere else, however.
+ if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getAlways();
+
+ if (CalleeFI->Metrics.usesDynamicAlloca) {
+ // Get information about the caller.
+ FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CallerFI.Metrics.NumBlocks == 0) {
+ CallerFI.analyzeFunction(Caller);
+
+ // Recompute the CalleeFI pointer, getting Caller could have invalidated
+ // it.
+ CalleeFI = &CachedFunctionInfo[Callee];
+ }
+
+ // Don't inline a callee with dynamic alloca into a caller without them.
+ // Functions containing dynamic alloca's are inefficient in various ways;
+ // don't create more inefficiency.
+ if (!CallerFI.Metrics.usesDynamicAlloca)
+ return InlineCost::getNever();
+ }
+
+ // InlineCost - This value measures how good of an inline candidate this call
+ // site is to inline. A lower inline cost make is more likely for the call to
+ // be inlined. This value may go negative due to the fact that bonuses
+ // are negative numbers.
+ //
+ int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
+ return llvm::InlineCost::get(InlineCost);
+}
+
+// getSpecializationCost - The heuristic used to determine the code-size
+// impact of creating a specialized version of Callee with argument
+// SpecializedArgNo replaced by a constant.
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ // Don't specialize functions which can be redefined at link-time to mean
+ // something else.
+ if (Callee->mayBeOverridden())
+ return llvm::InlineCost::getNever();
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ int Cost = 0;
+
+ // Look at the original size of the callee. Each instruction counts as 5.
+ Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+
+ // Offset that with the amount of code that can be constant-folded
+ // away with the given arguments replaced by constants.
+ for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
+ ae = SpecializedArgNos.end(); an != ae; ++an)
+ Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
+
+ return llvm::InlineCost::get(Cost);
+}
+
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Get information about the callee.
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.Metrics.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ float Factor = 1.0f;
+ // Single BB functions are often written to be inlined.
+ if (CalleeFI.Metrics.NumBlocks == 1)
+ Factor += 0.5f;
+
+ // Be more aggressive if the function contains a good chunk (if it mades up
+ // at least 10% of the instructions) of vector instructions.
+ if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
+ Factor += 2.0f;
+ else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
+ Factor += 1.5f;
+ return Factor;
+}
+
+/// growCachedCostInfo - update the cached cost info for Caller after Callee has
+/// been inlined.
+void
+InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
+ CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
+
+ // For small functions we prefer to recalculate the cost for better accuracy.
+ if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) {
+ resetCachedCostInfo(Caller);
+ return;
+ }
+
+ // For large functions, we can save a lot of computation time by skipping
+ // recalculations.
+ if (CallerMetrics.NumCalls > 0)
+ --CallerMetrics.NumCalls;
+
+ if (Callee == 0) return;
+
+ CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
+
+ // If we don't have metrics for the callee, don't recalculate them just to
+ // update an approximation in the caller. Instead, just recalculate the
+ // caller info from scratch.
+ if (CalleeMetrics.NumBlocks == 0) {
+ resetCachedCostInfo(Caller);
+ return;
+ }
+
+ // Since CalleeMetrics were already calculated, we know that the CallerMetrics
+ // reference isn't invalidated: both were in the DenseMap.
+ CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
+
+ // FIXME: If any of these three are true for the callee, the callee was
+ // not inlined into the caller, so I think they're redundant here.
+ CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp;
+ CallerMetrics.isRecursive |= CalleeMetrics.isRecursive;
+ CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr;
+
+ CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
+ CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
+ CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
+ CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts;
+ CallerMetrics.NumRets += CalleeMetrics.NumRets;
+
+ // analyzeBasicBlock counts each function argument as an inst.
+ if (CallerMetrics.NumInsts >= Callee->arg_size())
+ CallerMetrics.NumInsts -= Callee->arg_size();
+ else
+ CallerMetrics.NumInsts = 0;
+
+ // We are not updating the argument weights. We have already determined that
+ // Caller is a fairly large function, so we accept the loss of precision.
+}
+
+/// clear - empty the cache of inline costs
+void InlineCostAnalyzer::clear() {
+ CachedFunctionInfo.clear();
+}
diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp
new file mode 100644
index 000000000000..3b385d26ba3c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstCount.cpp
@@ -0,0 +1,87 @@
+//===-- InstCount.cpp - Collects the count of all instructions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcount"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalBlocks, "Number of basic blocks");
+STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalMemInst, "Number of memory instructions");
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+
+#include "llvm/Instruction.def"
+
+
+namespace {
+ class InstCount : public FunctionPass, public InstVisitor<InstCount> {
+ friend class InstVisitor<InstCount>;
+
+ void visitFunction (Function &F) { ++TotalFuncs; }
+ void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+
+#include "llvm/Instruction.def"
+
+ void visitInstruction(Instruction &I) {
+ errs() << "Instruction Count does not know about " << I;
+ llvm_unreachable(0);
+ }
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ InstCount() : FunctionPass(ID) {
+ initializeInstCountPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ };
+}
+
+char InstCount::ID = 0;
+INITIALIZE_PASS(InstCount, "instcount",
+ "Counts the various types of Instructions", false, true)
+
+FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
+
+// InstCount::run - This is the main Analysis entry point for a
+// function.
+//
+bool InstCount::runOnFunction(Function &F) {
+ unsigned StartMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ visit(F);
+ unsigned EndMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ TotalMemInst += EndMemInsts-StartMemInsts;
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
new file mode 100644
index 000000000000..9d6d3398feb8
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -0,0 +1,2314 @@
+//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements routines for folding instructions into simpler forms
+// that do not require creating new instructions. This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been
+// simplified: This is usually true and assuming it simplifies the logic (if
+// they have not been simplified then results are correct but maybe suboptimal).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Operator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+enum { RecursionLimit = 3 };
+
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc, "Number of reassociations");
+
+static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+
+/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ // Arguments and constants dominate all instructions.
+ return true;
+
+ // If we have a DominatorTree then do a precise test.
+ if (DT)
+ return DT->dominates(I, P);
+
+ // Otherwise, if the instruction is in the entry block, and is not an invoke,
+ // then it obviously dominates all phi nodes.
+ if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+ !isa<InvokeInst>(I))
+ return true;
+
+ return false;
+}
+
+/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
+/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
+/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
+/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExpand, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Check whether the expression has the form "(A op' B) op C".
+ if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+ if (Op0->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op C) op' (B op C)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == B && R == A)) {
+ ++NumExpand;
+ return LHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ // Check whether the expression has the form "A op (B op' C)".
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+ if (Op1->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op B) op' (A op C)".
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == C && R == B)) {
+ ++NumExpand;
+ return RHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
+/// using the operation OpCodeToExtract. For example, when Opcode is Add and
+/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExtract, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
+ !Op1 || Op1->getOpcode() != OpcodeToExtract)
+ return 0;
+
+ // The expression has the form "(A op' B) op (C op' D)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+
+ // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
+ Value *DD = A == C ? D : C;
+ // Form "A op' (B op DD)" if it simplifies completely.
+ // Does "B op DD" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+ // It does! Return "A op' V" if it simplifies or is already available.
+ // If V equals B then "A op' V" is just the LHS. If V equals DD then
+ // "A op' V" is just the RHS.
+ if (V == B || V == DD) {
+ ++NumFactor;
+ return V == B ? LHS : RHS;
+ }
+ // Otherwise return "A op' V" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
+ Value *CC = B == D ? C : D;
+ // Form "(A op CC) op' B" if it simplifies completely..
+ // Does "A op CC" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+ // It does! Return "V op' B" if it simplifies or is already available.
+ // If V equals A then "V op' B" is just the LHS. If V equals CC then
+ // "V op' B" is just the RHS.
+ if (V == A || V == CC) {
+ ++NumFactor;
+ return V == A ? LHS : RHS;
+ }
+ // Otherwise return "V op' B" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
+/// operations. Returns the simpler value, or null if none was found.
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
+ const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+ assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
+
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ // It does! Return "A op V" if it simplifies or is already available.
+ // If V equals B then "A op V" is just the LHS.
+ if (V == B) return LHS;
+ // Otherwise return "A op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+ // It does! Return "V op C" if it simplifies or is already available.
+ // If V equals B then "V op C" is just the RHS.
+ if (V == B) return RHS;
+ // Otherwise return "V op C" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // The remaining transforms require commutativity as well as associativity.
+ if (!Instruction::isCommutative(Opcode))
+ return 0;
+
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ // It does! Return "V op B" if it simplifies or is already available.
+ // If V equals A then "V op B" is just the LHS.
+ if (V == A) return LHS;
+ // Otherwise return "V op B" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ // It does! Return "B op V" if it simplifies or is already available.
+ // If V equals C then "B op V" is just the RHS.
+ if (V == C) return RHS;
+ // Otherwise return "B op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadBinOpOverSelect - In the case of a binary operation with a select
+/// instruction as an operand, try to simplify the binop by seeing whether
+/// evaluating it on both branches of the select results in the same value.
+/// Returns the common value if so, otherwise returns null.
+static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ SelectInst *SI;
+ if (isa<SelectInst>(LHS)) {
+ SI = cast<SelectInst>(LHS);
+ } else {
+ assert(isa<SelectInst>(RHS) && "No select instruction operand!");
+ SI = cast<SelectInst>(RHS);
+ }
+
+ // Evaluate the BinOp on the true and false branches of the select.
+ Value *TV;
+ Value *FV;
+ if (SI == LHS) {
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+ } else {
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+ }
+
+ // If they simplified to the same value, then return the common value.
+ // If they both failed to simplify then return null.
+ if (TV == FV)
+ return TV;
+
+ // If one branch simplified to undef, return the other one.
+ if (TV && isa<UndefValue>(TV))
+ return FV;
+ if (FV && isa<UndefValue>(FV))
+ return TV;
+
+ // If applying the operation did not change the true and false select values,
+ // then the result of the binop is the select itself.
+ if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
+ return SI;
+
+ // If one branch simplified and the other did not, and the simplified
+ // value is equal to the unsimplified one, return the simplified value.
+ // For example, select (cond, X, X & Z) & Z -> X & Z.
+ if ((FV && !TV) || (TV && !FV)) {
+ // Check that the simplified value has the form "X op Y" where "op" is the
+ // same as the original operation.
+ Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
+ if (Simplified && Simplified->getOpcode() == Opcode) {
+ // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
+ // We already know that "op" is the same as for the simplified value. See
+ // if the operands match too. If so, return the simplified value.
+ Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
+ Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
+ Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
+ if (Simplified->getOperand(0) == UnsimplifiedLHS &&
+ Simplified->getOperand(1) == UnsimplifiedRHS)
+ return Simplified;
+ if (Simplified->isCommutative() &&
+ Simplified->getOperand(1) == UnsimplifiedLHS &&
+ Simplified->getOperand(0) == UnsimplifiedRHS)
+ return Simplified;
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
+/// try to simplify the comparison by seeing whether both branches of the select
+/// result in the same value. Returns the common value if so, otherwise returns
+/// null.
+static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the select is on the LHS.
+ if (!isa<SelectInst>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
+ SelectInst *SI = cast<SelectInst>(LHS);
+
+ // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
+ // Does "cmp TV, RHS" simplify?
+ if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT,
+ MaxRecurse)) {
+ // It does! Does "cmp FV, RHS" simplify?
+ if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT,
+ MaxRecurse)) {
+ // It does! If they simplified to the same value, then use it as the
+ // result of the original comparison.
+ if (TCmp == FCmp)
+ return TCmp;
+ Value *Cond = SI->getCondition();
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V =
+ SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+ TD, DT, MaxRecurse))
+ return V;
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value. If so
+/// returns the common value, otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ PHINode *PI;
+ if (isa<PHINode>(LHS)) {
+ PI = cast<PHINode>(LHS);
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, DT))
+ return 0;
+ } else {
+ assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+ PI = cast<PHINode>(RHS);
+ // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(LHS, PI, DT))
+ return 0;
+ }
+
+ // Evaluate the BinOp on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = PI == LHS ?
+ SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // to previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
+/// try to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time. If so returns the
+/// common result, otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the phi is on the LHS.
+ if (!isa<PHINode>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+ PHINode *PI = cast<PHINode>(LHS);
+
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, DT))
+ return 0;
+
+ // Evaluate the BinOp on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // to previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// SimplifyAddInst - Given operands for an Add, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X + undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X + (Y - X) -> Y
+ // (Y - X) + X -> Y
+ // Eg: X + -X -> 0
+ Value *Y = 0;
+ if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
+ match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
+ return Y;
+
+ // X + ~X -> -1 since ~X = -X-1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ /// i1 add -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Threading Add over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A + select(cond, B, C)" means evaluating
+ // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifySubInst - Given operands for a Sub, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0))
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return UndefValue::get(Op0->getType());
+
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X - X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // (X*2) - X -> X
+ // (X<<1) - X -> X
+ Value *X = 0;
+ if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
+ match(Op0, m_Shl(m_Specific(Op1), m_One())))
+ return Op1;
+
+ // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
+ // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
+ Value *Y = 0, *Z = Op1;
+ if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
+ // See if "V === Y - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "X + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "Y + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
+ // For example, X - (X + 1) -> -1
+ X = Op0;
+ if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V - Z" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V - Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // Z - (X - Y) -> (Z - X) + Y if everything simplifies.
+ // For example, X - (X - Y) -> Y.
+ Z = Op0;
+ if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
+ // See if "V === Z - X" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V + Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+
+ // Mul distributes over Sub. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // i1 sub -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Threading Sub over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A - select(cond, B, C)" means evaluating
+ // "A-B" and "A-C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyMulInst - Given operands for a Mul, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X * undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X * 0 -> 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X * 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ // (X / Y) * Y -> X if the division is exact.
+ Value *X = 0, *Y = 0;
+ if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y
+ (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y)
+ BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1);
+ if (Div->isExact())
+ return X;
+ }
+
+ // i1 mul -> and.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ bool isSigned = Opcode == Instruction::SDiv;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef / X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 / X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X / 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be division by zero, hence it must be division by one.
+ return Op0;
+
+ // X / X -> 1
+ if (Op0 == Op1)
+ return ConstantInt::get(Op0->getType(), 1);
+
+ // (X * Y) / Y -> X if the multiplication does not overflow.
+ Value *X = 0, *Y = 0;
+ if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
+ if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
+ BinaryOperator *Mul = cast<BinaryOperator>(Op0);
+ // If the Mul knows it does not overflow, then we are good to go.
+ if ((isSigned && Mul->hasNoSignedWrap()) ||
+ (!isSigned && Mul->hasNoUnsignedWrap()))
+ return X;
+ // If X has the form X = A / Y then X * Y cannot overflow.
+ if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
+ if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
+ return X;
+ }
+
+ // (X rem Y) / Y -> 0
+ if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySDivInst - Given operands for an SDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyUDivInst - Given operands for a UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+ const DominatorTree *, unsigned) {
+ // undef / X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyRem - Given operands for an SRem or URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ bool isSigned = Opcode == Instruction::SRem;
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef % X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 % X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X % 0 -> undef, we don't need to preserve faults!
+ if (match(Op1, m_Zero()))
+ return UndefValue::get(Op0->getType());
+
+ // X % 1 -> 0
+ if (match(Op1, m_One()))
+ return Constant::getNullValue(Op0->getType());
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be remainder by zero, hence it must be remainder by one.
+ return Constant::getNullValue(Op0->getType());
+
+ // X % X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySRemInst - Given operands for an SRem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyURemInst - Given operands for a URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
+ const DominatorTree *, unsigned) {
+ // undef % X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ // 0 shift by X -> 0
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X shift by 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X shift by undef -> undef because it may shift by the bitwidth.
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // Shifting by the bitwidth or more is undefined.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1))
+ if (CI->getValue().getLimitedValue() >=
+ Op0->getType()->getScalarSizeInBits())
+ return UndefValue::get(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifyShlInst - Given operands for an Shl, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // undef << X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X >> A) << A -> X
+ Value *X;
+ if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) &&
+ cast<PossiblyExactOperator>(Op0)->isExact())
+ return X;
+ return 0;
+}
+
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyLShrInst - Given operands for an LShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // undef >>l X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAShrInst - Given operands for an AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // all ones >>a X -> all ones
+ if (match(Op0, m_AllOnes()))
+ return Op0;
+
+ // undef >>a X -> all ones
+ if (match(Op0, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X & undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X & X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X & 0 = 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X & -1 = X
+ if (match(Op1, m_AllOnes()))
+ return Op0;
+
+ // A & ~A = ~A & A = 0
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getNullValue(Op0->getType());
+
+ // (A | ?) & A = A
+ Value *A = 0, *B = 0;
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A & (A | ?) = A
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyOrInst - Given operands for an Or, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X | undef -> -1
+ if (match(Op1, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // X | X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X | 0 = X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X | -1 = -1
+ if (match(Op1, m_AllOnes()))
+ return Op1;
+
+ // A | ~A = ~A | A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (A & ?) | A = A
+ Value *A = 0, *B = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A | (A & ?) = A
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // ~(A & ?) | A = -1
+ if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op1 || B == Op1))
+ return Constant::getAllOnesValue(Op1->getType());
+
+ // A | ~(A & ?) = -1
+ if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op0 || B == Op0))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyXorInst - Given operands for a Xor, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // A ^ undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // A ^ 0 = A
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // A ^ A = 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // A ^ ~A = ~A ^ A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Threading Xor over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A ^ select(cond, B, C)" means evaluating
+ // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static const Type *GetCompareTy(Value *Op) {
+ return CmpInst::makeCmpResultType(Op->getType());
+}
+
+/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ const Type *ITy = GetCompareTy(LHS); // The return type.
+ const Type *OpTy = LHS->getType(); // The operand type.
+
+ // icmp X, X -> true/false
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ // Special case logic when the operands have i1 type.
+ if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
+ cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ }
+ }
+
+ // icmp <alloca*>, <global/alloca*/null> - Different stack variables have
+ // different addresses, and what's more the address of a stack variable is
+ // never null or equal to the address of a global. Note that generalizing
+ // to the case where LHS is a global variable address or null is pointless,
+ // since if both LHS and RHS are constants then we already constant folded
+ // the compare, and if only one of them is then we moved it to RHS already.
+ if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
+ isa<ConstantPointerNull>(RHS)))
+ // We already know that LHS != RHS.
+ return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+
+ // If we are comparing with zero then try hard since this is a common case.
+ if (match(RHS, m_Zero())) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ return ConstantInt::getFalse(LHS->getContext());
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, TD))
+ return ConstantInt::getFalse(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, TD))
+ return ConstantInt::getTrue(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getTrue(LHS->getContext());
+ if (LHSKnownNonNegative)
+ return ConstantInt::getFalse(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getTrue(LHS->getContext());
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ return ConstantInt::getFalse(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getFalse(LHS->getContext());
+ if (LHSKnownNonNegative)
+ return ConstantInt::getTrue(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getFalse(LHS->getContext());
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ return ConstantInt::getTrue(LHS->getContext());
+ break;
+ }
+ }
+
+ // See if we are doing a comparison with a constant integer.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Rule out tautological comparisons (eg., ult 0 or uge 0).
+ ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
+ if (RHS_CR.isEmptySet())
+ return ConstantInt::getFalse(CI->getContext());
+ if (RHS_CR.isFullSet())
+ return ConstantInt::getTrue(CI->getContext());
+
+ // Many binary operators with constant RHS have easy to compute constant
+ // range. Use them to check whether the comparison is a tautology.
+ uint32_t Width = CI->getBitWidth();
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
+ ConstantInt *CI2;
+ if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'urem x, CI2' produces [0, CI2).
+ Upper = CI2->getValue();
+ } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'srem x, CI2' produces (-|CI2|, |CI2|).
+ Upper = CI2->getValue().abs();
+ Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (!CI2->isZero())
+ Upper = NegOne.udiv(CI2->getValue()) + 1;
+ } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ APInt Val = CI2->getValue().abs();
+ if (!Val.isMinValue()) {
+ Lower = IntMin.sdiv(Val);
+ Upper = IntMax.sdiv(Val) + 1;
+ }
+ } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (CI2->getValue().ult(Width))
+ Upper = NegOne.lshr(CI2->getValue()) + 1;
+ } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (CI2->getValue().ult(Width)) {
+ Lower = IntMin.ashr(CI2->getValue());
+ Upper = IntMax.ashr(CI2->getValue()) + 1;
+ }
+ } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
+ // 'or x, CI2' produces [CI2, UINT_MAX].
+ Lower = CI2->getValue();
+ } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
+ // 'and x, CI2' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
+ }
+ if (Lower != Upper) {
+ ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+ if (RHS_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+ if (RHS_CR.inverse().contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ const Type *SrcTy = SrcOp->getType();
+ const Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
+ TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ TD, DT, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), TD, DT,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ TD, DT, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
+ MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all equal, while RHS has varying
+ // bits there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
+ if (MaxRecurse && (LBO || RBO)) {
+ // Analyze the case when either LHS or RHS is an add instruction.
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
+ bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
+ if (LBO && LBO->getOpcode() == Instruction::Add) {
+ A = LBO->getOperand(0); B = LBO->getOperand(1);
+ NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ }
+ if (RBO && RBO->getOpcode() == Instruction::Add) {
+ C = RBO->getOperand(0); D = RBO->getOperand(1);
+ NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ }
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == RHS || B == RHS) && NoLHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+ Constant::getNullValue(RHS->getType()),
+ TD, DT, MaxRecurse-1))
+ return V;
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == LHS || D == LHS) && NoRHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred,
+ Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, TD, DT, MaxRecurse-1))
+ return V;
+
+ // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoLHSWrapProblem && NoRHSWrapProblem) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y = (A == C || A == D) ? B : A;
+ Value *Z = (C == A || C == B) ? D : C;
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(RHS->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(RHS->getContext());
+ }
+ }
+ if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getTrue(RHS->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+ LBO->getOperand(1) == RBO->getOperand(1)) {
+ switch (LBO->getOpcode()) {
+ default: break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (ICmpInst::isSigned(Pred))
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!LBO->isExact() && !RBO->isExact())
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case Instruction::Shl: {
+ bool NUW = LBO->hasNoUnsignedWrap() && LBO->hasNoUnsignedWrap();
+ bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && ICmpInst::isSigned(Pred))
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Fold trivial predicates.
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+
+ if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
+ return UndefValue::get(GetCompareTy(LHS));
+
+ // fcmp x,x -> true/false. Not all compares are foldable.
+ if (LHS == RHS) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ if (CmpInst::isFalseWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // If the constant is a nan, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_UGE:
+ // All values are unordered with or at least negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // All values are unordered with and at most infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+/// the result. If not, this returns null.
+Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
+ const TargetData *TD, const DominatorTree *) {
+ // select true, X, Y -> X
+ // select false, X, Y -> Y
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
+ return CB->getZExtValue() ? TrueVal : FalseVal;
+
+ // select C, X, X -> X
+ if (TrueVal == FalseVal)
+ return TrueVal;
+
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return FalseVal;
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return TrueVal;
+ if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+ if (isa<Constant>(TrueVal))
+ return TrueVal;
+ return FalseVal;
+ }
+
+ return 0;
+}
+
+/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
+ const TargetData *TD, const DominatorTree *) {
+ // The type of the GEP pointer operand.
+ const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+
+ // getelementptr P -> P.
+ if (NumOps == 1)
+ return Ops[0];
+
+ if (isa<UndefValue>(Ops[0])) {
+ // Compute the (pointer) type returned by the GEP instruction.
+ const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1],
+ NumOps-1);
+ const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+ return UndefValue::get(GEPTy);
+ }
+
+ if (NumOps == 2) {
+ // getelementptr P, 0 -> P.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
+ if (C->isZero())
+ return Ops[0];
+ // getelementptr P, N -> P if P points to a type of zero size.
+ if (TD) {
+ const Type *Ty = PtrTy->getElementType();
+ if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+ return Ops[0];
+ }
+ }
+
+ // Check to see if this is constant foldable.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (!isa<Constant>(Ops[i]))
+ return 0;
+
+ return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
+ (Constant *const*)Ops+1, NumOps-1);
+}
+
+/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
+static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+ // If all of the PHI's incoming values are the same then replace the PHI node
+ // with the common value.
+ Value *CommonValue = 0;
+ bool HasUndefInput = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PN) continue;
+ if (isa<UndefValue>(Incoming)) {
+ // Remember that we saw an undef value, but otherwise ignore them.
+ HasUndefInput = true;
+ continue;
+ }
+ if (CommonValue && Incoming != CommonValue)
+ return 0; // Not the same, bail out.
+ CommonValue = Incoming;
+ }
+
+ // If CommonValue is null then all of the incoming values were either undef or
+ // equal to the phi node itself.
+ if (!CommonValue)
+ return UndefValue::get(PN->getType());
+
+ // If we have a PHI node like phi(X, undef, X), where X is defined by some
+ // instruction, we cannot return X as the result of the PHI node unless it
+ // dominates the PHI block.
+ if (HasUndefInput)
+ return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+
+ return CommonValue;
+}
+
+
+//=== Helper functions for higher up the class hierarchy.
+
+/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::Sub:
+ return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Shl:
+ return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::LShr:
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ case Instruction::AShr:
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
+ default:
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Constant *COps[] = {CLHS, CRHS};
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
+ }
+
+ // If the operation is associative, try some generic simplifications.
+ if (Instruction::isAssociative(Opcode))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+ }
+}
+
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+/// fold the result.
+static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
+ return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+}
+
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyInstruction - See if we can compute a simplified version of this
+/// instruction. If not, this returns null.
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+ const DominatorTree *DT) {
+ Value *Result;
+
+ switch (I->getOpcode()) {
+ default:
+ Result = ConstantFoldInstruction(I, TD);
+ break;
+ case Instruction::Add:
+ Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::Sub:
+ Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::Mul:
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::SDiv:
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::UDiv:
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FDiv:
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::SRem:
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::URem:
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FRem:
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Shl:
+ Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::LShr:
+ Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, DT);
+ break;
+ case Instruction::AShr:
+ Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, DT);
+ break;
+ case Instruction::And:
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Or:
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Xor:
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::ICmp:
+ Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FCmp:
+ Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Select:
+ Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), TD, DT);
+ break;
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT);
+ break;
+ }
+ case Instruction::PHI:
+ Result = SimplifyPHINode(cast<PHINode>(I), DT);
+ break;
+ }
+
+ /// If called on unreachable code, the above logic may report that the
+ /// instruction simplified to itself. Make life easier for users by
+ /// detecting that case here, returning a safe value instead.
+ return Result == I ? UndefValue::get(I->getType()) : Result;
+}
+
+/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
+/// delete the From instruction. In addition to a basic RAUW, this does a
+/// recursive simplification of the newly formed instructions. This catches
+/// things where one simplification exposes other opportunities. This only
+/// simplifies and deletes scalar operations, it does not change the CFG.
+///
+void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
+ const TargetData *TD,
+ const DominatorTree *DT) {
+ assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
+
+ // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
+ // we can know if it gets deleted out from under us or replaced in a
+ // recursive simplification.
+ WeakVH FromHandle(From);
+ WeakVH ToHandle(To);
+
+ while (!From->use_empty()) {
+ // Update the instruction to use the new value.
+ Use &TheUse = From->use_begin().getUse();
+ Instruction *User = cast<Instruction>(TheUse.getUser());
+ TheUse = To;
+
+ // Check to see if the instruction can be folded due to the operand
+ // replacement. For example changing (or X, Y) into (or X, -1) can replace
+ // the 'or' with -1.
+ Value *SimplifiedVal;
+ {
+ // Sanity check to make sure 'User' doesn't dangle across
+ // SimplifyInstruction.
+ AssertingVH<> UserHandle(User);
+
+ SimplifiedVal = SimplifyInstruction(User, TD, DT);
+ if (SimplifiedVal == 0) continue;
+ }
+
+ // Recursively simplify this user to the new value.
+ ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
+ From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
+ To = ToHandle;
+
+ assert(ToHandle && "To value deleted by recursive simplification?");
+
+ // If the recursive simplification ended up revisiting and deleting
+ // 'From' then we're done.
+ if (From == 0)
+ return;
+ }
+
+ // If 'From' has value handles referring to it, do a real RAUW to update them.
+ From->replaceAllUsesWith(To);
+
+ From->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp
new file mode 100644
index 000000000000..ca9cdcaf2464
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Interval.cpp
@@ -0,0 +1,58 @@
+//===- Interval.cpp - Interval class code ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the Interval class, which represents a
+// partition of a control flow graph of some kind.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Interval.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Interval Implementation
+//===----------------------------------------------------------------------===//
+
+// isLoop - Find out if there is a back edge in this interval...
+//
+bool Interval::isLoop() const {
+ // There is a loop in this interval iff one of the predecessors of the header
+ // node lives in the interval.
+ for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
+ I != E; ++I)
+ if (contains(*I))
+ return true;
+ return false;
+}
+
+
+void Interval::print(raw_ostream &OS) const {
+ OS << "-------------------------------------------------------------\n"
+ << "Interval Contents:\n";
+
+ // Print out all of the basic blocks in the interval...
+ for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
+ E = Nodes.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Predecessors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
+ E = Predecessors.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Successors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
+ E = Successors.end(); I != E; ++I)
+ OS << **I << "\n";
+}
diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 000000000000..2e259b147b8b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,114 @@
+//===- IntervalPartition.cpp - Interval Partition module code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the IntervalPartition class, which
+// calculates and represent the interval partition of a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IntervalIterator.h"
+using namespace llvm;
+
+char IntervalPartition::ID = 0;
+INITIALIZE_PASS(IntervalPartition, "intervals",
+ "Interval Partition Construction", true, true)
+
+//===----------------------------------------------------------------------===//
+// IntervalPartition Implementation
+//===----------------------------------------------------------------------===//
+
+// releaseMemory - Reset state back to before function was analyzed
+void IntervalPartition::releaseMemory() {
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ delete Intervals[i];
+ IntervalMap.clear();
+ Intervals.clear();
+ RootInterval = 0;
+}
+
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
+ for(unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ Intervals[i]->print(O);
+}
+
+// addIntervalToPartition - Add an interval to the internal list of intervals,
+// and then add mappings from all of the basic blocks in the interval to the
+// interval itself (in the IntervalMap).
+//
+void IntervalPartition::addIntervalToPartition(Interval *I) {
+ Intervals.push_back(I);
+
+ // Add mappings for all of the basic blocks in I to the IntervalPartition
+ for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end();
+ It != End; ++It)
+ IntervalMap.insert(std::make_pair(*It, I));
+}
+
+// updatePredecessors - Interval generation only sets the successor fields of
+// the interval data structures. After interval generation is complete,
+// run through all of the intervals and propagate successor info as
+// predecessor info.
+//
+void IntervalPartition::updatePredecessors(Interval *Int) {
+ BasicBlock *Header = Int->getHeaderNode();
+ for (Interval::succ_iterator I = Int->Successors.begin(),
+ E = Int->Successors.end(); I != E; ++I)
+ getBlockInterval(*I)->Predecessors.push_back(Header);
+}
+
+// IntervalPartition ctor - Build the first level interval partition for the
+// specified function...
+//
+bool IntervalPartition::runOnFunction(Function &F) {
+ // Pass false to intervals_begin because we take ownership of it's memory
+ function_interval_iterator I = intervals_begin(&F, false);
+ assert(I != intervals_end(&F) && "No intervals in function!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (function_interval_iterator E = intervals_end(&F); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+ return false;
+}
+
+
+// IntervalPartition ctor - Build a reduced interval partition from an
+// existing interval graph. This takes an additional boolean parameter to
+// distinguish it from a copy constructor. Always pass in false for now.
+//
+IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
+ : FunctionPass(ID) {
+ assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
+
+ // Pass false to intervals_begin because we take ownership of it's memory
+ interval_part_interval_iterator I = intervals_begin(IP, false);
+ assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 000000000000..d5f0b5c82154
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,1126 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lazy-value-info"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <map>
+#include <stack>
+using namespace llvm;
+
+char LazyValueInfo::ID = 0;
+INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+
+namespace llvm {
+ FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LVILatticeVal
+//===----------------------------------------------------------------------===//
+
+/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
+/// value.
+///
+/// FIXME: This is basically just for bringup, this can be made a lot more rich
+/// in the future.
+///
+namespace {
+class LVILatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This Value has no known value yet.
+ undefined,
+
+ /// constant - This Value has a specific constant value.
+ constant,
+ /// notconstant - This Value is known to not have the specified value.
+ notconstant,
+
+ /// constantrange - The Value falls within this range.
+ constantrange,
+
+ /// overdefined - This value is not known to be constant, and we know that
+ /// it has a value.
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'notconstant' value.
+ LatticeValueTy Tag;
+ Constant *Val;
+ ConstantRange Range;
+
+public:
+ LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
+
+ static LVILatticeVal get(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getNot(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markNotConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getRange(ConstantRange CR) {
+ LVILatticeVal Res;
+ Res.markConstantRange(CR);
+ return Res;
+ }
+
+ bool isUndefined() const { return Tag == undefined; }
+ bool isConstant() const { return Tag == constant; }
+ bool isNotConstant() const { return Tag == notconstant; }
+ bool isConstantRange() const { return Tag == constantrange; }
+ bool isOverdefined() const { return Tag == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val;
+ }
+
+ Constant *getNotConstant() const {
+ assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
+ return Val;
+ }
+
+ ConstantRange getConstantRange() const {
+ assert(isConstantRange() &&
+ "Cannot get the constant-range of a non-constant-range!");
+ return Range;
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+ Tag = overdefined;
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() == V) &&
+ "Marking constant with different value");
+ assert(isUndefined());
+ Tag = constant;
+ Val = V;
+ return true;
+ }
+
+ /// markNotConstant - Return true if this is a change in status.
+ bool markNotConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() != V) &&
+ "Marking constant !constant with same value");
+ assert((!isNotConstant() || getNotConstant() == V) &&
+ "Marking !constant with different value");
+ assert(isUndefined() || isConstant());
+ Tag = notconstant;
+ Val = V;
+ return true;
+ }
+
+ /// markConstantRange - Return true if this is a change in status.
+ bool markConstantRange(const ConstantRange NewR) {
+ if (isConstantRange()) {
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ bool changed = Range == NewR;
+ Range = NewR;
+ return changed;
+ }
+
+ assert(isUndefined());
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ Tag = constantrange;
+ Range = NewR;
+ return true;
+ }
+
+ /// mergeIn - Merge the specified lattice value into this one, updating this
+ /// one and returning true if anything changed.
+ bool mergeIn(const LVILatticeVal &RHS) {
+ if (RHS.isUndefined() || isOverdefined()) return false;
+ if (RHS.isOverdefined()) return markOverdefined();
+
+ if (isUndefined()) {
+ Tag = RHS.Tag;
+ Val = RHS.Val;
+ Range = RHS.Range;
+ return true;
+ }
+
+ if (isConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getConstant(),
+ RHS.getNotConstant())))
+ if (Res->isOne())
+ return markNotConstant(RHS.getNotConstant());
+
+ return markOverdefined();
+ }
+
+ // RHS is a ConstantRange, LHS is a non-integer Constant.
+
+ // FIXME: consider the case where RHS is a range [1, 0) and LHS is
+ // a function. The correct result is to pick up RHS.
+
+ return markOverdefined();
+ }
+
+ if (isNotConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getNotConstant(),
+ RHS.getConstant())))
+ if (Res->isOne())
+ return false;
+
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ return markOverdefined();
+ }
+
+ assert(isConstantRange() && "New LVILattice type?");
+ if (!RHS.isConstantRange())
+ return markOverdefined();
+
+ ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+ if (NewR.isFullSet())
+ return markOverdefined();
+ return markConstantRange(NewR);
+ }
+};
+
+} // end anonymous namespace.
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val)
+ LLVM_ATTRIBUTE_USED;
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+ if (Val.isUndefined())
+ return OS << "undefined";
+ if (Val.isOverdefined())
+ return OS << "overdefined";
+
+ if (Val.isNotConstant())
+ return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ else if (Val.isConstantRange())
+ return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+ << Val.getConstantRange().getUpper() << '>';
+ return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// LVIValueHandle - A callback value handle update the cache when
+ /// values are erased.
+ class LazyValueInfoCache;
+ struct LVIValueHandle : public CallbackVH {
+ LazyValueInfoCache *Parent;
+
+ LVIValueHandle(Value *V, LazyValueInfoCache *P)
+ : CallbackVH(V), Parent(P) { }
+
+ void deleted();
+ void allUsesReplacedWith(Value *V) {
+ deleted();
+ }
+ };
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LVIValueHandle> {
+ typedef DenseMapInfo<Value*> PointerInfo;
+ static inline LVIValueHandle getEmptyKey() {
+ return LVIValueHandle(PointerInfo::getEmptyKey(),
+ static_cast<LazyValueInfoCache*>(0));
+ }
+ static inline LVIValueHandle getTombstoneKey() {
+ return LVIValueHandle(PointerInfo::getTombstoneKey(),
+ static_cast<LazyValueInfoCache*>(0));
+ }
+ static unsigned getHashValue(const LVIValueHandle &Val) {
+ return PointerInfo::getHashValue(Val);
+ }
+ static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ template<>
+ struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy;
+ typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo;
+ typedef DenseMapInfo<Value*> BPointerInfo;
+ static inline PairTy getEmptyKey() {
+ return std::make_pair(APointerInfo::getEmptyKey(),
+ BPointerInfo::getEmptyKey());
+ }
+ static inline PairTy getTombstoneKey() {
+ return std::make_pair(APointerInfo::getTombstoneKey(),
+ BPointerInfo::getTombstoneKey());
+ }
+ static unsigned getHashValue( const PairTy &Val) {
+ return APointerInfo::getHashValue(Val.first) ^
+ BPointerInfo::getHashValue(Val.second);
+ }
+ static bool isEqual(const PairTy &LHS, const PairTy &RHS) {
+ return APointerInfo::isEqual(LHS.first, RHS.first) &&
+ BPointerInfo::isEqual(LHS.second, RHS.second);
+ }
+ };
+}
+
+namespace {
+ /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
+ /// maintains information about queries across the clients' queries.
+ class LazyValueInfoCache {
+ /// ValueCacheEntryTy - This is all of the cached block information for
+ /// exactly one Value*. The entries are sorted by the BasicBlock* of the
+ /// entries, allowing us to do a lookup with a binary search.
+ typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
+
+ /// ValueCache - This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ /// OverDefinedCache - This tracks, on a per-block basis, the set of
+ /// values that are over-defined at the end of that block. This is required
+ /// for cache updating.
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+ DenseSet<OverDefinedPairTy> OverDefinedCache;
+
+ /// BlockValueStack - This stack holds the state of the value solver
+ /// during a query. It basically emulates the callstack of the naive
+ /// recursive value lookup process.
+ std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+ friend struct LVIValueHandle;
+
+ /// OverDefinedCacheUpdater - A helper object that ensures that the
+ /// OverDefinedCache is updated whenever solveBlockValue returns.
+ struct OverDefinedCacheUpdater {
+ LazyValueInfoCache *Parent;
+ Value *Val;
+ BasicBlock *BB;
+ LVILatticeVal &BBLV;
+
+ OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
+ LazyValueInfoCache *P)
+ : Parent(P), Val(V), BB(B), BBLV(LV) { }
+
+ bool markResult(bool changed) {
+ if (changed && BBLV.isOverdefined())
+ Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
+ return changed;
+ }
+ };
+
+
+
+ LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+ bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
+ LVILatticeVal &Result);
+ bool hasBlockValue(Value *Val, BasicBlock *BB);
+
+ // These methods process one work item and may add more. A false value
+ // returned means that the work item was not completely processed and must
+ // be revisited after going through the new items.
+ bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB);
+ bool solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB);
+ bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI, BasicBlock *BB);
+
+ void solve();
+
+ ValueCacheEntryTy &lookup(Value *V) {
+ return ValueCache[LVIValueHandle(V, this)];
+ }
+
+ public:
+ /// getValueInBlock - This is the query interface to determine the lattice
+ /// value for the specified Value* at the end of the specified block.
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+
+ /// getValueOnEdge - This is the query interface to determine the lattice
+ /// value for the specified Value* that is true on the specified edge.
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+
+ /// threadEdge - This is the update interface to inform the cache that an
+ /// edge from PredBB to OldSucc has been threaded to be from PredBB to
+ /// NewSucc.
+ void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+ /// eraseBlock - This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// clear - Empty the cache.
+ void clear() {
+ ValueCache.clear();
+ OverDefinedCache.clear();
+ }
+ };
+} // end anonymous namespace
+
+void LVIValueHandle::deleted() {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator
+ I = Parent->OverDefinedCache.begin(),
+ E = Parent->OverDefinedCache.end();
+ I != E; ++I) {
+ if (I->second == getValPtr())
+ ToErase.push_back(*I);
+ }
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ Parent->OverDefinedCache.erase(*I);
+
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == BB)
+ ToErase.push_back(*I);
+ }
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ OverDefinedCache.erase(*I);
+
+ for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator
+ I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ I->second.erase(BB);
+}
+
+void LazyValueInfoCache::solve() {
+ while (!BlockValueStack.empty()) {
+ std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ if (solveBlockValue(e.second, e.first))
+ BlockValueStack.pop();
+ }
+}
+
+bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (isa<Constant>(Val))
+ return true;
+
+ LVIValueHandle ValHandle(Val, this);
+ if (!ValueCache.count(ValHandle)) return false;
+ return ValueCache[ValHandle].count(BB);
+}
+
+LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val))
+ return LVILatticeVal::get(VC);
+
+ return lookup(Val)[BB];
+}
+
+bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+ if (isa<Constant>(Val))
+ return true;
+
+ ValueCacheEntryTy &Cache = lookup(Val);
+ LVILatticeVal &BBLV = Cache[BB];
+
+ // OverDefinedCacheUpdater is a helper object that will update
+ // the OverDefinedCache for us when this method exits. Make sure to
+ // call markResult on it as we exist, passing a bool to indicate if the
+ // cache needs updating, i.e. if we have solve a new value or not.
+ OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
+
+ // If we've already computed this block's value, return it.
+ if (!BBLV.isUndefined()) {
+ DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
+
+ // Since we're reusing a cached value here, we don't need to update the
+ // OverDefinedCahce. The cache will have been properly updated
+ // whenever the cached value was inserted.
+ ODCacheUpdater.markResult(false);
+ return true;
+ }
+
+ // Otherwise, this is the first time we're seeing this block. Reset the
+ // lattice value to overdefined, so that cycles will terminate and be
+ // conservatively correct.
+ BBLV.markOverdefined();
+
+ Instruction *BBI = dyn_cast<Instruction>(Val);
+ if (BBI == 0 || BBI->getParent() != BB) {
+ return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
+ }
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
+ BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ return ODCacheUpdater.markResult(true);
+ }
+
+ // We can only analyze the definitions of certain classes of instructions
+ // (integral binops and casts at the moment), so bail if this isn't one.
+ LVILatticeVal Result;
+ if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+ !BBI->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ // FIXME: We're currently limited to binops with a constant RHS. This should
+ // be improved.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+ if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+}
+
+static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ return L->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(L->getPointerOperand()) ==
+ GetUnderlyingObject(Ptr);
+ }
+ if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ return S->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(S->getPointerOperand()) ==
+ GetUnderlyingObject(Ptr);
+ }
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (MI->isVolatile()) return false;
+ if (MI->getAddressSpace() != 0) return false;
+
+ // FIXME: check whether it has a valuerange that excludes zero?
+ ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
+ if (!Len || Len->isZero()) return false;
+
+ if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
+ return true;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr;
+ }
+ return false;
+}
+
+bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // If this is a pointer, and there's a load from that pointer in this BB,
+ // then we know that the pointer can't be NULL.
+ bool NotNull = false;
+ if (Val->getType()->isPointerTy()) {
+ if (isa<AllocaInst>(Val)) {
+ NotNull = true;
+ } else {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
+ if (InstructionDereferencesPointer(BI, Val)) {
+ NotNull = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // If this is the entry block, we must be asking about an argument. The
+ // value is overdefined.
+ if (BB == &BB->getParent()->getEntryBlock()) {
+ assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ } else {
+ Result.markOverdefined();
+ }
+ BBLV = Result;
+ return true;
+ }
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ // If we previously determined that this is a pointer that can't be null
+ // then return that rather than giving up entirely.
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ }
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ BBLV = Result;
+ return true;
+}
+
+bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PhiBB = PN->getIncomingBlock(i);
+ Value *PhiVal = PN->getIncomingValue(i);
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined() && "Possible PHI in entry block?");
+ BBLV = Result;
+ return true;
+}
+
+bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI,
+ BasicBlock *BB) {
+ // Figure out the range of the LHS. If that fails, bail.
+ if (!hasBlockValue(BBI->getOperand(0), BB)) {
+ BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
+ return false;
+ }
+
+ LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
+ if (!LHSVal.isConstantRange()) {
+ BBLV.markOverdefined();
+ return true;
+ }
+
+ ConstantRange LHSRange = LHSVal.getConstantRange();
+ ConstantRange RHSRange(1);
+ const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+ if (isa<BinaryOperator>(BBI)) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
+ RHSRange = ConstantRange(RHS->getValue());
+ } else {
+ BBLV.markOverdefined();
+ return true;
+ }
+ }
+
+ // NOTE: We're currently limited by the set of operations that ConstantRange
+ // can evaluate symbolically. Enhancing that set will allows us to analyze
+ // more definitions.
+ LVILatticeVal Result;
+ switch (BBI->getOpcode()) {
+ case Instruction::Add:
+ Result.markConstantRange(LHSRange.add(RHSRange));
+ break;
+ case Instruction::Sub:
+ Result.markConstantRange(LHSRange.sub(RHSRange));
+ break;
+ case Instruction::Mul:
+ Result.markConstantRange(LHSRange.multiply(RHSRange));
+ break;
+ case Instruction::UDiv:
+ Result.markConstantRange(LHSRange.udiv(RHSRange));
+ break;
+ case Instruction::Shl:
+ Result.markConstantRange(LHSRange.shl(RHSRange));
+ break;
+ case Instruction::LShr:
+ Result.markConstantRange(LHSRange.lshr(RHSRange));
+ break;
+ case Instruction::Trunc:
+ Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+ break;
+ case Instruction::SExt:
+ Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::ZExt:
+ Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::BitCast:
+ Result.markConstantRange(LHSRange);
+ break;
+ case Instruction::And:
+ Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
+ break;
+ case Instruction::Or:
+ Result.markConstantRange(LHSRange.binaryOr(RHSRange));
+ break;
+
+ // Unhandled instructions are overdefined.
+ default:
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ break;
+ }
+
+ BBLV = Result;
+ return true;
+}
+
+/// getEdgeValue - This method attempts to infer more complex
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, LVILatticeVal &Result) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val)) {
+ Result = LVILatticeVal::get(VC);
+ return true;
+ }
+
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // know that v != 0.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+ // If this is a conditional branch and only one successor goes to BBTo, then
+ // we maybe able to infer something from the condition.
+ if (BI->isConditional() &&
+ BI->getSuccessor(0) != BI->getSuccessor(1)) {
+ bool isTrueDest = BI->getSuccessor(0) == BBTo;
+ assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+ "BBTo isn't a successor of BBFrom");
+
+ // If V is the condition of the branch itself, then we know exactly what
+ // it is.
+ if (BI->getCondition() == Val) {
+ Result = LVILatticeVal::get(ConstantInt::get(
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
+ return true;
+ }
+
+ // If the condition of the branch is an equality comparison, we may be
+ // able to infer the value.
+ ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (ICI && ICI->getOperand(0) == Val &&
+ isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality()) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ else
+ Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ return true;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ // Calculate the range of values that would satisfy the comparison.
+ ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
+ ConstantRange TrueValues =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ // Figure out the possible values of the query BEFORE this branch.
+ if (!hasBlockValue(Val, BBFrom)) {
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+ }
+
+ LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ if (!InBlock.isConstantRange()) {
+ Result = LVILatticeVal::getRange(TrueValues);
+ return true;
+ }
+
+ // Find all potential values that satisfy both the input and output
+ // conditions.
+ ConstantRange PossibleValues =
+ TrueValues.intersectWith(InBlock.getConstantRange());
+
+ Result = LVILatticeVal::getRange(PossibleValues);
+ return true;
+ }
+ }
+ }
+ }
+
+ // If the edge was formed by a switch on the value, then we may know exactly
+ // what it is.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
+ if (SI->getCondition() == Val) {
+ // We don't know anything in the default case.
+ if (SI->getDefaultDest() == BBTo) {
+ Result.markOverdefined();
+ return true;
+ }
+
+ // We only know something if there is exactly one value that goes from
+ // BBFrom to BBTo.
+ unsigned NumEdges = 0;
+ ConstantInt *EdgeVal = 0;
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ if (SI->getSuccessor(i) != BBTo) continue;
+ if (NumEdges++) break;
+ EdgeVal = SI->getCaseValue(i);
+ }
+ assert(EdgeVal && "Missing successor?");
+ if (NumEdges == 1) {
+ Result = LVILatticeVal::get(EdgeVal);
+ return true;
+ }
+ }
+ }
+
+ // Otherwise see if the value is known in the block.
+ if (hasBlockValue(Val, BBFrom)) {
+ Result = getBlockValue(Val, BBFrom);
+ return true;
+ }
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+}
+
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+ DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
+ << BB->getName() << "'\n");
+
+ BlockValueStack.push(std::make_pair(BB, V));
+ solve();
+ LVILatticeVal Result = getBlockValue(V, BB);
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+ DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
+ << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
+ LVILatticeVal Result;
+ if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+ solve();
+ bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+ (void)WasFastQuery;
+ assert(WasFastQuery && "More work to do after problem solved?");
+ }
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be possible
+ // to solve now. We do NOT try to proactively update these values. Instead,
+ // we clear their entries from the cache, and allow lazy updating to recompute
+ // them when needed.
+
+ // The updating process is fairly simple: we need to dropped cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ DenseSet<Value*> ClearSet;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == OldSucc)
+ ClearSet.insert(I->second);
+ }
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ bool changed = false;
+ for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
+ I != E; ++I) {
+ // If a value was marked overdefined in OldSucc, and is here too...
+ DenseSet<OverDefinedPairTy>::iterator OI =
+ OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+ if (OI == OverDefinedCache.end()) continue;
+
+ // Remove it from the caches.
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+ ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
+
+ assert(CI != Entry.end() && "Couldn't find entry to update?");
+ Entry.erase(CI);
+ OverDefinedCache.erase(OI);
+
+ // If we removed anything, then we potentially need to update
+ // blocks successors too.
+ changed = true;
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+/// getCache - This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl) {
+ if (!PImpl)
+ PImpl = new LazyValueInfoCache();
+ return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+ if (PImpl)
+ getCache(PImpl).clear();
+
+ TD = getAnalysisIfAvailable<TargetData>();
+ // Fully lazy.
+ return false;
+}
+
+void LazyValueInfo::releaseMemory() {
+ // If the cache was allocated, free it.
+ if (PImpl) {
+ delete &getCache(PImpl);
+ PImpl = 0;
+ }
+}
+
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
+ LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return 0;
+}
+
+/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// constant on the specified edge. Return null if not.
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
+ BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return 0;
+}
+
+/// getPredicateOnEdge - Determine whether the specified value comparison
+/// with a constant is known to be true or false on the specified CFG edge.
+/// Pred is a CmpInst predicate.
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ // If we know the value is a constant, evaluate the conditional.
+ Constant *Res = 0;
+ if (Result.isConstant()) {
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
+ return ResCI->isZero() ? False : True;
+ return Unknown;
+ }
+
+ if (Result.isConstantRange()) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return Unknown;
+
+ ConstantRange CR = Result.getConstantRange();
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (!CR.contains(CI->getValue()))
+ return False;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return True;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (!CR.contains(CI->getValue()))
+ return True;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return False;
+ }
+
+ // Handle more complex predicates.
+ ConstantRange TrueValues =
+ ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ if (TrueValues.contains(CR))
+ return True;
+ if (TrueValues.inverse().contains(CR))
+ return False;
+ return Unknown;
+ }
+
+ if (Result.isNotConstant()) {
+ // If this is an equality comparison, we can try to fold it knowing that
+ // "V != C1".
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // !C1 == C -> false iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return False;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ // !C1 != C -> true iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return True;
+ }
+ return Unknown;
+ }
+
+ return Unknown;
+}
+
+void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+}
+
+void LazyValueInfo::eraseBlock(BasicBlock *BB) {
+ if (PImpl) getCache(PImpl).eraseBlock(BB);
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
new file mode 100644
index 000000000000..efb722bb97c4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -0,0 +1,137 @@
+//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LibCallAliasAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallAliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+// Register this pass...
+char LibCallAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
+ "LibCall Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
+ return new LibCallAliasAnalysis(LCI);
+}
+
+LibCallAliasAnalysis::~LibCallAliasAnalysis() {
+ delete LCI;
+}
+
+void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+}
+
+
+
+/// AnalyzeLibCallDetails - Given a call to a function with the specified
+/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
+/// vs the specified pointer/size.
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
+ ImmutableCallSite CS,
+ const Location &Loc) {
+ // If we have a function, check to see what kind of mod/ref effects it
+ // has. Start by including any info globally known about the function.
+ AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
+ if (MRInfo == NoModRef) return MRInfo;
+
+ // If that didn't tell us that the function is 'readnone', check to see
+ // if we have detailed info and if 'P' is any of the locations we know
+ // about.
+ const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
+ if (Details == 0)
+ return MRInfo;
+
+ // If the details array is of the 'DoesNot' kind, we only know something if
+ // the pointer is a match for one of the locations in 'Details'. If we find a
+ // match, we can prove some interactions cannot happen.
+ //
+ if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
+ // Find out if the pointer refers to a known location.
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &LocInfo =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+ if (Res != LibCallLocationInfo::Yes) continue;
+
+ // If we find a match against a location that we 'do not' interact with,
+ // learn this info into MRInfo.
+ return ModRefResult(MRInfo & ~Details[i].MRInfo);
+ }
+ return MRInfo;
+ }
+
+ // If the details are of the 'DoesOnly' sort, we know something if the pointer
+ // is a match for one of the locations in 'Details'. Also, if we can prove
+ // that the pointers is *not* one of the locations in 'Details', we know that
+ // the call is NoModRef.
+ assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
+
+ // Find out if the pointer refers to a known location.
+ bool NoneMatch = true;
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &LocInfo =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+ if (Res == LibCallLocationInfo::No) continue;
+
+ // If we don't know if this pointer points to the location, then we have to
+ // assume it might alias in some case.
+ if (Res == LibCallLocationInfo::Unknown) {
+ NoneMatch = false;
+ continue;
+ }
+
+ // If we know that this pointer definitely is pointing into the location,
+ // merge in this information.
+ return ModRefResult(MRInfo & Details[i].MRInfo);
+ }
+
+ // If we found that the pointer is guaranteed to not match any of the
+ // locations in our 'DoesOnly' rule, then we know that the pointer must point
+ // to some other location. Since the libcall doesn't mod/ref any other
+ // locations, return NoModRef.
+ if (NoneMatch)
+ return NoModRef;
+
+ // Otherwise, return any other info gained so far.
+ return MRInfo;
+}
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object.
+//
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ ModRefResult MRInfo = ModRef;
+
+ // If this is a direct call to a function that LCI knows about, get the
+ // information about the runtime function.
+ if (LCI) {
+ if (const Function *F = CS.getCalledFunction()) {
+ if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
+ MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
+ if (MRInfo == NoModRef) return NoModRef;
+ }
+ }
+ }
+
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
new file mode 100644
index 000000000000..81b0f46f3740
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
@@ -0,0 +1,63 @@
+//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements interfaces that can be used to describe language
+// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
+// optimizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This
+/// helper does the cast.
+static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
+ return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
+}
+
+LibCallInfo::~LibCallInfo() {
+ delete getMap(Impl);
+}
+
+const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
+ // Get location info on the first call.
+ if (NumLocations == 0)
+ NumLocations = getLocationInfo(Locations);
+
+ assert(LocID < NumLocations && "Invalid location ID!");
+ return Locations[LocID];
+}
+
+
+/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+/// the specified function if we have it. If not, return null.
+const LibCallFunctionInfo *
+LibCallInfo::getFunctionInfo(const Function *F) const {
+ StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
+
+ /// If this is the first time we are querying for this info, lazily construct
+ /// the StringMap to index it.
+ if (Map == 0) {
+ Impl = Map = new StringMap<const LibCallFunctionInfo*>();
+
+ const LibCallFunctionInfo *Array = getFunctionInfoArray();
+ if (Array == 0) return 0;
+
+ // We now have the array of entries. Populate the StringMap.
+ for (unsigned i = 0; Array[i].Name; ++i)
+ (*Map)[Array[i].Name] = Array+i;
+ }
+
+ // Look up this function in the string map.
+ return Map->lookup(F->getName());
+}
+
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
new file mode 100644
index 000000000000..f130f30c49da
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -0,0 +1,658 @@
+//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass statically checks for common and easily-identified constructs
+// which produce undefined or likely unintended behavior in LLVM IR.
+//
+// It is not a guarantee of correctness, in two ways. First, it isn't
+// comprehensive. There are checks which could be done statically which are
+// not yet implemented. Some of these are indicated by TODO comments, but
+// those aren't comprehensive either. Second, many conditions cannot be
+// checked statically. This pass does no dynamic instrumentation, so it
+// can't check for all possible problems.
+//
+// Another limitation is that it assumes all code will be executed. A store
+// through a null pointer in a basic block which is never reached is harmless,
+// but this pass will warn about it anyway. This is the main reason why most
+// of these checks live here instead of in the Verifier pass.
+//
+// Optimization passes may make conditions that this pass checks for more or
+// less obvious. If an optimization pass appears to be introducing a warning,
+// it may be that the optimization pass is merely exposing an existing
+// condition in the code.
+//
+// This code may be run before instcombine. In many cases, instcombine checks
+// for the same kinds of things and turns instructions with undefined behavior
+// into unreachable (or equivalent). Because of this, this pass makes some
+// effort to look through bitcasts and so on.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+namespace {
+ namespace MemRef {
+ static unsigned Read = 1;
+ static unsigned Write = 2;
+ static unsigned Callee = 4;
+ static unsigned Branchee = 8;
+ }
+
+ class Lint : public FunctionPass, public InstVisitor<Lint> {
+ friend class InstVisitor<Lint>;
+
+ void visitFunction(Function &F);
+
+ void visitCallSite(CallSite CS);
+ void visitMemoryReference(Instruction &I, Value *Ptr,
+ uint64_t Size, unsigned Align,
+ const Type *Ty, unsigned Flags);
+
+ void visitCallInst(CallInst &I);
+ void visitInvokeInst(InvokeInst &I);
+ void visitReturnInst(ReturnInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitXor(BinaryOperator &I);
+ void visitSub(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+ void visitShl(BinaryOperator &I);
+ void visitSDiv(BinaryOperator &I);
+ void visitUDiv(BinaryOperator &I);
+ void visitSRem(BinaryOperator &I);
+ void visitURem(BinaryOperator &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitVAArgInst(VAArgInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const;
+
+ public:
+ Module *Mod;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ TargetData *TD;
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ static char ID; // Pass identification, replacement for typeid
+ Lint() : FunctionPass(ID), MessagesStr(Messages) {
+ initializeLintPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ }
+ };
+}
+
+char Lint::ID = 0;
+INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+
+// Assert - We know that cond should be true, if not print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+// Lint::run - This is the main Analysis entry point for a
+// function.
+//
+bool Lint::runOnFunction(Function &F) {
+ Mod = F.getParent();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ visit(F);
+ dbgs() << MessagesStr.str();
+ Messages.clear();
+ return false;
+}
+
+void Lint::visitFunction(Function &F) {
+ // This isn't undefined behavior, it's just a little unusual, and it's a
+ // fairly common mistake to neglect to name a function.
+ Assert1(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
+
+ // TODO: Check for irreducible control flow.
+}
+
+void Lint::visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ Value *Callee = CS.getCalledValue();
+
+ visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Callee);
+
+ if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
+ Assert1(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
+
+ const FunctionType *FT = F->getFunctionType();
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+
+ Assert1(FT->isVarArg() ?
+ FT->getNumParams() <= NumActualArgs :
+ FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count", &I);
+
+ Assert1(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type", &I);
+
+ // Check argument types (in case the callee was casted) and attributes.
+ // TODO: Verify that caller and callee attributes are compatible.
+ Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ for (; AI != AE; ++AI) {
+ Value *Actual = *AI;
+ if (PI != PE) {
+ Argument *Formal = PI++;
+ Assert1(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type", &I);
+
+ // Check that noalias arguments don't alias other arguments. This is
+ // not fully precise because we don't know the sizes of the dereferenced
+ // memory regions.
+ if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+ if (AI != BI && (*BI)->getType()->isPointerTy()) {
+ AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
+ Assert1(Result != AliasAnalysis::MustAlias &&
+ Result != AliasAnalysis::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
+ }
+
+ // Check that an sret argument points to valid memory.
+ if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
+ const Type *Ty =
+ cast<PointerType>(Formal->getType())->getElementType();
+ visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
+ TD ? TD->getABITypeAlignment(Ty) : 0,
+ Ty, MemRef::Read | MemRef::Write);
+ }
+ }
+ }
+ }
+
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ Value *Obj = findValue(*AI, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca", &I);
+ }
+
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // TODO: Check more intrinsics
+
+ case Intrinsic::memcpy: {
+ MemCpyInst *MCI = cast<MemCpyInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
+ MemRef::Write);
+ visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
+ MemRef::Read);
+
+ // Check that the memcpy arguments don't overlap. The AliasAnalysis API
+ // isn't expressive enough for what we really want to do. Known partial
+ // overlap is not distinguished from the case where nothing is known.
+ uint64_t Size = 0;
+ if (const ConstantInt *Len =
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(),
+ /*OffsetOk=*/false)))
+ if (Len->getValue().isIntN(32))
+ Size = Len->getValue().getZExtValue();
+ Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasAnalysis::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
+ break;
+ }
+ case Intrinsic::memmove: {
+ MemMoveInst *MMI = cast<MemMoveInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
+ MemRef::Write);
+ visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
+ MemRef::Read);
+ break;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MSI = cast<MemSetInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
+ MSI->getAlignment(), 0,
+ MemRef::Write);
+ break;
+ }
+
+ case Intrinsic::vastart:
+ Assert1(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
+
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+ case Intrinsic::vacopy:
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read);
+ break;
+ case Intrinsic::vaend:
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+
+ case Intrinsic::stackrestore:
+ // Stackrestore doesn't read or write memory, but it sets the
+ // stack pointer, which the compiler may read from or write to
+ // at any time, so check it for both readability and writeability.
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+ }
+}
+
+void Lint::visitCallInst(CallInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitInvokeInst(InvokeInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitReturnInst(ReturnInst &I) {
+ Function *F = I.getParent()->getParent();
+ Assert1(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute",
+ &I);
+
+ if (Value *V = I.getReturnValue()) {
+ Value *Obj = findValue(V, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Unusual: Returning alloca value", &I);
+ }
+}
+
+// TODO: Check that the reference is in bounds.
+// TODO: Check readnone/readonly function attributes.
+void Lint::visitMemoryReference(Instruction &I,
+ Value *Ptr, uint64_t Size, unsigned Align,
+ const Type *Ty, unsigned Flags) {
+ // If no memory is being referenced, it doesn't matter if the pointer
+ // is valid.
+ if (Size == 0)
+ return;
+
+ Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
+ Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert1(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
+
+ if (Flags & MemRef::Write) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
+ Assert1(!GV->isConstant(),
+ "Undefined behavior: Write to read-only memory", &I);
+ Assert1(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
+ }
+ if (Flags & MemRef::Read) {
+ Assert1(!isa<Function>(UnderlyingObject),
+ "Unusual: Load from function body", &I);
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
+ }
+ if (Flags & MemRef::Callee) {
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
+ }
+ if (Flags & MemRef::Branchee) {
+ Assert1(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
+ }
+
+ if (TD) {
+ if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty);
+
+ if (Align != 0) {
+ unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth),
+ KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD);
+ Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
+ "Undefined behavior: Memory reference address is misaligned", &I);
+ }
+ }
+}
+
+void Lint::visitLoadInst(LoadInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+ I.getType(), MemRef::Read);
+}
+
+void Lint::visitStoreInst(StoreInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getOperand(0)->getType()),
+ I.getAlignment(),
+ I.getOperand(0)->getType(), MemRef::Write);
+}
+
+void Lint::visitXor(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
+}
+
+void Lint::visitSub(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
+}
+
+void Lint::visitLShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitAShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitShl(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+static bool isZero(Value *V, TargetData *TD) {
+ // Assume undef could be zero.
+ if (isa<UndefValue>(V)) return true;
+
+ unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ APInt Mask = APInt::getAllOnesValue(BitWidth),
+ KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ return KnownZero.isAllOnesValue();
+}
+
+void Lint::visitSDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitUDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitSRem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitURem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitAllocaInst(AllocaInst &I) {
+ if (isa<ConstantInt>(I.getArraySize()))
+ // This isn't undefined behavior, it's just an obvious pessimization.
+ Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
+
+ // TODO: Check for an unusual size (MSB set?)
+}
+
+void Lint::visitVAArgInst(VAArgInst &I) {
+ visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Read | MemRef::Write);
+}
+
+void Lint::visitIndirectBrInst(IndirectBrInst &I) {
+ visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Branchee);
+
+ Assert1(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
+}
+
+void Lint::visitExtractElementInst(ExtractElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+ /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
+}
+
+void Lint::visitInsertElementInst(InsertElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+ /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
+}
+
+void Lint::visitUnreachableInst(UnreachableInst &I) {
+ // This isn't undefined behavior, it's merely suspicious.
+ Assert1(&I == I.getParent()->begin() ||
+ prior(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects", &I);
+}
+
+/// findValue - Look through bitcasts and simple memory reference patterns
+/// to identify an equivalent, but more informative, value. If OffsetOk
+/// is true, look through getelementptrs with non-zero offsets too.
+///
+/// Most analysis passes don't require this logic, because instcombine
+/// will simplify most of these kinds of things away. But it's a goal of
+/// this Lint pass to be useful even on non-optimized IR.
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
+ SmallPtrSet<Value *, 4> Visited;
+ return findValueImpl(V, OffsetOk, Visited);
+}
+
+/// findValueImpl - Implementation helper for findValue.
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const {
+ // Detect self-referential values.
+ if (!Visited.insert(V))
+ return UndefValue::get(V->getType());
+
+ // TODO: Look through sext or zext cast, when the result is known to
+ // be interpreted as signed or unsigned, respectively.
+ // TODO: Look through eliminable cast pairs.
+ // TODO: Look through calls with unique return values.
+ // TODO: Look through vector insert/extract/shuffle.
+ V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
+ if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+ BasicBlock::iterator BBI = L;
+ BasicBlock *BB = L->getParent();
+ SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+ for (;;) {
+ if (!VisitedBlocks.insert(BB)) break;
+ if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
+ BB, BBI, 6, AA))
+ return findValueImpl(U, OffsetOk, Visited);
+ if (BBI != BB->begin()) break;
+ BB = BB->getUniquePredecessor();
+ if (!BB) break;
+ BBI = BB->end();
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (Value *W = PN->hasConstantValue())
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
+ if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
+ Ex->idx_begin(),
+ Ex->idx_end()))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ // Same as above, but for ConstantExpr instead of Instruction.
+ if (Instruction::isCast(CE->getOpcode())) {
+ if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
+ CE->getOperand(0)->getType(),
+ CE->getType(),
+ TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ } else if (CE->getOpcode() == Instruction::ExtractValue) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ if (Value *W = FindInsertedValue(CE->getOperand(0),
+ Indices.begin(),
+ Indices.end()))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+ }
+
+ // As a last resort, try SimplifyInstruction or constant folding.
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Value *W = SimplifyInstruction(Inst, TD, DT))
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (Value *W = ConstantFoldConstantExpression(CE, TD))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+
+ return V;
+}
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createLintPass() {
+ return new Lint();
+}
+
+/// lintFunction - Check a function for errors, printing messages on stderr.
+///
+void llvm::lintFunction(const Function &f) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot lint external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Lint *V = new Lint();
+ FPM.add(V);
+ FPM.run(F);
+}
+
+/// lintModule - Check a module for errors, printing messages on stderr.
+///
+void llvm::lintModule(const Module &M) {
+ PassManager PM;
+ Lint *V = new Lint();
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+}
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
new file mode 100644
index 000000000000..ab34fd653a70
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -0,0 +1,236 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+using namespace llvm;
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+ // this function is only used when one address use dominates the
+ // other, which means that they'll always either have the same
+ // value or one of them will have an undefined value.
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+ isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+ if (const Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
+/// bitcasts to get back to the underlying object being addressed, keeping
+/// track of the offset in bytes from the GEPs relative to the result.
+/// This is closely related to GetUnderlyingObject but is located
+/// here to avoid making VMCore depend on TargetData.
+static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
+ uint64_t &ByteOffset,
+ unsigned MaxLookup = 6) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
+ ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+ unsigned Align, const TargetData *TD) {
+ uint64_t ByteOffset = 0;
+ Value *Base = V;
+ if (TD)
+ Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
+
+ const Type *BaseType = 0;
+ unsigned BaseAlign = 0;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ // An alloca is safe to load from as load as it is suitably aligned.
+ BaseType = AI->getAllocatedType();
+ BaseAlign = AI->getAlignment();
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+ // Global variables are safe to load from but their size cannot be
+ // guaranteed if they are overridden.
+ if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
+ BaseType = GV->getType()->getElementType();
+ BaseAlign = GV->getAlignment();
+ }
+ }
+
+ if (BaseType && BaseType->isSized()) {
+ if (TD && BaseAlign == 0)
+ BaseAlign = TD->getPrefTypeAlignment(BaseType);
+
+ if (Align <= BaseAlign) {
+ if (!TD)
+ return true; // Loading directly from an alloca or global is OK.
+
+ // Check if the load is within the bounds of the underlying object.
+ const PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
+ if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+ (Align == 0 || (ByteOffset % Align) == 0))
+ return true;
+ }
+ }
+
+ // Otherwise, be a little bit aggressive by scanning the local block where we
+ // want to check to see if the pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free) the pointer could be marked invalid.
+ if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
+ }
+ }
+ return false;
+}
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
+/// instruction before ScanFrom) checking to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+/// If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't find
+/// something that invalidates *Ptr or provides it, ScanFrom would be left at
+/// begin() and this returns null. ScanFrom could also be left
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
+/// it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA) {
+ if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+ // If we're using alias analysis to disambiguate get the size of *Ptr.
+ uint64_t AccessSize = 0;
+ if (AA) {
+ const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+ AccessSize = AA->getTypeStoreSize(AccessTy);
+ }
+
+ while (ScanFrom != ScanBB->begin()) {
+ // We must ignore debug info directives when counting (otherwise they
+ // would affect codegen).
+ Instruction *Inst = --ScanFrom;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ // Restore ScanFrom to expected value in case next test succeeds
+ ScanFrom++;
+
+ // Don't scan huge blocks.
+ if (MaxInstsToScan-- == 0) return 0;
+
+ --ScanFrom;
+ // If this is a load of Ptr, the loaded value is available.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+ return LI;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If this is a store through Ptr, the value is available!
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+ return SI->getOperand(0);
+
+ // If Ptr is an alloca and this is a store to a different alloca, ignore
+ // the store. This is a trivial form of alias analysis that is important
+ // for reg2mem'd code.
+ if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+ (isa<AllocaInst>(SI->getOperand(1)) ||
+ isa<GlobalVariable>(SI->getOperand(1))))
+ continue;
+
+ // If we have alias analysis and it says the store won't modify the loaded
+ // value, ignore the store.
+ if (AA &&
+ (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // Otherwise the store that may or may not alias the pointer, bail out.
+ ++ScanFrom;
+ return 0;
+ }
+
+ // If this is some other instruction that may clobber Ptr, bail out.
+ if (Inst->mayWriteToMemory()) {
+ // If alias analysis claims that it really won't modify the load,
+ // ignore it.
+ if (AA &&
+ (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // May modify the pointer, bail out.
+ ++ScanFrom;
+ return 0;
+ }
+ }
+
+ // Got to the start of the block, we didn't find it, but are done for this
+ // block.
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
new file mode 100644
index 000000000000..c1afe8fbd618
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -0,0 +1,358 @@
+//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the (beginning) of an implementation of a loop dependence analysis
+// framework, which is used to detect dependences in memory accesses in loops.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// TODO: adapt as implementation progresses.
+//
+// TODO: document lingo (pair, subscript, index)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lda"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopDependenceAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumAnswered, "Number of dependence queries answered");
+STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed");
+STATISTIC(NumDependent, "Number of pairs with dependent accesses");
+STATISTIC(NumIndependent, "Number of pairs with independent accesses");
+STATISTIC(NumUnknown, "Number of pairs with unknown accesses");
+
+LoopPass *llvm::createLoopDependenceAnalysisPass() {
+ return new LoopDependenceAnalysis();
+}
+
+INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true)
+char LoopDependenceAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Utility Functions
+//===----------------------------------------------------------------------===//
+
+static inline bool IsMemRefInstr(const Value *V) {
+ const Instruction *I = dyn_cast<const Instruction>(V);
+ return I && (I->mayReadFromMemory() || I->mayWriteToMemory());
+}
+
+static void GetMemRefInstrs(const Loop *L,
+ SmallVectorImpl<Instruction*> &Memrefs) {
+ for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+ b != be; ++b)
+ for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end();
+ i != ie; ++i)
+ if (IsMemRefInstr(i))
+ Memrefs.push_back(i);
+}
+
+static bool IsLoadOrStoreInst(Value *I) {
+ return isa<LoadInst>(I) || isa<StoreInst>(I);
+}
+
+static Value *GetPointerOperand(Value *I) {
+ if (LoadInst *i = dyn_cast<LoadInst>(I))
+ return i->getPointerOperand();
+ if (StoreInst *i = dyn_cast<StoreInst>(I))
+ return i->getPointerOperand();
+ llvm_unreachable("Value is no load or store instruction!");
+ // Never reached.
+ return 0;
+}
+
+static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
+ const Value *A,
+ const Value *B) {
+ const Value *aObj = GetUnderlyingObject(A);
+ const Value *bObj = GetUnderlyingObject(B);
+ return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()),
+ bObj, AA->getTypeStoreSize(bObj->getType()));
+}
+
+static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) {
+ return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L);
+}
+
+//===----------------------------------------------------------------------===//
+// Dependence Testing
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::isDependencePair(const Value *A,
+ const Value *B) const {
+ return IsMemRefInstr(A) &&
+ IsMemRefInstr(B) &&
+ (cast<const Instruction>(A)->mayWriteToMemory() ||
+ cast<const Instruction>(B)->mayWriteToMemory());
+}
+
+bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A,
+ Value *B,
+ DependencePair *&P) {
+ void *insertPos = 0;
+ FoldingSetNodeID id;
+ id.AddPointer(A);
+ id.AddPointer(B);
+
+ P = Pairs.FindNodeOrInsertPos(id, insertPos);
+ if (P) return true;
+
+ P = new (PairAllocator) DependencePair(id, A, B);
+ Pairs.InsertNode(P, insertPos);
+ return false;
+}
+
+void LoopDependenceAnalysis::getLoops(const SCEV *S,
+ DenseSet<const Loop*>* Loops) const {
+ // Refactor this into an SCEVVisitor, if efficiency becomes a concern.
+ for (const Loop *L = this->L; L != 0; L = L->getParentLoop())
+ if (!SE->isLoopInvariant(S, L))
+ Loops->insert(L);
+}
+
+bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const {
+ DenseSet<const Loop*> loops;
+ getLoops(S, &loops);
+ return loops.empty();
+}
+
+bool LoopDependenceAnalysis::isAffine(const SCEV *S) const {
+ const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S);
+ return isLoopInvariant(S) || (rec && rec->isAffine());
+}
+
+bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const {
+ return isLoopInvariant(A) && isLoopInvariant(B);
+}
+
+bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const {
+ DenseSet<const Loop*> loops;
+ getLoops(A, &loops);
+ getLoops(B, &loops);
+ return loops.size() == 1;
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseZIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!");
+ return A == B ? Dependent : Independent;
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseMIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSubscript(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ DEBUG(dbgs() << " Testing subscript: " << *A << ", " << *B << "\n");
+
+ if (A == B) {
+ DEBUG(dbgs() << " -> [D] same SCEV\n");
+ return Dependent;
+ }
+
+ if (!isAffine(A) || !isAffine(B)) {
+ DEBUG(dbgs() << " -> [?] not affine\n");
+ return Unknown;
+ }
+
+ if (isZIVPair(A, B))
+ return analyseZIV(A, B, S);
+
+ if (isSIVPair(A, B))
+ return analyseSIV(A, B, S);
+
+ return analyseMIV(A, B, S);
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analysePair(DependencePair *P) const {
+ DEBUG(dbgs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n");
+
+ // We only analyse loads and stores but no possible memory accesses by e.g.
+ // free, call, or invoke instructions.
+ if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) {
+ DEBUG(dbgs() << "--> [?] no load/store\n");
+ return Unknown;
+ }
+
+ Value *aPtr = GetPointerOperand(P->A);
+ Value *bPtr = GetPointerOperand(P->B);
+
+ switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ // We can not analyse objects if we do not know about their aliasing.
+ DEBUG(dbgs() << "---> [?] may alias\n");
+ return Unknown;
+
+ case AliasAnalysis::NoAlias:
+ // If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(dbgs() << "---> [I] no alias\n");
+ return Independent;
+
+ case AliasAnalysis::MustAlias:
+ break; // The underlying objects alias, test accesses for dependence.
+ }
+
+ const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr);
+ const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr);
+
+ if (!aGEP || !bGEP)
+ return Unknown;
+
+ // FIXME: Is filtering coupled subscripts necessary?
+
+ // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding
+ // trailing zeroes to the smaller GEP, if needed.
+ typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy;
+ GEPOpdPairsTy opds;
+ for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(),
+ aEnd = aGEP->idx_end(),
+ bIdx = bGEP->idx_begin(),
+ bEnd = bGEP->idx_end();
+ aIdx != aEnd && bIdx != bEnd;
+ aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) {
+ const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE);
+ const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE);
+ opds.push_back(std::make_pair(aSCEV, bSCEV));
+ }
+
+ if (!opds.empty() && opds[0].first != opds[0].second) {
+ // We cannot (yet) handle arbitrary GEP pointer offsets. By limiting
+ //
+ // TODO: this could be relaxed by adding the size of the underlying object
+ // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we
+ // know that x is a [100 x i8]*, we could modify the first subscript to be
+ // (i, 200-i) instead of (i, -i).
+ return Unknown;
+ }
+
+ // Now analyse the collected operand pairs (skipping the GEP ptr offsets).
+ for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end();
+ i != end; ++i) {
+ Subscript subscript;
+ DependenceResult result = analyseSubscript(i->first, i->second, &subscript);
+ if (result != Dependent) {
+ // We either proved independence or failed to analyse this subscript.
+ // Further subscripts will not improve the situation, so abort early.
+ return result;
+ }
+ P->Subscripts.push_back(subscript);
+ }
+ // We successfully analysed all subscripts but failed to prove independence.
+ return Dependent;
+}
+
+bool LoopDependenceAnalysis::depends(Value *A, Value *B) {
+ assert(isDependencePair(A, B) && "Values form no dependence pair!");
+ ++NumAnswered;
+
+ DependencePair *p;
+ if (!findOrInsertDependencePair(A, B, p)) {
+ // The pair is not cached, so analyse it.
+ ++NumAnalysed;
+ switch (p->Result = analysePair(p)) {
+ case Dependent: ++NumDependent; break;
+ case Independent: ++NumIndependent; break;
+ case Unknown: ++NumUnknown; break;
+ }
+ }
+ return p->Result != Independent;
+}
+
+//===----------------------------------------------------------------------===//
+// LoopDependenceAnalysis Implementation
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) {
+ this->L = L;
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+void LoopDependenceAnalysis::releaseMemory() {
+ Pairs.clear();
+ PairAllocator.Reset();
+}
+
+void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<ScalarEvolution>();
+}
+
+static void PrintLoopInfo(raw_ostream &OS,
+ LoopDependenceAnalysis *LDA, const Loop *L) {
+ if (!L->empty()) return; // ignore non-innermost loops
+
+ SmallVector<Instruction*, 8> memrefs;
+ GetMemRefInstrs(L, memrefs);
+
+ OS << "Loop at depth " << L->getLoopDepth() << ", header block: ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ OS << "\n";
+
+ OS << " Load/store instructions: " << memrefs.size() << "\n";
+ for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
+ end = memrefs.end(); x != end; ++x)
+ OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n";
+
+ OS << " Pairwise dependence results:\n";
+ for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
+ end = memrefs.end(); x != end; ++x)
+ for (SmallVector<Instruction*, 8>::const_iterator y = x + 1;
+ y != end; ++y)
+ if (LDA->isDependencePair(*x, *y))
+ OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin())
+ << ": " << (LDA->depends(*x, *y) ? "dependent" : "independent")
+ << "\n";
+}
+
+void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+ // TODO: doc why const_cast is safe
+ PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L);
+}
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
new file mode 100644
index 000000000000..05831402f409
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -0,0 +1,419 @@
+//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfo class that is used to identify natural loops
+// and determine the loop depth of various nodes of the CFG. Note that the
+// loops identified may actually be several natural loops that share the same
+// header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+using namespace llvm;
+
+// Always verify loopinfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyLoopInfo = true;
+#else
+static bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+ cl::desc("Verify loop info (time consuming)"));
+
+char LoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
+
+//===----------------------------------------------------------------------===//
+// Loop implementation
+//
+
+/// isLoopInvariant - Return true if the specified value is loop invariant
+///
+bool Loop::isLoopInvariant(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return !contains(I);
+ return true; // All non-instructions are loop invariant
+}
+
+/// hasLoopInvariantOperands - Return true if all the operands of the
+/// specified instruction are loop invariant.
+bool Loop::hasLoopInvariantOperands(Instruction *I) const {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!isLoopInvariant(I->getOperand(i)))
+ return false;
+
+ return true;
+}
+
+/// makeLoopInvariant - If the given value is an instruciton inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+ Instruction *InsertPt) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return makeLoopInvariant(I, Changed, InsertPt);
+ return true; // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+ Instruction *InsertPt) const {
+ // Test if the value is already loop-invariant.
+ if (isLoopInvariant(I))
+ return true;
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+ if (I->mayReadFromMemory())
+ return false;
+ // Determine the insertion point, unless one was given.
+ if (!InsertPt) {
+ BasicBlock *Preheader = getLoopPreheader();
+ // Without a preheader, hoisting is not feasible.
+ if (!Preheader)
+ return false;
+ InsertPt = Preheader->getTerminator();
+ }
+ // Don't hoist instructions with loop-variant operands.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+ return false;
+
+ // Hoist.
+ I->moveBefore(InsertPt);
+ Changed = true;
+ return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop. If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
+///
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = 0, *Backedge = 0;
+ pred_iterator PI = pred_begin(H);
+ assert(PI != pred_end(H) &&
+ "Loop must have at least one backedge!");
+ Backedge = *PI++;
+ if (PI == pred_end(H)) return 0; // dead loop
+ Incoming = *PI++;
+ if (PI != pred_end(H)) return 0; // multiple backedges?
+
+ if (contains(Incoming)) {
+ if (contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical indvar.
+ for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+ if (CI->isNullValue())
+ if (Instruction *Inc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+ if (Inc->getOpcode() == Instruction::Add &&
+ Inc->getOperand(0) == PN)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+ if (CI->equalsInt(1))
+ return PN;
+ }
+ return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the number of
+/// times the loop will be executed. Note that this means that the backedge
+/// of the loop executes N-1 times. If the trip-count cannot be determined,
+/// this returns null.
+///
+/// The IndVarSimplify pass transforms loops to have a form that this
+/// function easily understands.
+///
+Value *Loop::getTripCount() const {
+ // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
+ // canonical induction variable and V is the trip count of the loop.
+ PHINode *IV = getCanonicalInductionVariable();
+ if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
+
+ bool P0InLoop = contains(IV->getIncomingBlock(0));
+ Value *Inc = IV->getIncomingValue(!P0InLoop);
+ BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
+ if (BI->isConditional()) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+ if (ICI->getOperand(0) == Inc) {
+ if (BI->getSuccessor(0) == getHeader()) {
+ if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+ return ICI->getOperand(1);
+ } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+ return ICI->getOperand(1);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// getSmallConstantTripCount - Returns the trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. Will also return 0 if the trip count is very large
+/// (>= 2^32)
+unsigned Loop::getSmallConstantTripCount() const {
+ Value* TripCount = this->getTripCount();
+ if (TripCount) {
+ if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
+ // Guard against huge trip counts.
+ if (TripCountC->getValue().getActiveBits() <= 32) {
+ return (unsigned)TripCountC->getZExtValue();
+ }
+ }
+ }
+ return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant, use getSmallConstantTripCount for that case), Will also return 1
+/// if the trip count is very large (>= 2^32).
+unsigned Loop::getSmallConstantTripMultiple() const {
+ Value* TripCount = this->getTripCount();
+ // This will hold the ConstantInt result, if any
+ ConstantInt *Result = NULL;
+ if (TripCount) {
+ // See if the trip count is constant itself
+ Result = dyn_cast<ConstantInt>(TripCount);
+ // if not, see if it is a multiplication
+ if (!Result)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
+ switch (BO->getOpcode()) {
+ case BinaryOperator::Mul:
+ Result = dyn_cast<ConstantInt>(BO->getOperand(1));
+ break;
+ case BinaryOperator::Shl:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (CI->getValue().getActiveBits() <= 5)
+ return 1u << CI->getZExtValue();
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // Guard against huge trip counts.
+ if (Result && Result->getValue().getActiveBits() <= 32) {
+ return (unsigned)Result->getZExtValue();
+ } else {
+ return 1;
+ }
+}
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
+
+ for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+ BasicBlock *BB = *BI;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U))
+ UserBB = P->getIncomingBlock(UI);
+
+ // Check the current block, as a fast-path, before checking whether
+ // the use is anywhere in the loop. Most values are used in the same
+ // block they are defined in. Also, blocks not reachable from the
+ // entry are special; uses in them don't need to go through PHIs.
+ if (UserBB != BB &&
+ !LoopBBs.count(UserBB) &&
+ DT.isReachableFromEntry(UserBB))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify form transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+ // Normal-form loops have a preheader, a single backedge, and all of their
+ // exits have all their predecessors inside the loop.
+ return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
+ // Each predecessor of each exit block of a normal loop is contained
+ // within the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+ PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+ if (!LoopBBs.count(*PI))
+ return false;
+ // All the requirements are met.
+ return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop exits are in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+ assert(hasDedicatedExits() &&
+ "getUniqueExitBlocks assumes the loop has canonical form exits!");
+
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
+ std::sort(LoopBBs.begin(), LoopBBs.end());
+
+ SmallVector<BasicBlock *, 32> switchExitBlocks;
+
+ for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+ BasicBlock *current = *BI;
+ switchExitBlocks.clear();
+
+ for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
+ // If block is inside the loop then it is not a exit block.
+ if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ continue;
+
+ pred_iterator PI = pred_begin(*I);
+ BasicBlock *firstPred = *PI;
+
+ // If current basic block is this exit block's first predecessor
+ // then only insert exit block in to the output ExitBlocks vector.
+ // This ensures that same exit block is not inserted twice into
+ // ExitBlocks vector.
+ if (current != firstPred)
+ continue;
+
+ // If a terminator has more then two successors, for example SwitchInst,
+ // then it is possible that there are multiple edges from current block
+ // to one exit block.
+ if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
+ ExitBlocks.push_back(*I);
+ continue;
+ }
+
+ // In case of multiple edges from current block to exit block, collect
+ // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
+ // duplicate edges.
+ if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+ == switchExitBlocks.end()) {
+ switchExitBlocks.push_back(*I);
+ ExitBlocks.push_back(*I);
+ }
+ }
+ }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+ SmallVector<BasicBlock *, 8> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ if (UniqueExitBlocks.size() == 1)
+ return UniqueExitBlocks[0];
+ return 0;
+}
+
+void Loop::dump() const {
+ print(dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
+bool LoopInfo::runOnFunction(Function &) {
+ releaseMemory();
+ LI.Calculate(getAnalysis<DominatorTree>().getBase()); // Update
+ return false;
+}
+
+void LoopInfo::verifyAnalysis() const {
+ // LoopInfo is a FunctionPass, but verifying every loop in the function
+ // each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop
+ // manually during loop pass sequences.
+
+ if (!VerifyLoopInfo) return;
+
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+ (*I)->verifyLoopNest();
+ }
+
+ // TODO: check BBMap consistency.
+}
+
+void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTree>();
+}
+
+void LoopInfo::print(raw_ostream &OS, const Module*) const {
+ LI.print(OS);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
new file mode 100644
index 000000000000..10e3f297f9c5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -0,0 +1,422 @@
+//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LoopPass and LPPassManager. All loop optimization
+// and transformation passes are derived from LoopPass. LPPassManager is
+// responsible for managing LoopPasses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/DebugInfoProbe.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+namespace {
+
+/// PrintLoopPass - Print a Function corresponding to a Loop.
+///
+class PrintLoopPass : public LoopPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintLoopPass(const std::string &B, raw_ostream &o)
+ : LoopPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &) {
+ Out << Banner;
+ for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+ b != be;
+ ++b) {
+ (*b)->print(Out);
+ }
+ return false;
+ }
+};
+
+char PrintLoopPass::ID = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+static DebugInfoProbeInfo *TheDebugProbe;
+static void createDebugInfoProbe() {
+ if (TheDebugProbe) return;
+
+ // Constructed the first time this is called. This guarantees that the
+ // object will be constructed, if -enable-debug-info-probe is set,
+ // before static globals, thus it will be destroyed before them.
+ static ManagedStatic<DebugInfoProbeInfo> DIP;
+ TheDebugProbe = &*DIP;
+}
+
+//===----------------------------------------------------------------------===//
+// LPPassManager
+//
+
+char LPPassManager::ID = 0;
+
+LPPassManager::LPPassManager(int Depth)
+ : FunctionPass(ID), PMDataManager(Depth) {
+ skipThisLoop = false;
+ redoThisLoop = false;
+ LI = NULL;
+ CurrentLoop = NULL;
+}
+
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
+void LPPassManager::deleteLoopFromQueue(Loop *L) {
+
+ if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
+ // Reparent all of the blocks in this loop. Since BBLoop had a parent,
+ // they are now all in it.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L) // Don't change blocks in subloops.
+ LI->changeLoopFor(*I, ParentLoop);
+
+ // Remove the loop from its parent loop.
+ for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
+ ++I) {
+ assert(I != E && "Couldn't find loop");
+ if (*I == L) {
+ ParentLoop->removeChildLoop(I);
+ break;
+ }
+ }
+
+ // Move all subloops into the parent loop.
+ while (!L->empty())
+ ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
+ } else {
+ // Reparent all of the blocks in this loop. Since BBLoop had no parent,
+ // they no longer in a loop at all.
+
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ // Don't change blocks in subloops.
+ if (LI->getLoopFor(L->getBlocks()[i]) == L) {
+ LI->removeBlock(L->getBlocks()[i]);
+ --i;
+ }
+ }
+
+ // Remove the loop from the top-level LoopInfo object.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
+ assert(I != E && "Couldn't find loop");
+ if (*I == L) {
+ LI->removeLoop(I);
+ break;
+ }
+ }
+
+ // Move all of the subloops to the top-level.
+ while (!L->empty())
+ LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
+ }
+
+ delete L;
+
+ // If L is current loop then skip rest of the passes and let
+ // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
+ // and continue applying other passes on CurrentLoop.
+ if (CurrentLoop == L) {
+ skipThisLoop = true;
+ return;
+ }
+
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L) {
+ LQ.erase(I);
+ break;
+ }
+ }
+}
+
+// Inset loop into loop nest (LoopInfo) and loop queue (LQ).
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
+
+ assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+
+ // Insert into loop nest
+ if (ParentLoop)
+ ParentLoop->addChildLoop(L);
+ else
+ LI->addTopLevelLoop(L);
+
+ insertLoopIntoQueue(L);
+}
+
+void LPPassManager::insertLoopIntoQueue(Loop *L) {
+ // Insert L into loop queue
+ if (L == CurrentLoop)
+ redoLoop(L);
+ else if (!L->getParentLoop())
+ // This is top level loop.
+ LQ.push_front(L);
+ else {
+ // Insert L after the parent loop.
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L->getParentLoop()) {
+ // deque does not support insert after.
+ ++I;
+ LQ.insert(I, 1, L);
+ break;
+ }
+ }
+ }
+}
+
+// Reoptimize this loop. LPPassManager will re-insert this loop into the
+// queue. This allows LoopPass to change loop nest for the loop. This
+// utility may send LPPassManager into infinite loops so use caution.
+void LPPassManager::redoLoop(Loop *L) {
+ assert (CurrentLoop == L && "Can redo only CurrentLoop");
+ redoThisLoop = true;
+}
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+ BasicBlock *To, Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->cloneBasicBlockAnalysis(From, To, L);
+ }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+ ++BI) {
+ Instruction &I = *BI;
+ deleteSimpleAnalysisValue(&I, L);
+ }
+ }
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->deleteAnalysisValue(V, L);
+ }
+}
+
+
+// Recurse through all subloops and all loops into LQ.
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
+ LQ.push_back(L);
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+ // become part of LPPassManager.
+ Info.addRequired<LoopInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool LPPassManager::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfo>();
+ bool Changed = false;
+ createDebugInfoProbe();
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ // Populate Loop Queue
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+
+ if (LQ.empty()) // No loops, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
+ I != E; ++I) {
+ Loop *L = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doInitialization(L, *this);
+ }
+ }
+
+ // Walk Loops
+ while (!LQ.empty()) {
+
+ CurrentLoop = LQ.back();
+ skipThisLoop = false;
+ redoThisLoop = false;
+
+ // Run all passes on the current Loop.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+ CurrentLoop->getHeader()->getName());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+ if (TheDebugProbe)
+ TheDebugProbe->initialize(P, F);
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
+ TimeRegion PassTimer(getPassTimer(P));
+
+ Changed |= P->runOnLoop(CurrentLoop, *this);
+ }
+ if (TheDebugProbe)
+ TheDebugProbe->finalize(P, F);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getName());
+ dumpPreservedSet(P);
+
+ if (!skipThisLoop) {
+ // Manually check that this loop is still healthy. This is done
+ // instead of relying on LoopInfo::verifyLoop since LoopInfo
+ // is a function pass and it's really expensive to verify every
+ // loop in the function every time. That level of checking can be
+ // enabled with the -verify-loop-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(LI));
+ CurrentLoop->verifyLoop();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getName(),
+ ON_LOOP_MSG);
+
+ if (skipThisLoop)
+ // Do not run other passes on this loop.
+ break;
+ }
+
+ // If the loop was deleted, release all the loop passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisLoop)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_LOOP_MSG);
+ }
+
+ // Pop the loop from queue after running all passes.
+ LQ.pop_back();
+
+ if (redoThisLoop)
+ LQ.push_back(CurrentLoop);
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void LPPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Loop Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoopPass
+
+Pass *LoopPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintLoopPass(Banner, O);
+}
+
+// Check if this pass is suitable for the current LPPassManager, if
+// available. This pass P is not suitable for a LPPassManager if P
+// is not preserving higher level analysis info used by other
+// LPPassManager passes. In such case, pop LPPassManager from the
+// stack. This will force assignPassManager() to create new
+// LPPassManger as expected.
+void LoopPass::preparePassManager(PMStack &PMS) {
+
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ // If this pass is destroying high level information that is used
+ // by other passes that are managed by LPM then do not insert
+ // this pass in current LPM. Use new LPPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void LoopPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ LPPassManager *LPPM;
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
+ LPPM = (LPPassManager*)PMS.top();
+ else {
+ // Create new Loop Pass Manager if it does not exist.
+ assert (!PMS.empty() && "Unable to create Loop Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Call Graph Pass Manager
+ LPPM = new LPPassManager(PMD->getDepth() + 1);
+ LPPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(LPPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = LPPM->getAsPass();
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(LPPM);
+ }
+
+ LPPM->add(this);
+}
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
new file mode 100644
index 000000000000..64d215c37cc7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -0,0 +1,167 @@
+//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+namespace {
+ struct MemDepPrinter : public FunctionPass {
+ const Function *F;
+
+ typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag;
+ typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep;
+ typedef SmallSetVector<Dep, 4> DepSet;
+ typedef DenseMap<const Instruction *, DepSet> DepSetMap;
+ DepSetMap Deps;
+
+ static char ID; // Pass identifcation, replacement for typeid
+ MemDepPrinter() : FunctionPass(ID) {
+ initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ void print(raw_ostream &OS, const Module * = 0) const;
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<MemoryDependenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ virtual void releaseMemory() {
+ Deps.clear();
+ F = 0;
+ }
+ };
+}
+
+char MemDepPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+
+FunctionPass *llvm::createMemDepPrinter() {
+ return new MemDepPrinter();
+}
+
+bool MemDepPrinter::runOnFunction(Function &F) {
+ this->F = &F;
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+
+ // All this code uses non-const interfaces because MemDep is not
+ // const-friendly, though nothing is actually modified.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
+ continue;
+
+ MemDepResult Res = MDA.getDependency(Inst);
+ if (!Res.isNonLocal()) {
+ assert(Res.isClobber() != Res.isDef() &&
+ "Local dep should be def or clobber!");
+ Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ static_cast<BasicBlock *>(0)));
+ } else if (CallSite CS = cast<Value>(Inst)) {
+ const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
+ MDA.getNonLocalCallDependency(CS);
+
+ DepSet &InstDeps = Deps[Inst];
+ for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ assert(Res.isClobber() != Res.isDef() &&
+ "Resolved non-local call dep should be def or clobber!");
+ InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ I->getBB()));
+ }
+ } else {
+ SmallVector<NonLocalDepResult, 4> NLDI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // FIXME: Volatile is not handled properly here.
+ AliasAnalysis::Location Loc = AA.getLocation(LI);
+ MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
+ LI->getParent(), NLDI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // FIXME: Volatile is not handled properly here.
+ AliasAnalysis::Location Loc = AA.getLocation(SI);
+ MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
+ AliasAnalysis::Location Loc = AA.getLocation(VI);
+ MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
+ } else {
+ llvm_unreachable("Unknown memory instruction!");
+ }
+
+ DepSet &InstDeps = Deps[Inst];
+ for (SmallVectorImpl<NonLocalDepResult>::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ assert(Res.isClobber() != Res.isDef() &&
+ "Resolved non-local pointer dep should be def or clobber!");
+ InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ I->getBB()));
+ }
+ }
+ }
+
+ return false;
+}
+
+void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
+ for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
+ const Instruction *Inst = &*I;
+
+ DepSetMap::const_iterator DI = Deps.find(Inst);
+ if (DI == Deps.end())
+ continue;
+
+ const DepSet &InstDeps = DI->second;
+
+ for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
+ I != E; ++I) {
+ const Instruction *DepInst = I->first.getPointer();
+ bool isClobber = I->first.getInt();
+ const BasicBlock *DepBB = I->second;
+
+ OS << " " << (isClobber ? "Clobber" : " Def");
+ if (DepBB) {
+ OS << " in block ";
+ WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
+ }
+ OS << " from: ";
+ if (DepInst == Inst)
+ OS << "<unspecified>";
+ else
+ DepInst->print(OS);
+ OS << "\n";
+ }
+
+ Inst->print(OS);
+ OS << "\n\n";
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
new file mode 100644
index 000000000000..769c68ce425e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -0,0 +1,218 @@
+//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value *I) {
+ return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+ if (!CI)
+ return false;
+
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration())
+ return false;
+ if (Callee->getName() != "malloc" &&
+ Callee->getName() != "_Znwj" && // operator new(unsigned int)
+ Callee->getName() != "_Znwm" && // operator new(unsigned long)
+ Callee->getName() != "_Znaj" && // operator new[](unsigned int)
+ Callee->getName() != "_Znam") // operator new[](unsigned long)
+ return false;
+
+ // Check malloc prototype.
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // attribute will exist.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (FTy->getNumParams() != 1)
+ return false;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
+ if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst *llvm::extractMallocCall(Value *I) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+static bool isBitCastOfMallocCall(const BitCastInst *BCI) {
+ if (!BCI)
+ return false;
+
+ return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
+}
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+CallInst *llvm::extractMallocCallFromBitCast(Value *I) {
+ BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
+ const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt = false) {
+ if (!CI)
+ return NULL;
+
+ // The size of the malloc's result type must be known to determine array size.
+ const Type *T = getMallocAllocatedType(CI);
+ if (!T || !T->isSized() || !TD)
+ return NULL;
+
+ unsigned ElementSize = TD->getTypeAllocSize(T);
+ if (const StructType *ST = dyn_cast<StructType>(T))
+ ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+
+ // If malloc call's arg can be determined to be a multiple of ElementSize,
+ // return the multiple. Otherwise, return NULL.
+ Value *MallocArg = CI->getArgOperand(0);
+ Value *Multiple = NULL;
+ if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+ LookThroughSExt))
+ return Multiple;
+
+ return NULL;
+}
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// is a call to malloc whose array size can be determined and the array size
+/// is not constant 1. Otherwise, return NULL.
+const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
+ const CallInst *CI = extractMallocCall(I);
+ Value *ArraySize = computeArraySize(CI, TD);
+
+ if (ArraySize &&
+ ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+ return CI;
+
+ // CI is a non-array malloc or we can't figure out that it is an array malloc.
+ return NULL;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// The PointerType depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the calls' return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+const PointerType *llvm::getMallocType(const CallInst *CI) {
+ assert(isMalloc(CI) && "getMallocType and not malloc call");
+
+ const PointerType *MallocType = NULL;
+ unsigned NumOfBitCastUses = 0;
+
+ // Determine if CallInst has a bitcast use.
+ for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; )
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
+ MallocType = cast<PointerType>(BCI->getDestTy());
+ NumOfBitCastUses++;
+ }
+
+ // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
+ if (NumOfBitCastUses == 1)
+ return MallocType;
+
+ // Malloc call was not bitcast, so type is the malloc function's return type.
+ if (NumOfBitCastUses == 0)
+ return cast<PointerType>(CI->getType());
+
+ // Type could not be determined.
+ return NULL;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call.
+/// The Type depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the malloc calls' return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
+ const PointerType *PT = getMallocType(CI);
+ return PT ? PT->getElementType() : NULL;
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return that multiple. For non-array mallocs, the multiple is
+/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
+/// determined.
+Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt) {
+ assert(isMalloc(CI) && "getMallocArraySize and not malloc call");
+ return computeArraySize(CI, TD, LookThroughSExt);
+}
+
+//===----------------------------------------------------------------------===//
+// free Call Utility Functions.
+//
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ return 0;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration())
+ return 0;
+
+ if (Callee->getName() != "free" &&
+ Callee->getName() != "_ZdlPv" && // operator delete(void*)
+ Callee->getName() != "_ZdaPv") // operator delete[](void*)
+ return 0;
+
+ // Check free prototype.
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // attribute will exist.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (!FTy->getReturnType()->isVoidTy())
+ return 0;
+ if (FTy->getNumParams() != 1)
+ return 0;
+ if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
+ return 0;
+
+ return CI;
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
new file mode 100644
index 000000000000..ce7fab6459ed
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -0,0 +1,1408 @@
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an analysis that determines, for a given memory
+// operation, what preceding memory operations it depends on. It builds on
+// alias analysis information, and tries to provide a lazy, caching interface to
+// a common kind of alias information query.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memdep"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+ "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+ "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+ "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+ "Number of block queries that were completely cached");
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+: FunctionPass(ID), PredCache(0) {
+ initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+ LocalDeps.clear();
+ NonLocalDeps.clear();
+ NonLocalPointerDeps.clear();
+ ReverseLocalDeps.clear();
+ ReverseNonLocalDeps.clear();
+ ReverseNonLocalPtrDeps.clear();
+ PredCache->clear();
+}
+
+
+
+/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+ AA = &getAnalysis<AliasAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (PredCache == 0)
+ PredCache.reset(new PredIteratorCache());
+ return false;
+}
+
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+ SmallPtrSet<KeyTy, 4> > &ReverseMap,
+ Instruction *Inst, KeyTy Val) {
+ typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+ InstIt = ReverseMap.find(Inst);
+ assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+ bool Found = InstIt->second.erase(Val);
+ assert(Found && "Invalid reverse map!"); (void)Found;
+ if (InstIt->second.empty())
+ ReverseMap.erase(InstIt);
+}
+
+/// GetLocation - If the given instruction references a specific memory
+/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
+/// Return a ModRefInfo value describing the general behavior of the
+/// instruction.
+static
+AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
+ AliasAnalysis::Location &Loc,
+ AliasAnalysis *AA) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isVolatile()) {
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::Ref;
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isVolatile()) {
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::Mod;
+ }
+
+ if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+ Loc = AA->getLocation(V);
+ return AliasAnalysis::ModRef;
+ }
+
+ if (const CallInst *CI = isFreeCall(Inst)) {
+ // calls to free() deallocate the entire structure
+ Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+ return AliasAnalysis::Mod;
+ }
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Loc = AliasAnalysis::Location(II->getArgOperand(1),
+ cast<ConstantInt>(II->getArgOperand(0))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ case Intrinsic::invariant_end:
+ Loc = AliasAnalysis::Location(II->getArgOperand(2),
+ cast<ConstantInt>(II->getArgOperand(1))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ default:
+ break;
+ }
+
+ // Otherwise, just do the coarse-grained thing that always works.
+ if (Inst->mayWriteToMemory())
+ return AliasAnalysis::ModRef;
+ if (Inst->mayReadFromMemory())
+ return AliasAnalysis::Ref;
+ return AliasAnalysis::NoModRef;
+}
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
+MemDepResult MemoryDependenceAnalysis::
+getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ // Walk backwards through the block, looking for dependencies
+ while (ScanIt != BB->begin()) {
+ Instruction *Inst = --ScanIt;
+
+ // If this inst is a memory op, get the pointer it accessed
+ AliasAnalysis::Location Loc;
+ AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+ if (Loc.Ptr) {
+ // A simple instruction.
+ if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
+ continue;
+ }
+
+ if (CallSite InstCS = cast<Value>(Inst)) {
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+ // If these two calls do not interfere, look past it.
+ switch (AA->getModRefInfo(CS, InstCS)) {
+ case AliasAnalysis::NoModRef:
+ // If the two calls are the same, return InstCS as a Def, so that
+ // CS can be found redundant and eliminated.
+ if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
+ CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ return MemDepResult::getDef(Inst);
+
+ // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // keep scanning.
+ break;
+ default:
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+ }
+
+ // No dependence found. If this is the entry block of the function, it is a
+ // clobber, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getClobber(ScanIt);
+}
+
+/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
+/// would fully overlap MemLoc if done as a wider legal integer load.
+///
+/// MemLocBase, MemLocOffset are lazily computed here the first time the
+/// base/offs of memloc is needed.
+static bool
+isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
+ const Value *&MemLocBase,
+ int64_t &MemLocOffs,
+ const LoadInst *LI,
+ const TargetData *TD) {
+ // If we have no target data, we can't do this.
+ if (TD == 0) return false;
+
+ // If we haven't already computed the base/offset of MemLoc, do so now.
+ if (MemLocBase == 0)
+ MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
+ LI, *TD);
+ return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, Offs,
+/// and Size) and compares it against a load. If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use. If not, this returns zero.
+unsigned MemoryDependenceAnalysis::
+getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
+ unsigned MemLocSize, const LoadInst *LI,
+ const TargetData &TD) {
+ // We can only extend non-volatile integer loads.
+ if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+
+ // Get the base of this load.
+ int64_t LIOffs = 0;
+ const Value *LIBase =
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
+
+ // If the two pointers are not based on the same pointer, we can't tell that
+ // they are related.
+ if (LIBase != MemLocBase) return 0;
+
+ // Okay, the two values are based on the same pointer, but returned as
+ // no-alias. This happens when we have things like two byte loads at "P+1"
+ // and "P+3". Check to see if increasing the size of the "LI" load up to its
+ // alignment (or the largest native integer type) will allow us to load all
+ // the bits required by MemLoc.
+
+ // If MemLoc is before LI, then no widening of LI will help us out.
+ if (MemLocOffs < LIOffs) return 0;
+
+ // Get the alignment of the load in bytes. We assume that it is safe to load
+ // any legal integer up to this size without a problem. For example, if we're
+ // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
+ // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
+ // to i16.
+ unsigned LoadAlign = LI->getAlignment();
+
+ int64_t MemLocEnd = MemLocOffs+MemLocSize;
+
+ // If no amount of rounding up will let MemLoc fit into LI, then bail out.
+ if (LIOffs+LoadAlign < MemLocEnd) return 0;
+
+ // This is the size of the load to try. Start with the next larger power of
+ // two.
+ unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
+ NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
+
+ while (1) {
+ // If this load size is bigger than our known alignment or would not fit
+ // into a native integer register, then we fail.
+ if (NewLoadByteSize > LoadAlign ||
+ !TD.fitsInLegalInteger(NewLoadByteSize*8))
+ return 0;
+
+ // If a load of this width would include all of MemLoc, then we succeed.
+ if (LIOffs+NewLoadByteSize >= MemLocEnd)
+ return NewLoadByteSize;
+
+ NewLoadByteSize <<= 1;
+ }
+
+ return 0;
+}
+
+/// getPointerDependencyFrom - Return the instruction on which a memory
+/// location depends. If isLoad is true, this routine ignores may-aliases with
+/// read-only operations. If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations.
+MemDepResult MemoryDependenceAnalysis::
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+
+ const Value *MemLocBase = 0;
+ int64_t MemLocOffset = 0;
+
+ // Walk backwards through the basic block, looking for dependencies.
+ while (ScanIt != BB->begin()) {
+ Instruction *Inst = --ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Debug intrinsics don't (and can't) cause dependences.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
+ // If we reach a lifetime begin or end marker, then the query ends here
+ // because the value is undefined.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // FIXME: This only considers queries directly on the invariant-tagged
+ // pointer, not on query pointers that are indexed off of them. It'd
+ // be nice to handle that at some point (the right approach is to use
+ // GetPointerBaseWithConstantOffset).
+ if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
+ MemLoc))
+ return MemDepResult::getDef(II);
+ continue;
+ }
+ }
+
+ // Values depend on loads if the pointers are must aliased. This means that
+ // a load depends on another must aliased load from the same value.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
+
+ if (isLoad) {
+ if (R == AliasAnalysis::NoAlias) {
+ // If this is an over-aligned integer load (for example,
+ // "load i8* %P, align 4") see if it would obviously overlap with the
+ // queried location if widened to a larger load (e.g. if the queried
+ // location is 1 byte at P+1). If so, return it as a load/load
+ // clobber result, allowing the client to decide to widen the load if
+ // it wants to.
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
+ if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+ isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
+ MemLocOffset, LI, TD))
+ return MemDepResult::getClobber(Inst);
+
+ continue;
+ }
+
+ // Must aliased loads are defs of each other.
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+
+ // If we have a partial alias, then return this as a clobber for the
+ // client to handle.
+ if (R == AliasAnalysis::PartialAlias)
+ return MemDepResult::getClobber(Inst);
+
+ // Random may-alias loads don't depend on each other without a
+ // dependence.
+ continue;
+ }
+
+ // Stores don't depend on other no-aliased accesses.
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+
+ // Stores don't alias loads from read-only memory.
+ if (AA->pointsToConstantMemory(LoadLoc))
+ continue;
+
+ // Stores depend on may/must aliased loads.
+ return MemDepResult::getDef(Inst);
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If alias analysis can tell that this store is guaranteed to not modify
+ // the query pointer, ignore it. Use getModRefInfo to handle cases where
+ // the query pointer points to constant memory etc.
+ if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
+ continue;
+
+ // Ok, this store might clobber the query pointer. Check to see if it is
+ // a must alias: in this case, we want to return this as a def.
+ AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
+
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+ return MemDepResult::getClobber(Inst);
+ }
+
+ // If this is an allocation, and if we know that the accessed pointer is to
+ // the allocation, return Def. This means that there is no dependence and
+ // the access can be optimized based on that. For example, a load could
+ // turn into undef.
+ // Note: Only determine this to be a malloc if Inst is the malloc call, not
+ // a subsequent bitcast of the malloc call result. There can be stores to
+ // the malloced memory between the malloc call and its bitcast uses, and we
+ // need to continue scanning until the malloc call.
+ if (isa<AllocaInst>(Inst) ||
+ (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
+ const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
+
+ if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
+ return MemDepResult::getDef(Inst);
+ continue;
+ }
+
+ // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
+ switch (AA->getModRefInfo(Inst, MemLoc)) {
+ case AliasAnalysis::NoModRef:
+ // If the call has no effect on the queried pointer, just ignore it.
+ continue;
+ case AliasAnalysis::Mod:
+ return MemDepResult::getClobber(Inst);
+ case AliasAnalysis::Ref:
+ // If the call is known to never store to the pointer, and if this is a
+ // load query, we can safely ignore it (scan past it).
+ if (isLoad)
+ continue;
+ default:
+ // Otherwise, there is a potential dependence. Return a clobber.
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+
+ // No dependence found. If this is the entry block of the function, it is a
+ // clobber, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getClobber(ScanIt);
+}
+
+/// getDependency - Return the instruction on which a memory operation
+/// depends.
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
+ Instruction *ScanPos = QueryInst;
+
+ // Check for a cached result
+ MemDepResult &LocalCache = LocalDeps[QueryInst];
+
+ // If the cached entry is non-dirty, just return it. Note that this depends
+ // on MemDepResult's default constructing to 'dirty'.
+ if (!LocalCache.isDirty())
+ return LocalCache;
+
+ // Otherwise, if we have a dirty entry, we know we can start the scan at that
+ // instruction, which may save us some work.
+ if (Instruction *Inst = LocalCache.getInst()) {
+ ScanPos = Inst;
+
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
+ }
+
+ BasicBlock *QueryParent = QueryInst->getParent();
+
+ // Do the scan.
+ if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
+ // No dependence found. If this is the entry block of the function, it is a
+ // clobber, otherwise it is non-local.
+ if (QueryParent != &QueryParent->getParent()->getEntryBlock())
+ LocalCache = MemDepResult::getNonLocal();
+ else
+ LocalCache = MemDepResult::getClobber(QueryInst);
+ } else {
+ AliasAnalysis::Location MemLoc;
+ AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+ if (MemLoc.Ptr) {
+ // If we can do a pointer scan, make it happen.
+ bool isLoad = !(MR & AliasAnalysis::Mod);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+
+ LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
+ QueryParent);
+ } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
+ CallSite QueryCS(QueryInst);
+ bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+ LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+ QueryParent);
+ } else
+ // Non-memory instruction.
+ LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+ }
+
+ // Remember the result!
+ if (Instruction *I = LocalCache.getInst())
+ ReverseLocalDeps[I].insert(QueryInst);
+
+ return LocalCache;
+}
+
+#ifndef NDEBUG
+/// AssertSorted - This method is used when -debug is specified to verify that
+/// cache arrays are properly kept sorted.
+static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ int Count = -1) {
+ if (Count == -1) Count = Cache.size();
+ if (Count == 0) return;
+
+ for (unsigned i = 1; i != unsigned(Count); ++i)
+ assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
+}
+#endif
+
+/// getNonLocalCallDependency - Perform a full dependency query for the
+/// specified call, returning the set of blocks that the value is
+/// potentially live across. The returned set of results will include a
+/// "NonLocal" result for all blocks where the value is live across.
+///
+/// This method assumes the instruction returns a "NonLocal" dependency
+/// within its own block.
+///
+/// This returns a reference to an internal data structure that may be
+/// invalidated on the next non-local query or when an instruction is
+/// removed. Clients must copy this data if they want it around longer than
+/// that.
+const MemoryDependenceAnalysis::NonLocalDepInfo &
+MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
+ assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+ "getNonLocalCallDependency should only be used on calls with non-local deps!");
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ NonLocalDepInfo &Cache = CacheP.first;
+
+ /// DirtyBlocks - This is the set of blocks that need to be recomputed. In
+ /// the cached case, this can happen due to instructions being deleted etc. In
+ /// the uncached case, this starts out as the set of predecessors we care
+ /// about.
+ SmallVector<BasicBlock*, 32> DirtyBlocks;
+
+ if (!Cache.empty()) {
+ // Okay, we have a cache entry. If we know it is not dirty, just return it
+ // with no computation.
+ if (!CacheP.second) {
+ ++NumCacheNonLocal;
+ return Cache;
+ }
+
+ // If we already have a partially computed set of results, scan them to
+ // determine what is dirty, seeding our initial DirtyBlocks worklist.
+ for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
+ I != E; ++I)
+ if (I->getResult().isDirty())
+ DirtyBlocks.push_back(I->getBB());
+
+ // Sort the cache so that we can do fast binary search lookups below.
+ std::sort(Cache.begin(), Cache.end());
+
+ ++NumCacheDirtyNonLocal;
+ //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+ // << Cache.size() << " cached: " << *QueryInst;
+ } else {
+ // Seed DirtyBlocks with each of the preds of QueryInst's block.
+ BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ ++NumUncacheNonLocal;
+ }
+
+ // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+ bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+ SmallPtrSet<BasicBlock*, 64> Visited;
+
+ unsigned NumSortedEntries = Cache.size();
+ DEBUG(AssertSorted(Cache));
+
+ // Iterate while we still have blocks to update.
+ while (!DirtyBlocks.empty()) {
+ BasicBlock *DirtyBB = DirtyBlocks.back();
+ DirtyBlocks.pop_back();
+
+ // Already processed this block?
+ if (!Visited.insert(DirtyBB))
+ continue;
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ DEBUG(AssertSorted(Cache, NumSortedEntries));
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+ NonLocalDepEntry(DirtyBB));
+ if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache.begin()+NumSortedEntries &&
+ Entry->getBB() == DirtyBB) {
+ // If we already have an entry, and if it isn't already dirty, the block
+ // is done.
+ if (!Entry->getResult().isDirty())
+ continue;
+
+ // Otherwise, remember this slot so we can update the value.
+ ExistingResult = &*Entry;
+ }
+
+ // If the dirty entry has a pointer, start scanning from it so we don't have
+ // to rescan the entire block.
+ BasicBlock::iterator ScanPos = DirtyBB->end();
+ if (ExistingResult) {
+ if (Instruction *Inst = ExistingResult->getResult().getInst()) {
+ ScanPos = Inst;
+ // We're removing QueryInst's use of Inst.
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+ QueryCS.getInstruction());
+ }
+ }
+
+ // Find out if this block has a local dependency for QueryInst.
+ MemDepResult Dep;
+
+ if (ScanPos != DirtyBB->begin()) {
+ Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
+ } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+ // No dependence found. If this is the entry block of the function, it is
+ // a clobber, otherwise it is non-local.
+ Dep = MemDepResult::getNonLocal();
+ } else {
+ Dep = MemDepResult::getClobber(ScanPos);
+ }
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the association!
+ if (!Dep.isNonLocal()) {
+ // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+ // update this when we remove instructions.
+ if (Instruction *Inst = Dep.getInst())
+ ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ } else {
+
+ // If the block *is* completely transparent to the load, we need to check
+ // the predecessors of this block. Add them to our worklist.
+ for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ }
+ }
+
+ return Cache;
+}
+
+/// getNonLocalPointerDependency - Perform a full dependency query for an
+/// access to the specified (non-volatile) memory location, returning the
+/// set of instructions that either define or clobber the value.
+///
+/// This method assumes the pointer has a "NonLocal" dependency within its
+/// own block.
+///
+void MemoryDependenceAnalysis::
+getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
+ BasicBlock *FromBB,
+ SmallVectorImpl<NonLocalDepResult> &Result) {
+ assert(Loc.Ptr->getType()->isPointerTy() &&
+ "Can't get pointer deps of a non-pointer!");
+ Result.clear();
+
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
+
+ // This is the set of blocks we've inspected, and the pointer we consider in
+ // each block. Because of critical edges, we currently bail out if querying
+ // a block with multiple different pointers. This can happen during PHI
+ // translation.
+ DenseMap<BasicBlock*, Value*> Visited;
+ if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+ Result, Visited, true))
+ return;
+ Result.clear();
+ Result.push_back(NonLocalDepResult(FromBB,
+ MemDepResult::getClobber(FromBB->begin()),
+ const_cast<Value *>(Loc.Ptr)));
+}
+
+/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
+/// Pointer/PointeeSize using either cached information in Cache or by doing a
+/// lookup (which may use dirty cache info if available). If we do a lookup,
+/// add the result to the cache.
+MemDepResult MemoryDependenceAnalysis::
+GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+ bool isLoad, BasicBlock *BB,
+ NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
+ NonLocalDepEntry(BB));
+ if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+ ExistingResult = &*Entry;
+
+ // If we have a cached entry, and it is non-dirty, use it as the value for
+ // this dependency.
+ if (ExistingResult && !ExistingResult->getResult().isDirty()) {
+ ++NumCacheNonLocalPtr;
+ return ExistingResult->getResult();
+ }
+
+ // Otherwise, we have to scan for the value. If we have a dirty cache
+ // entry, start scanning from its position, otherwise we scan from the end
+ // of the block.
+ BasicBlock::iterator ScanPos = BB->end();
+ if (ExistingResult && ExistingResult->getResult().getInst()) {
+ assert(ExistingResult->getResult().getInst()->getParent() == BB &&
+ "Instruction invalidated?");
+ ++NumCacheDirtyNonLocalPtr;
+ ScanPos = ExistingResult->getResult().getInst();
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+ } else {
+ ++NumUncacheNonLocalPtr;
+ }
+
+ // Scan the block for the dependency.
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache->push_back(NonLocalDepEntry(BB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the reverse association because we just added it
+ // to Cache!
+ if (Dep.isNonLocal())
+ return Dep;
+
+ // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
+ // update MemDep when we remove instructions.
+ Instruction *Inst = Dep.getInst();
+ assert(Inst && "Didn't depend on anything?");
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
+ return Dep;
+}
+
+/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered. This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ unsigned NumSortedEntries) {
+ switch (Cache.size() - NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+ Cache.insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+ // One new entry, Just insert the new value at the appropriate position.
+ if (Cache.size() != 1) {
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end(), Val);
+ Cache.insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache.begin(), Cache.end());
+ break;
+ }
+}
+
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true). In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason. This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+ const AliasAnalysis::Location &Loc,
+ bool isLoad, BasicBlock *StartBB,
+ SmallVectorImpl<NonLocalDepResult> &Result,
+ DenseMap<BasicBlock*, Value*> &Visited,
+ bool SkipFirstBlock) {
+
+ // Look up the cached info for Pointer.
+ ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
+
+ // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+ // CacheKey, this value will be inserted as the associated value. Otherwise,
+ // it'll be ignored, and we'll have to check to see if the cached size and
+ // tbaa tag are consistent with the current query.
+ NonLocalPointerInfo InitialNLPI;
+ InitialNLPI.Size = Loc.Size;
+ InitialNLPI.TBAATag = Loc.TBAATag;
+
+ // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+ // already have one.
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+ NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+ // If we already have a cache entry for this CacheKey, we may need to do some
+ // work to reconcile the cache entry and the current query.
+ if (!Pair.second) {
+ if (CacheInfo->Size < Loc.Size) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and procede with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else if (CacheInfo->Size > Loc.Size) {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(Pointer,
+ Loc.getWithNewSize(CacheInfo->Size),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+
+ // If the query's TBAATag is inconsistent with the cached one,
+ // conservatively throw out the cached data and restart the query with
+ // no tag if needed.
+ if (CacheInfo->TBAATag != Loc.TBAATag) {
+ if (CacheInfo->TBAATag) {
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->TBAATag = 0;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ }
+ if (Loc.TBAATag)
+ return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+ }
+
+ NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
+
+ // If we have valid cached information for exactly the block we are
+ // investigating, just return it with no recomputation.
+ if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+ // We have a fully cached result for this query then we can just return the
+ // cached results and populate the visited set. However, we have to verify
+ // that we don't already have conflicting results for these blocks. Check
+ // to ensure that if a block in the results set is in the visited set that
+ // it was for the same pointer query.
+ if (!Visited.empty()) {
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+ if (VI == Visited.end() || VI->second == Pointer.getAddr())
+ continue;
+
+ // We have a pointer mismatch in a block. Just return clobber, saying
+ // that something was clobbered in this result. We could also do a
+ // non-fully cached query, but there is little point in doing this.
+ return true;
+ }
+ }
+
+ Value *Addr = Pointer.getAddr();
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ Visited.insert(std::make_pair(I->getBB(), Addr));
+ if (!I->getResult().isNonLocal())
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+ }
+ ++NumCacheCompleteNonLocalPtr;
+ return false;
+ }
+
+ // Otherwise, either this is a new block, a block with an invalid cache
+ // pointer or one that we're about to invalidate by putting more info into it
+ // than its valid cache info. If empty, the result will be valid cache info,
+ // otherwise it isn't.
+ if (Cache->empty())
+ CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+ else
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(StartBB);
+
+ // Keep track of the entries that we know are sorted. Previously cached
+ // entries will all be sorted. The entries we add we only sort on demand (we
+ // don't insert every element into its sorted position). We know that we
+ // won't get any reuse from currently inserted values, because we don't
+ // revisit blocks after we insert info for them.
+ unsigned NumSortedEntries = Cache->size();
+ DEBUG(AssertSorted(*Cache));
+
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+
+ // Skip the first block if we have it.
+ if (!SkipFirstBlock) {
+ // Analyze the dependency of *Pointer in FromBB. See if we already have
+ // been here.
+ assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
+
+ // Get the dependency info for Pointer in BB. If we have cached
+ // information, we will use it, otherwise we compute it.
+ DEBUG(AssertSorted(*Cache, NumSortedEntries));
+ MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+ NumSortedEntries);
+
+ // If we got a Def or Clobber, add this to the list of results.
+ if (!Dep.isNonLocal()) {
+ Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+ continue;
+ }
+ }
+
+ // If 'Pointer' is an instruction defined in this block, then we need to do
+ // phi translation to change it into a value live in the predecessor block.
+ // If not, we just add the predecessors to the worklist and scan them with
+ // the same Pointer.
+ if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
+ SkipFirstBlock = false;
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ // Verify that we haven't looked at this block yet.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
+ if (InsertRes.second) {
+ // First time we've looked at *PI.
+ Worklist.push_back(*PI);
+ continue;
+ }
+
+ // If we have seen this block before, but it was with a different
+ // pointer then we have a phi translation failure and we have to treat
+ // this as a clobber.
+ if (InsertRes.first->second != Pointer.getAddr())
+ goto PredTranslationFailure;
+ }
+ continue;
+ }
+
+ // We do need to do phi translation, if we know ahead of time we can't phi
+ // translate this value, don't even try.
+ if (!Pointer.IsPotentiallyPHITranslatable())
+ goto PredTranslationFailure;
+
+ // We may have added values to the cache list before this PHI translation.
+ // If so, we haven't done anything to ensure that the cache remains sorted.
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
+ Cache = 0;
+
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ BasicBlock *Pred = *PI;
+
+ // Get the PHI translated pointer in this predecessor. This can fail if
+ // not translatable, in which case the getAddr() returns null.
+ PHITransAddr PredPointer(Pointer);
+ PredPointer.PHITranslateValue(BB, Pred, 0);
+
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ // Check to see if we have already visited this pred block with another
+ // pointer. If so, we can't do this lookup. This failure can occur
+ // with PHI translation when a critical edge exists and the PHI node in
+ // the successor translates to a pointer value different than the
+ // pointer the block was first analyzed with.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
+
+ if (!InsertRes.second) {
+ // If the predecessor was visited with PredPtr, then we already did
+ // the analysis and can ignore it.
+ if (InsertRes.first->second == PredPtrVal)
+ continue;
+
+ // Otherwise, the block was previously analyzed with a different
+ // pointer. We can't represent the result of this case, so we just
+ // treat this as a phi translation failure.
+ goto PredTranslationFailure;
+ }
+
+ // If PHI translation was unable to find an available pointer in this
+ // predecessor, then we have to assume that the pointer is clobbered in
+ // that predecessor. We can still do PRE of the load, which would insert
+ // a computation of the pointer in this predecessor.
+ if (PredPtrVal == 0) {
+ // Add the entry to the Result list.
+ NonLocalDepResult Entry(Pred,
+ MemDepResult::getClobber(Pred->getTerminator()),
+ PredPtrVal);
+ Result.push_back(Entry);
+
+ // Since we had a phi translation failure, the cache for CacheKey won't
+ // include all of the entries that we need to immediately satisfy future
+ // queries. Mark this in NonLocalPointerDeps by setting the
+ // BBSkipFirstBlockPair pointer to null. This requires reuse of the
+ // cached value to do more work but not miss the phi trans failure.
+ NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+ NLPI.Pair = BBSkipFirstBlockPair();
+ continue;
+ }
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If we have a problem phi translating, fall through to the code below
+ // to handle the failure condition.
+ if (getNonLocalPointerDepFromBB(PredPointer,
+ Loc.getWithNewPtr(PredPointer.getAddr()),
+ isLoad, Pred,
+ Result, Visited))
+ goto PredTranslationFailure;
+ }
+
+ // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+
+ // Since we did phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set" Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ SkipFirstBlock = false;
+ continue;
+
+ PredTranslationFailure:
+
+ if (Cache == 0) {
+ // Refresh the CacheInfo/Cache pointer if it got invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+ }
+
+ // Since we failed phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ // If *nothing* works, mark the pointer as being clobbered by the first
+ // instruction in this block.
+ //
+ // If this is the magic first block, return this as a clobber of the whole
+ // incoming value. Since we can't phi translate to one of the predecessors,
+ // we have to bail out.
+ if (SkipFirstBlock)
+ return true;
+
+ for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+ assert(I != Cache->rend() && "Didn't find current block??");
+ if (I->getBB() != BB)
+ continue;
+
+ assert(I->getResult().isNonLocal() &&
+ "Should only be here with transparent block");
+ I->setResult(MemDepResult::getClobber(BB->begin()));
+ ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
+ Pointer.getAddr()));
+ break;
+ }
+ }
+
+ // Okay, we're done now. If we added new values to the cache, re-sort it.
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ DEBUG(AssertSorted(*Cache));
+ return false;
+}
+
+/// RemoveCachedNonLocalPointerDependencies - If P exists in
+/// CachedNonLocalPointerInfo, remove it.
+void MemoryDependenceAnalysis::
+RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
+ CachedNonLocalPointerInfo::iterator It =
+ NonLocalPointerDeps.find(P);
+ if (It == NonLocalPointerDeps.end()) return;
+
+ // Remove all of the entries in the BB->val map. This involves removing
+ // instructions from the reverse map.
+ NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
+
+ for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
+ Instruction *Target = PInfo[i].getResult().getInst();
+ if (Target == 0) continue; // Ignore non-local dep results.
+ assert(Target->getParent() == PInfo[i].getBB());
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
+ }
+
+ // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
+ NonLocalPointerDeps.erase(It);
+}
+
+
+/// invalidateCachedPointerInfo - This method is used to invalidate cached
+/// information about the specified pointer, because it may be too
+/// conservative in memdep. This is an optional call that can be used when
+/// the client detects an equivalence between the pointer and some other
+/// value and replaces the other value with ptr. This can make Ptr available
+/// in more places that cached info does not necessarily keep.
+void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
+ // If Ptr isn't really a pointer, just ignore it.
+ if (!Ptr->getType()->isPointerTy()) return;
+ // Flush store info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
+ // Flush load info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+}
+
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+ PredCache->clear();
+}
+
+/// removeInstruction - Remove an instruction from the dependence analysis,
+/// updating the dependence of instructions that previously depended on it.
+/// This method attempts to keep the cache coherent using the reverse map.
+void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
+ // Walk through the Non-local dependencies, removing this one as the value
+ // for any cached queries.
+ NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
+ if (NLDI != NonLocalDeps.end()) {
+ NonLocalDepInfo &BlockMap = NLDI->second.first;
+ for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
+ DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
+ NonLocalDeps.erase(NLDI);
+ }
+
+ // If we have a cached local dependence query for this instruction, remove it.
+ //
+ LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
+ if (LocalDepEntry != LocalDeps.end()) {
+ // Remove us from DepInst's reverse set now that the local dep info is gone.
+ if (Instruction *Inst = LocalDepEntry->second.getInst())
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
+
+ // Remove this local dependency info.
+ LocalDeps.erase(LocalDepEntry);
+ }
+
+ // If we have any cached pointer dependencies on this instruction, remove
+ // them. If the instruction has non-pointer type, then it can't be a pointer
+ // base.
+
+ // Remove it from both the load info and the store info. The instruction
+ // can't be in either of these maps if it is non-pointer.
+ if (RemInst->getType()->isPointerTy()) {
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
+ }
+
+ // Loop over all of the things that depend on the instruction we're removing.
+ //
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
+
+ // If we find RemInst as a clobber or Def in any of the maps for other values,
+ // we need to replace its entry with a dirty version of the instruction after
+ // it. If RemInst is a terminator, we use a null dirty value.
+ //
+ // Using a dirty version of the instruction after RemInst saves having to scan
+ // the entire block to get to this point.
+ MemDepResult NewDirtyVal;
+ if (!RemInst->isTerminator())
+ NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
+
+ ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
+ // RemInst can't be the terminator if it has local stuff depending on it.
+ assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ "Nothing can locally depend on a terminator");
+
+ for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
+ E = ReverseDeps.end(); I != E; ++I) {
+ Instruction *InstDependingOnRemInst = *I;
+ assert(InstDependingOnRemInst != RemInst &&
+ "Already removed our local dep info");
+
+ LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
+
+ // Make sure to remember that new things depend on NewDepInst.
+ assert(NewDirtyVal.getInst() && "There is no way something else can have "
+ "a local dep on this if it is a terminator!");
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ InstDependingOnRemInst));
+ }
+
+ ReverseLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating the
+ // 'ReverseDeps' reference.
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseNonLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
+ for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
+ I != E; ++I) {
+ assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+
+ PerInstNLInfo &INLD = NonLocalDeps[*I];
+ // The information is now dirty!
+ INLD.second = true;
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+ DE = INLD.first.end(); DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NextI = NewDirtyVal.getInst())
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ }
+ }
+
+ ReverseNonLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
+ // value in the NonLocalPointerDeps info.
+ ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
+ ReverseNonLocalPtrDeps.find(RemInst);
+ if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
+ SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
+ SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
+ E = Set.end(); I != E; ++I) {
+ ValueIsLoadPair P = *I;
+ assert(P.getPointer() != RemInst &&
+ "Already removed NonLocalPointerDeps info for RemInst");
+
+ NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
+
+ // The cache is not valid for any specific block anymore.
+ NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
+
+ // Update any entries for RemInst to use the instruction after it.
+ for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
+ DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
+ ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
+ }
+
+ // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
+ // subsequent value may invalidate the sortedness.
+ std::sort(NLPDI.begin(), NLPDI.end());
+ }
+
+ ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
+
+ while (!ReversePtrDepsToAdd.empty()) {
+ ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
+ .insert(ReversePtrDepsToAdd.back().second);
+ ReversePtrDepsToAdd.pop_back();
+ }
+ }
+
+
+ assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
+ AA->deleteValue(RemInst);
+ DEBUG(verifyRemoved(RemInst));
+}
+/// verifyRemoved - Verify that the specified instruction does not occur
+/// in our internal data structures.
+void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+ for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
+ E = LocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ assert(I->second.getInst() != D &&
+ "Inst occurs in data structures");
+ }
+
+ for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
+ E = NonLocalPointerDeps.end(); I != E; ++I) {
+ assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
+ const NonLocalDepInfo &Val = I->second.NonLocalDeps;
+ for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
+ II != E; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
+ }
+
+ for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
+ E = NonLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ const PerInstNLInfo &INLD = I->second;
+ for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
+ EE = INLD.first.end(); II != EE; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
+ E = ReverseLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
+ E = ReverseNonLocalDeps.end();
+ I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseNonLocalPtrDepTy::const_iterator
+ I = ReverseNonLocalPtrDeps.begin(),
+ E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in rev NLPD map");
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
+ E = I->second.end(); II != E; ++II)
+ assert(*II != ValueIsLoadPair(D, false) &&
+ *II != ValueIsLoadPair(D, true) &&
+ "Inst occurs in ReverseNonLocalPtrDeps map");
+ }
+
+}
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
new file mode 100644
index 000000000000..e7e999cebeb9
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -0,0 +1,87 @@
+//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass decodes the debug info metadata in a module and prints in a
+// (sufficiently-prepared-) human-readable form.
+//
+// For example, run this pass from opt along with the -analyze option, and
+// it'll print to standard output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+namespace {
+ class ModuleDebugInfoPrinter : public ModulePass {
+ DebugInfoFinder Finder;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ModuleDebugInfoPrinter() : ModulePass(ID) {
+ initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const;
+ };
+}
+
+char ModuleDebugInfoPrinter::ID = 0;
+INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
+ "Decodes module-level debug info", false, true)
+
+ModulePass *llvm::createModuleDebugInfoPrinterPass() {
+ return new ModuleDebugInfoPrinter();
+}
+
+bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
+ Finder.processModule(M);
+ return false;
+}
+
+void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+ for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
+ E = Finder.compile_unit_end(); I != E; ++I) {
+ O << "Compile Unit: ";
+ DICompileUnit(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
+ E = Finder.subprogram_end(); I != E; ++I) {
+ O << "Subprogram: ";
+ DISubprogram(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.global_variable_begin(),
+ E = Finder.global_variable_end(); I != E; ++I) {
+ O << "GlobalVariable: ";
+ DIGlobalVariable(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.type_begin(),
+ E = Finder.type_end(); I != E; ++I) {
+ O << "Type: ";
+ DIType(*I).print(O);
+ O << '\n';
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
new file mode 100644
index 000000000000..101c2d5b0285
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
@@ -0,0 +1,88 @@
+//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default implementation of the Alias Analysis interface
+// that simply returns "I don't know" for all queries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+namespace {
+ /// NoAA - This class implements the -no-aa pass, which always returns "I
+ /// don't know" for alias queries. NoAA is unlike other alias analysis
+ /// implementations, in that it does not chain to a previous analysis. As
+ /// such it doesn't follow many of the rules that other alias analyses must.
+ ///
+ struct NoAA : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ NoAA() : ImmutablePass(ID) {
+ initializeNoAAPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+ virtual void initializePass() {
+ // Note: NoAA does not call InitializeAliasAnalysis because it's
+ // special and does not support chaining.
+ TD = getAnalysisIfAvailable<TargetData>();
+ }
+
+ virtual AliasResult alias(const Location &LocA, const Location &LocB) {
+ return MayAlias;
+ }
+
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ return UnknownModRefBehavior;
+ }
+ virtual ModRefBehavior getModRefBehavior(const Function *F) {
+ return UnknownModRefBehavior;
+ }
+
+ virtual bool pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ return false;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ return ModRef;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return ModRef;
+ }
+
+ virtual void deleteValue(Value *V) {}
+ virtual void copyValue(Value *From, Value *To) {}
+ virtual void addEscapingUse(Use &U) {}
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char NoAA::ID = 0;
+INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
+ "No Alias Analysis (always returns 'may' alias)",
+ true, true, true)
+
+ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
new file mode 100644
index 000000000000..70dcd0df242d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -0,0 +1,442 @@
+//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PHITransAddr class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CanPHITrans(Instruction *Inst) {
+ if (isa<PHINode>(Inst) ||
+ isa<GetElementPtrInst>(Inst))
+ return true;
+
+ if (isa<CastInst>(Inst) &&
+ Inst->isSafeToSpeculativelyExecute())
+ return true;
+
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1)))
+ return true;
+
+ // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+ // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+ // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+ return false;
+}
+
+void PHITransAddr::dump() const {
+ if (Addr == 0) {
+ dbgs() << "PHITransAddr: null\n";
+ return;
+ }
+ dbgs() << "PHITransAddr: " << *Addr << "\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n";
+}
+
+
+static bool VerifySubExpr(Value *Expr,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ // If this is a non-instruction value, there is nothing to do.
+ Instruction *I = dyn_cast<Instruction>(Expr);
+ if (I == 0) return true;
+
+ // If it's an instruction, it is either in Tmp or its operands recursively
+ // are.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return true;
+ }
+
+ // If it isn't in the InstInputs list it is a subexpr incorporated into the
+ // address. Sanity check that it is phi translatable.
+ if (!CanPHITrans(I)) {
+ errs() << "Non phi translatable instruction found in PHITransAddr:\n";
+ errs() << *I << '\n';
+ llvm_unreachable("Either something is missing from InstInputs or "
+ "CanPHITrans is wrong.");
+ return false;
+ }
+
+ // Validate the operands of the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!VerifySubExpr(I->getOperand(i), InstInputs))
+ return false;
+
+ return true;
+}
+
+/// Verify - Check internal consistency of this data structure. If the
+/// structure is valid, it returns true. If invalid, it prints errors and
+/// returns false.
+bool PHITransAddr::Verify() const {
+ if (Addr == 0) return true;
+
+ SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
+
+ if (!VerifySubExpr(Addr, Tmp))
+ return false;
+
+ if (!Tmp.empty()) {
+ errs() << "PHITransAddr contains extra instructions:\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
+ llvm_unreachable("This is unexpected.");
+ return false;
+ }
+
+ // a-ok.
+ return true;
+}
+
+
+/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+/// if we have some hope of doing it. This should be used as a filter to
+/// avoid calling PHITranslateValue in hopeless situations.
+bool PHITransAddr::IsPotentiallyPHITranslatable() const {
+ // If the input value is not an instruction, or if it is not defined in CurBB,
+ // then we don't need to phi translate it.
+ Instruction *Inst = dyn_cast<Instruction>(Addr);
+ return Inst == 0 || CanPHITrans(Inst);
+}
+
+
+static void RemoveInstInputs(Value *V,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return;
+
+ // If the instruction is in the InstInputs list, remove it.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return;
+ }
+
+ assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+
+ // Otherwise, it must have instruction inputs itself. Zap them recursively.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+ RemoveInstInputs(Op, InstInputs);
+ }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+ BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ // If this is a non-instruction value, it can't require PHI translation.
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (Inst == 0) return V;
+
+ // Determine whether 'Inst' is an input to our PHI translatable expression.
+ bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+
+ // Handle inputs instructions if needed.
+ if (isInput) {
+ if (Inst->getParent() != CurBB) {
+ // If it is an input defined in a different block, then it remains an
+ // input.
+ return Inst;
+ }
+
+ // If 'Inst' is defined in this block and is an input that needs to be phi
+ // translated, we need to incorporate the value into the expression or fail.
+
+ // In either case, the instruction itself isn't an input any longer.
+ InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+
+ // If this is a PHI, go ahead and translate it.
+ if (PHINode *PN = dyn_cast<PHINode>(Inst))
+ return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+
+ // If this is a non-phi value, and it is analyzable, we can incorporate it
+ // into the expression by making all instruction operands be inputs.
+ if (!CanPHITrans(Inst))
+ return 0;
+
+ // All instruction operands are now inputs (and of course, they may also be
+ // defined in this block, so they may need to be phi translated themselves.
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+ InstInputs.push_back(Op);
+ }
+
+ // Ok, it must be an intermediate result (either because it started that way
+ // or because we just incorporated it into the expression). See if its
+ // operands need to be phi translated, and if so, reconstruct it.
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
+ if (PHIIn == 0) return 0;
+ if (PHIIn == Cast->getOperand(0))
+ return Cast;
+
+ // Find an available version of this cast.
+
+ // Constants are trivial to find.
+ if (Constant *C = dyn_cast<Constant>(PHIIn))
+ return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
+ C, Cast->getType()));
+
+ // Otherwise we have to see if a casted version of the incoming pointer
+ // is available. If so, we can use it, otherwise we have to fail.
+ for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+ UI != E; ++UI) {
+ if (CastInst *CastI = dyn_cast<CastInst>(*UI))
+ if (CastI->getOpcode() == Cast->getOpcode() &&
+ CastI->getType() == Cast->getType() &&
+ (!DT || DT->dominates(CastI->getParent(), PredBB)))
+ return CastI;
+ }
+ return 0;
+ }
+
+ // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ bool AnyChanged = false;
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
+ if (GEPOp == 0) return 0;
+
+ AnyChanged |= GEPOp != GEP->getOperand(i);
+ GEPOps.push_back(GEPOp);
+ }
+
+ if (!AnyChanged)
+ return GEP;
+
+ // Simplify the GEP to handle 'gep x, 0' -> x etc.
+ if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) {
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ RemoveInstInputs(GEPOps[i], InstInputs);
+
+ return AddAsInput(V);
+ }
+
+ // Scan to see if we have this GEP available.
+ Value *APHIOp = GEPOps[0];
+ for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
+ UI != E; ++UI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
+ if (GEPI->getType() == GEP->getType() &&
+ GEPI->getNumOperands() == GEPOps.size() &&
+ GEPI->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
+ bool Mismatch = false;
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ if (GEPI->getOperand(i) != GEPOps[i]) {
+ Mismatch = true;
+ break;
+ }
+ if (!Mismatch)
+ return GEPI;
+ }
+ }
+ return 0;
+ }
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
+ bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
+ bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
+
+ Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
+ if (LHS == 0) return 0;
+
+ // If the PHI translated LHS is an add of a constant, fold the immediates.
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
+ if (BOp->getOpcode() == Instruction::Add)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ LHS = BOp->getOperand(0);
+ RHS = ConstantExpr::getAdd(RHS, CI);
+ isNSW = isNUW = false;
+
+ // If the old 'LHS' was an input, add the new 'LHS' as an input.
+ if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
+ RemoveInstInputs(BOp, InstInputs);
+ AddAsInput(LHS);
+ }
+ }
+
+ // See if the add simplifies away.
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
+ // If we simplified the operands, the LHS is no longer an input, but Res
+ // is.
+ RemoveInstInputs(LHS, InstInputs);
+ return AddAsInput(Res);
+ }
+
+ // If we didn't modify the add, just return it.
+ if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
+ return Inst;
+
+ // Otherwise, see if we have this add available somewhere.
+ for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
+ UI != E; ++UI) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
+ if (BO->getOpcode() == Instruction::Add &&
+ BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
+ BO->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(BO->getParent(), PredBB)))
+ return BO;
+ }
+
+ return 0;
+ }
+
+ // Otherwise, we failed.
+ return 0;
+}
+
+
+/// PHITranslateValue - PHI translate the current address up the CFG from
+/// CurBB to Pred, updating our state to reflect any needed changes. If the
+/// dominator tree DT is non-null, the translated value must dominate
+/// PredBB. This returns true on failure and sets Addr to null.
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ assert(Verify() && "Invalid PHITransAddr!");
+ Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
+ assert(Verify() && "Invalid PHITransAddr!");
+
+ if (DT) {
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
+ if (!DT->dominates(Inst->getParent(), PredBB))
+ Addr = 0;
+ }
+
+ return Addr == 0;
+}
+
+/// PHITranslateWithInsertion - PHI translate this value into the specified
+/// predecessor block, inserting a computation of the value if it is
+/// unavailable.
+///
+/// All newly created instructions are added to the NewInsts list. This
+/// returns null on failure.
+///
+Value *PHITransAddr::
+PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ unsigned NISize = NewInsts.size();
+
+ // Attempt to PHI translate with insertion.
+ Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+
+ // If successful, return the new value.
+ if (Addr) return Addr;
+
+ // If not, destroy any intermediate instructions inserted.
+ while (NewInsts.size() != NISize)
+ NewInsts.pop_back_val()->eraseFromParent();
+ return 0;
+}
+
+
+/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
+/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+/// block. All newly created instructions are added to the NewInsts list.
+/// This returns null on failure.
+///
+Value *PHITransAddr::
+InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+ BasicBlock *PredBB, const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ // See if we have a version of this value already available and dominating
+ // PredBB. If so, there is no need to insert a new instance of it.
+ PHITransAddr Tmp(InVal, TD);
+ if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
+ return Tmp.getAddr();
+
+ // If we don't have an available version of this value, it must be an
+ // instruction.
+ Instruction *Inst = cast<Instruction>(InVal);
+
+ // Handle cast of PHI translatable value.
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ // Otherwise insert a cast at the end of PredBB.
+ CastInst *New = CastInst::Create(Cast->getOpcode(),
+ OpVal, InVal->getType(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ NewInsts.push_back(New);
+ return New;
+ }
+
+ // Handle getelementptr with at least one PHI operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ BasicBlock *CurBB = GEP->getParent();
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+ GEPOps.push_back(OpVal);
+ }
+
+ GetElementPtrInst *Result =
+ GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Result->setIsInBounds(GEP->isInBounds());
+ NewInsts.push_back(Result);
+ return Result;
+ }
+
+#if 0
+ // FIXME: This code works, but it is unclear that we actually want to insert
+ // a big chain of computation in order to make a value available in a block.
+ // This needs to be evaluated carefully to consider its cost trade offs.
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
+ NewInsts.push_back(Res);
+ return Res;
+ }
+#endif
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp
new file mode 100644
index 000000000000..7c584daef734
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp
@@ -0,0 +1,522 @@
+//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Ball-Larus path numbers uniquely identify paths through a directed acyclic
+// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony
+// edges to obtain a DAG, and thus the unique path numbers [Ball96].
+//
+// The purpose of this analysis is to enumerate the edges in a CFG in order
+// to obtain paths from path numbers in a convenient manner. As described in
+// [Ball96] edges can be enumerated such that given a path number by following
+// the CFG and updating the path number, the path is obtained.
+//
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "ball-larus-numbering"
+
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+#include <stack>
+#include <string>
+#include <utility>
+#include <sstream>
+
+using namespace llvm;
+
+// Are we enabling early termination
+static cl::opt<bool> ProcessEarlyTermination(
+ "path-profile-early-termination", cl::Hidden,
+ cl::desc("In path profiling, insert extra instrumentation to account for "
+ "unexpected function termination."));
+
+// Returns the basic block for the BallLarusNode
+BasicBlock* BallLarusNode::getBlock() {
+ return(_basicBlock);
+}
+
+// Returns the number of paths to the exit starting at the node.
+unsigned BallLarusNode::getNumberPaths() {
+ return(_numberPaths);
+}
+
+// Sets the number of paths to the exit starting at the node.
+void BallLarusNode::setNumberPaths(unsigned numberPaths) {
+ _numberPaths = numberPaths;
+}
+
+// Gets the NodeColor used in graph algorithms.
+BallLarusNode::NodeColor BallLarusNode::getColor() {
+ return(_color);
+}
+
+// Sets the NodeColor used in graph algorithms.
+void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
+ _color = color;
+}
+
+// Returns an iterator over predecessor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::predBegin() {
+ return(_predEdges.begin());
+}
+
+// Returns the end sentinel for the predecessor iterator.
+BLEdgeIterator BallLarusNode::predEnd() {
+ return(_predEdges.end());
+}
+
+// Returns the number of predecessor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberPredEdges() {
+ return(_predEdges.size());
+}
+
+// Returns an iterator over successor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::succBegin() {
+ return(_succEdges.begin());
+}
+
+// Returns the end sentinel for the successor iterator.
+BLEdgeIterator BallLarusNode::succEnd() {
+ return(_succEdges.end());
+}
+
+// Returns the number of successor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberSuccEdges() {
+ return(_succEdges.size());
+}
+
+// Add an edge to the predecessor list.
+void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
+ _predEdges.push_back(edge);
+}
+
+// Remove an edge from the predecessor list.
+void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
+ removeEdge(_predEdges, edge);
+}
+
+// Add an edge to the successor list.
+void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
+ _succEdges.push_back(edge);
+}
+
+// Remove an edge from the successor list.
+void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
+ removeEdge(_succEdges, edge);
+}
+
+// Returns the name of the BasicBlock being represented. If BasicBlock
+// is null then returns "<null>". If BasicBlock has no name, then
+// "<unnamed>" is returned. Intended for use with debug output.
+std::string BallLarusNode::getName() {
+ std::stringstream name;
+
+ if(getBlock() != NULL) {
+ if(getBlock()->hasName()) {
+ std::string tempName(getBlock()->getName());
+ name << tempName.c_str() << " (" << _uid << ")";
+ } else
+ name << "<unnamed> (" << _uid << ")";
+ } else
+ name << "<null> (" << _uid << ")";
+
+ return name.str();
+}
+
+// Removes an edge from an edgeVector. Used by removePredEdge and
+// removeSuccEdge.
+void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
+ // TODO: Avoid linear scan by using a set instead
+ for(BLEdgeIterator i = v.begin(),
+ end = v.end();
+ i != end;
+ ++i) {
+ if((*i) == e) {
+ v.erase(i);
+ break;
+ }
+ }
+}
+
+// Returns the source node of this edge.
+BallLarusNode* BallLarusEdge::getSource() const {
+ return(_source);
+}
+
+// Returns the target node of this edge.
+BallLarusNode* BallLarusEdge::getTarget() const {
+ return(_target);
+}
+
+// Sets the type of the edge.
+BallLarusEdge::EdgeType BallLarusEdge::getType() const {
+ return _edgeType;
+}
+
+// Gets the type of the edge.
+void BallLarusEdge::setType(EdgeType type) {
+ _edgeType = type;
+}
+
+// Returns the weight of this edge. Used to decode path numbers to sequences
+// of basic blocks.
+unsigned BallLarusEdge::getWeight() {
+ return(_weight);
+}
+
+// Sets the weight of the edge. Used during path numbering.
+void BallLarusEdge::setWeight(unsigned weight) {
+ _weight = weight;
+}
+
+// Gets the phony edge originating at the root.
+BallLarusEdge* BallLarusEdge::getPhonyRoot() {
+ return _phonyRoot;
+}
+
+// Sets the phony edge originating at the root.
+void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
+ _phonyRoot = phonyRoot;
+}
+
+// Gets the phony edge terminating at the exit.
+BallLarusEdge* BallLarusEdge::getPhonyExit() {
+ return _phonyExit;
+}
+
+// Sets the phony edge terminating at the exit.
+void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
+ _phonyExit = phonyExit;
+}
+
+// Gets the associated real edge if this is a phony edge.
+BallLarusEdge* BallLarusEdge::getRealEdge() {
+ return _realEdge;
+}
+
+// Sets the associated real edge if this is a phony edge.
+void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
+ _realEdge = realEdge;
+}
+
+// Returns the duplicate number of the edge.
+unsigned BallLarusEdge::getDuplicateNumber() {
+ return(_duplicateNumber);
+}
+
+// Initialization that requires virtual functions which are not fully
+// functional in the constructor.
+void BallLarusDag::init() {
+ BLBlockNodeMap inDag;
+ std::stack<BallLarusNode*> dfsStack;
+
+ _root = addNode(&(_function.getEntryBlock()));
+ _exit = addNode(NULL);
+
+ // start search from root
+ dfsStack.push(getRoot());
+
+ // dfs to add each bb into the dag
+ while(dfsStack.size())
+ buildNode(inDag, dfsStack);
+
+ // put in the final edge
+ addEdge(getExit(),getRoot(),0);
+}
+
+// Frees all memory associated with the DAG.
+BallLarusDag::~BallLarusDag() {
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
+ ++edge)
+ delete (*edge);
+
+ for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
+ ++node)
+ delete (*node);
+}
+
+// Calculate the path numbers by assigning edge increments as prescribed
+// in Ball-Larus path profiling.
+void BallLarusDag::calculatePathNumbers() {
+ BallLarusNode* node;
+ std::queue<BallLarusNode*> bfsQueue;
+ bfsQueue.push(getExit());
+
+ while(bfsQueue.size() > 0) {
+ node = bfsQueue.front();
+
+ DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
+
+ bfsQueue.pop();
+ unsigned prevPathNumber = node->getNumberPaths();
+ calculatePathNumbersFrom(node);
+
+ // Check for DAG splitting
+ if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
+ // Add new phony edge from the split-node to the DAG's exit
+ BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
+ exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+
+ // Counters to handle the possibility of a multi-graph
+ BasicBlock* oldTarget = 0;
+ unsigned duplicateNumber = 0;
+
+ // Iterate through each successor edge, adding phony edges
+ for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
+
+ if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
+ // is this edge a duplicate?
+ if( oldTarget != (*succ)->getTarget()->getBlock() )
+ duplicateNumber = 0;
+
+ // create the new phony edge: root -> succ
+ BallLarusEdge* rootEdge =
+ addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
+ rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+ rootEdge->setRealEdge(*succ);
+
+ // split on this edge and reference it's exit/root phony edges
+ (*succ)->setType(BallLarusEdge::SPLITEDGE);
+ (*succ)->setPhonyRoot(rootEdge);
+ (*succ)->setPhonyExit(exitEdge);
+ (*succ)->setWeight(0);
+ }
+ }
+
+ calculatePathNumbersFrom(node);
+ }
+
+ DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
+ << node->getNumberPaths() << ".\n");
+
+ if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
+ DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
+ for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
+ pred != end; pred++) {
+ if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
+ (*pred)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ BallLarusNode* nextNode = (*pred)->getSource();
+ // not yet visited?
+ if(nextNode->getNumberPaths() == 0)
+ bfsQueue.push(nextNode);
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
+}
+
+// Returns the number of paths for the Dag.
+unsigned BallLarusDag::getNumberOfPaths() {
+ return(getRoot()->getNumberPaths());
+}
+
+// Returns the root (i.e. entry) node for the DAG.
+BallLarusNode* BallLarusDag::getRoot() {
+ return _root;
+}
+
+// Returns the exit node for the DAG.
+BallLarusNode* BallLarusDag::getExit() {
+ return _exit;
+}
+
+// Returns the function for the DAG.
+Function& BallLarusDag::getFunction() {
+ return(_function);
+}
+
+// Clears the node colors.
+void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
+ for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
+ (*nodeIt)->setColor(color);
+}
+
+// Processes one node and its imediate edges for building the DAG.
+void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
+ BallLarusNode* currentNode = dfsStack.top();
+ BasicBlock* currentBlock = currentNode->getBlock();
+
+ if(currentNode->getColor() != BallLarusNode::WHITE) {
+ // we have already visited this node
+ dfsStack.pop();
+ currentNode->setColor(BallLarusNode::BLACK);
+ } else {
+ // are there any external procedure calls?
+ if( ProcessEarlyTermination ) {
+ for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
+ bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
+ bbCurrent++ ) {
+ Instruction& instr = *bbCurrent;
+ if( instr.getOpcode() == Instruction::Call ) {
+ BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
+ callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
+ break;
+ }
+ }
+ }
+
+ TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
+ if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
+ || isa<UnwindInst>(terminator))
+ addEdge(currentNode, getExit(),0);
+
+ currentNode->setColor(BallLarusNode::GRAY);
+ inDag[currentBlock] = currentNode;
+
+ BasicBlock* oldSuccessor = 0;
+ unsigned duplicateNumber = 0;
+
+ // iterate through this node's successors
+ for(succ_iterator successor = succ_begin(currentBlock),
+ succEnd = succ_end(currentBlock); successor != succEnd;
+ oldSuccessor = *successor, ++successor ) {
+ BasicBlock* succBB = *successor;
+
+ // is this edge a duplicate?
+ if (oldSuccessor == succBB)
+ duplicateNumber++;
+ else
+ duplicateNumber = 0;
+
+ buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
+ }
+ }
+}
+
+// Process an edge in the CFG for DAG building.
+void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
+ dfsStack, BallLarusNode* currentNode,
+ BasicBlock* succBB, unsigned duplicateCount) {
+ BallLarusNode* succNode = inDag[succBB];
+
+ if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
+ // visited node and forward edge
+ addEdge(currentNode, succNode, duplicateCount);
+ } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
+ // visited node and back edge
+ DEBUG(dbgs() << "Backedge detected.\n");
+ addBackedge(currentNode, succNode, duplicateCount);
+ } else {
+ BallLarusNode* childNode;
+ // not visited node and forward edge
+ if(succNode) // an unvisited node that is child of a gray node
+ childNode = succNode;
+ else { // an unvisited node that is a child of a an unvisted node
+ childNode = addNode(succBB);
+ inDag[succBB] = childNode;
+ }
+ addEdge(currentNode, childNode, duplicateCount);
+ dfsStack.push(childNode);
+ }
+}
+
+// The weight on each edge is the increment required along any path that
+// contains that edge.
+void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
+ if(node == getExit())
+ // The Exit node must be base case
+ node->setNumberPaths(1);
+ else {
+ unsigned sumPaths = 0;
+ BallLarusNode* succNode;
+
+ for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; succ++) {
+ if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
+ (*succ)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ (*succ)->setWeight(sumPaths);
+ succNode = (*succ)->getTarget();
+
+ if( !succNode->getNumberPaths() )
+ return;
+ sumPaths += succNode->getNumberPaths();
+ }
+
+ node->setNumberPaths(sumPaths);
+ }
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each
+// pointer created.
+BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
+ return( new BallLarusNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each
+// pointer created.
+BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ return( new BallLarusEdge(source, target, duplicateCount) );
+}
+
+// Proxy to node's constructor. Updates the DAG state.
+BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
+ BallLarusNode* newNode = createNode(BB);
+ _nodes.push_back(newNode);
+ return( newNode );
+}
+
+// Proxy to edge's constructor. Updates the DAG state.
+BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
+ _edges.push_back(newEdge);
+ source->addSuccEdge(newEdge);
+ target->addPredEdge(newEdge);
+ return(newEdge);
+}
+
+// Adds a backedge with its phony edges. Updates the DAG state.
+void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
+ childEdge->setType(BallLarusEdge::BACKEDGE);
+
+ childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
+ childEdge->setPhonyExit(addEdge(source, getExit(),0));
+
+ childEdge->getPhonyRoot()->setRealEdge(childEdge);
+ childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
+
+ childEdge->getPhonyExit()->setRealEdge(childEdge);
+ childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
+ _backEdges.push_back(childEdge);
+}
diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
new file mode 100644
index 000000000000..b361d3f4fa94
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
@@ -0,0 +1,434 @@
+//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface used by optimizers to load path profiles,
+// and provides a loader pass which reads a path profile file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-info"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdio>
+
+using namespace llvm;
+
+// command line option for loading path profiles
+static cl::opt<std::string>
+PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
+
+namespace {
+ class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
+ public:
+ PathProfileLoaderPass() : ModulePass(ID) { }
+ ~PathProfileLoaderPass();
+
+ // this pass doesn't change anything (only loads information)
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // the full name of the loader pass
+ virtual const char* getPassName() const {
+ return "Path Profiling Information Loader";
+ }
+
+ // required since this pass implements multiple inheritance
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ // entry point to run the pass
+ bool runOnModule(Module &M);
+
+ // pass identification
+ static char ID;
+
+ private:
+ // make a reference table to refer to function by number
+ void buildFunctionRefs(Module &M);
+
+ // process argument info of a program from the input file
+ void handleArgumentInfo();
+
+ // process path number information from the input file
+ void handlePathInfo();
+
+ // array of references to the functions in the module
+ std::vector<Function*> _functions;
+
+ // path profile file handle
+ FILE* _file;
+
+ // path profile file name
+ std::string _filename;
+ };
+}
+
+// register PathLoader
+char PathProfileLoaderPass::ID = 0;
+
+INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
+ NoPathProfileInfo)
+INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
+ "path-profile-loader",
+ "Load path profile information from file",
+ false, true, false)
+
+char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
+
+// link PathLoader as a pass, and make it available as an optimisation
+ModulePass *llvm::createPathProfileLoaderPass() {
+ return new PathProfileLoaderPass;
+}
+
+// ----------------------------------------------------------------------------
+// PathEdge implementation
+//
+ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
+ unsigned duplicateNumber)
+ : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
+
+// ----------------------------------------------------------------------------
+// Path implementation
+//
+
+ProfilePath::ProfilePath (unsigned int number, unsigned int count,
+ double countStdDev, PathProfileInfo* ppi)
+ : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
+
+double ProfilePath::getFrequency() const {
+ return 100 * double(_count) /
+ double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
+}
+
+static BallLarusEdge* getNextEdge (BallLarusNode* node,
+ unsigned int pathNumber) {
+ BallLarusEdge* best = 0;
+
+ for( BLEdgeIterator next = node->succBegin(),
+ end = node->succEnd(); next != end; next++ ) {
+ if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
+ (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
+ (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
+ (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
+ best = *next;
+ }
+
+ return best;
+}
+
+ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
+
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+
+ increment -= next->getWeight();
+
+ if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
+ next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getTarget() != _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getSource()->getBlock(),
+ next->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
+ next->getTarget() == _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getSource() == _ppi->_currentDag->getRoot() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pev;
+}
+
+ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
+
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+ increment -= next->getWeight();
+
+ // add block to the block list if it is a real edge
+ if( next->getType() == BallLarusEdge::NORMAL)
+ pbv->push_back (currentNode->getBlock());
+ // make the back edge the last edge since we are at the end
+ else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
+ pbv->push_back (currentNode->getBlock());
+ pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
+ }
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pbv;
+}
+
+BasicBlock* ProfilePath::getFirstBlockInPath() const {
+ BallLarusNode* root = _ppi->_currentDag->getRoot();
+ BallLarusEdge* edge = getNextEdge(root, _number);
+
+ if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
+ return edge->getTarget()->getBlock();
+
+ return root->getBlock();
+}
+
+// ----------------------------------------------------------------------------
+// PathProfileInfo implementation
+//
+
+// Pass identification
+char llvm::PathProfileInfo::ID = 0;
+
+PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
+}
+
+PathProfileInfo::~PathProfileInfo() {
+ if (_currentDag)
+ delete _currentDag;
+}
+
+// set the function for which paths are currently begin processed
+void PathProfileInfo::setCurrentFunction(Function* F) {
+ // Make sure it exists
+ if (!F) return;
+
+ if (_currentDag)
+ delete _currentDag;
+
+ _currentFunction = F;
+ _currentDag = new BallLarusDag(*F);
+ _currentDag->init();
+ _currentDag->calculatePathNumbers();
+}
+
+// get the function for which paths are currently being processed
+Function* PathProfileInfo::getCurrentFunction() const {
+ return _currentFunction;
+}
+
+// get the entry block of the function
+BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
+ return _currentDag->getRoot()->getBlock();
+}
+
+// return the path based on its number
+ProfilePath* PathProfileInfo::getPath(unsigned int number) {
+ return _functionPaths[_currentFunction][number];
+}
+
+// return the number of paths which a function may potentially execute
+unsigned int PathProfileInfo::getPotentialPathCount() {
+ return _currentDag ? _currentDag->getNumberOfPaths() : 0;
+}
+
+// return an iterator for the beginning of a functions executed paths
+ProfilePathIterator PathProfileInfo::pathBegin() {
+ return _functionPaths[_currentFunction].begin();
+}
+
+// return an iterator for the end of a functions executed paths
+ProfilePathIterator PathProfileInfo::pathEnd() {
+ return _functionPaths[_currentFunction].end();
+}
+
+// returns the total number of paths run in the function
+unsigned int PathProfileInfo::pathsRun() {
+ return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
+}
+
+// ----------------------------------------------------------------------------
+// PathLoader implementation
+//
+
+// remove all generated paths
+PathProfileLoaderPass::~PathProfileLoaderPass() {
+ for( FunctionPathIterator funcNext = _functionPaths.begin(),
+ funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
+ for( ProfilePathIterator pathNext = funcNext->second.begin(),
+ pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
+ delete pathNext->second;
+}
+
+// entry point of the pass; this loads and parses a file
+bool PathProfileLoaderPass::runOnModule(Module &M) {
+ // get the filename and setup the module's function references
+ _filename = PathProfileInfoFilename;
+ buildFunctionRefs (M);
+
+ if (!(_file = fopen(_filename.c_str(), "rb"))) {
+ errs () << "error: input '" << _filename << "' file does not exist.\n";
+ return false;
+ }
+
+ ProfilingType profType;
+
+ while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
+ switch (profType) {
+ case ArgumentInfo:
+ handleArgumentInfo ();
+ break;
+ case PathInfo:
+ handlePathInfo ();
+ break;
+ default:
+ errs () << "error: bad path profiling file syntax, " << profType << "\n";
+ fclose (_file);
+ return false;
+ }
+ }
+
+ fclose (_file);
+
+ return true;
+}
+
+// create a reference table for functions defined in the path profile file
+void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
+ _functions.push_back(0); // make the 0 index a null pointer
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+ _functions.push_back(F);
+ }
+}
+
+// handle command like argument infor in the output file
+void PathProfileLoaderPass::handleArgumentInfo() {
+ // get the argument list's length
+ unsigned savedArgsLength;
+ if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
+ errs() << "warning: argument info header/data mismatch\n";
+ return;
+ }
+
+ // allocate a buffer, and get the arguments
+ char* args = new char[savedArgsLength+1];
+ if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
+ errs() << "warning: argument info header/data mismatch\n";
+
+ args[savedArgsLength] = '\0';
+ argList = std::string(args);
+ delete [] args; // cleanup dynamic string
+
+ // byte alignment
+ if (savedArgsLength & 3)
+ fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
+}
+
+// Handle path profile information in the output file
+void PathProfileLoaderPass::handlePathInfo () {
+ // get the number of functions in this profile
+ unsigned functionCount;
+ if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
+ errs() << "warning: path info header/data mismatch\n";
+ return;
+ }
+
+ // gather path information for each function
+ for (unsigned i = 0; i < functionCount; i++) {
+ PathProfileHeader pathHeader;
+ if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
+ errs() << "warning: bad header for path function info\n";
+ break;
+ }
+
+ Function* f = _functions[pathHeader.fnNumber];
+
+ // dynamically allocate a table to store path numbers
+ PathProfileTableEntry* pathTable =
+ new PathProfileTableEntry[pathHeader.numEntries];
+
+ if( fread(pathTable, sizeof(PathProfileTableEntry),
+ pathHeader.numEntries, _file) != pathHeader.numEntries) {
+ delete [] pathTable;
+ errs() << "warning: path function info header/data mismatch\n";
+ return;
+ }
+
+ // Build a new path for the current function
+ unsigned int totalPaths = 0;
+ for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
+ totalPaths += pathTable[j].pathCounter;
+ _functionPaths[f][pathTable[j].pathNumber]
+ = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
+ 0, this);
+ }
+
+ _functionPathCounts[f] = totalPaths;
+
+ delete [] pathTable;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// NoProfile PathProfileInfo implementation
+//
+
+namespace {
+ struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoPathProfileInfo() : ImmutablePass(ID) {
+ initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoPathProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoPathProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
+ "No Path Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
new file mode 100644
index 000000000000..0ae734e259db
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
@@ -0,0 +1,207 @@
+//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This verifier derives an edge profile file from current path profile
+// information
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-verifier"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+ class PathProfileVerifier : public ModulePass {
+ private:
+ bool runOnModule(Module &M);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfileVerifier() : ModulePass(ID) {
+ initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+
+ virtual const char *getPassName() const {
+ return "Path Profiler Verifier";
+ }
+
+ // The verifier requires the path profile and edge profile.
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const;
+ };
+}
+
+static cl::opt<std::string>
+EdgeProfileFilename("path-profile-verifier-file",
+ cl::init("edgefrompath.llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Edge profile file generated by -path-profile-verifier"),
+ cl::Hidden);
+
+char PathProfileVerifier::ID = 0;
+INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
+ "Compare the path profile derived edge profile against the "
+ "edge profile.", true, true)
+
+ModulePass *llvm::createPathProfileVerifierPass() {
+ return new PathProfileVerifier();
+}
+
+// The verifier requires the path profile and edge profile.
+void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<PathProfileInfo>();
+ AU.addPreserved<PathProfileInfo>();
+}
+
+typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
+typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
+typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
+
+// the verifier iterates through each path to gather the total
+// number of edge frequencies
+bool PathProfileVerifier::runOnModule (Module &M) {
+ PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
+
+ // setup a data structure to map path edges which index an
+ // array of edge counters
+ NestedBlockToIndexMap arrayMap;
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ arrayMap[0][F->begin()][0] = i++;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+
+ unsigned duplicate = 0;
+ BasicBlock* prev = 0;
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
+ prev = TI->getSuccessor(s), ++s) {
+ if (prev == TI->getSuccessor(s))
+ duplicate++;
+ else duplicate = 0;
+
+ arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
+ }
+ }
+ }
+
+ std::vector<unsigned> edgeArray(i);
+
+ // iterate through each path and increment the edge counters as needed
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ pathProfileInfo.setCurrentFunction(F);
+
+ DEBUG(dbgs() << "function '" << F->getName() << "' ran "
+ << pathProfileInfo.pathsRun()
+ << "/" << pathProfileInfo.getPotentialPathCount()
+ << " potential paths\n");
+
+ for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
+ endPath = pathProfileInfo.pathEnd();
+ nextPath != endPath; nextPath++ ) {
+ ProfilePath* currentPath = nextPath->second;
+
+ ProfilePathEdgeVector* pev = currentPath->getPathEdges();
+ DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
+ << currentPath->getCount() << "\n");
+ // setup the entry edge (normally path profiling doesn't care about this)
+ if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
+ edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+ += currentPath->getCount();
+
+ for( ProfilePathEdgeIterator nextEdge = pev->begin(),
+ endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
+ if (nextEdge != pev->begin())
+ DEBUG(dbgs() << " :: ");
+
+ BasicBlock* source = nextEdge->getSource();
+ BasicBlock* target = nextEdge->getTarget();
+ unsigned duplicateNumber = nextEdge->getDuplicateNumber();
+ DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
+ << "}--> " << target->getNameStr());
+
+ // Ensure all the referenced edges exist
+ // TODO: make this a separate function
+ if( !arrayMap.count(source) ) {
+ errs() << " error [" << F->getNameStr() << "()]: source '"
+ << source->getNameStr()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source].count(target) ) {
+ errs() << " error [" << F->getNameStr() << "()]: target '"
+ << target->getNameStr()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source][target].count(duplicateNumber) ) {
+ errs() << " error [" << F->getNameStr() << "()]: edge "
+ << source->getNameStr() << " -> " << target->getNameStr()
+ << " duplicate number " << duplicateNumber
+ << " does not exist in the array map.\n";
+ } else {
+ edgeArray[arrayMap[source][target][duplicateNumber]]
+ += currentPath->getCount();
+ }
+ }
+
+ DEBUG(errs() << "\n");
+
+ delete pev;
+ }
+ }
+
+ std::string errorInfo;
+ std::string filename = EdgeProfileFilename;
+
+ // Open a handle to the file
+ FILE* edgeFile = fopen(filename.c_str(),"wb");
+
+ if (!edgeFile) {
+ errs() << "error: unable to open file '" << filename << "' for output.\n";
+ return false;
+ }
+
+ errs() << "Generating edge profile '" << filename << "' ...\n";
+
+ // write argument info
+ unsigned type = ArgumentInfo;
+ unsigned num = pathProfileInfo.argList.size();
+ int zeros = 0;
+
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+ fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
+ if (num&3)
+ fwrite(&zeros, 1, 4-(num&3), edgeFile);
+
+ type = EdgeInfo;
+ num = edgeArray.size();
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+
+ // write each edge to the file
+ for( std::vector<unsigned>::iterator s = edgeArray.begin(),
+ e = edgeArray.end(); s != e; s++)
+ fwrite(&*s, sizeof (unsigned), 1, edgeFile);
+
+ fclose (edgeFile);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
new file mode 100644
index 000000000000..6ed27297923f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -0,0 +1,51 @@
+//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the post-dominator construction algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postdomtree"
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DominatorInternals.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+ "Post-Dominator Tree Construction", true, true)
+
+bool PostDominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+PostDominatorTree::~PostDominatorTree() {
+ delete DT;
+}
+
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+
+FunctionPass* llvm::createPostDomTree() {
+ return new PostDominatorTree();
+}
+
diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
new file mode 100644
index 000000000000..b594e2ba5506
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
@@ -0,0 +1,426 @@
+//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// estimates the profiling information in a very crude and unimaginative way.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+ "profile-estimator-loop-weight", cl::init(10),
+ cl::value_desc("loop-weight"),
+ cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+ class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
+ double ExecCount;
+ LoopInfo *LI;
+ std::set<BasicBlock*> BBToVisit;
+ std::map<Loop*,double> LoopExitWeights;
+ std::map<Edge,double> MinimalWeight;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit ProfileEstimatorPass(const double execcount = 0)
+ : FunctionPass(ID), ExecCount(execcount) {
+ initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
+ if (execcount == 0) ExecCount = LoopWeight;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information estimator";
+ }
+
+ /// run - Estimate the profile information from the specified file.
+ virtual bool runOnFunction(Function &F);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ virtual void recurseBasicBlock(BasicBlock *BB);
+
+ void inline printEdgeWeight(Edge);
+ };
+} // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+
+namespace llvm {
+ char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
+
+ FunctionPass *createProfileEstimatorPass() {
+ return new ProfileEstimatorPass();
+ }
+
+ /// createProfileEstimatorPass - This function returns a Pass that estimates
+ /// profiling information using the given loop execution count.
+ Pass *createProfileEstimatorPass(const unsigned execcount) {
+ return new ProfileEstimatorPass(execcount);
+ }
+}
+
+static double ignoreMissing(double w) {
+ if (w == ProfileInfo::MissingValue) return 0;
+ return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+ DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+ DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
+ << format("%20.20g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weight of the incoming edges must be equal the block weight which must in
+// turn be equal to the sume of the weights of the outgoing edges.
+// Since the flow of an block is deterimined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+ // Break the recursion if this BasicBlock was already visited.
+ if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+ // Read the LoopInfo for this block.
+ bool BBisHeader = LI->isLoopHeader(BB);
+ Loop* BBLoop = LI->getLoopFor(BB);
+
+ // To get the block weight, read all incoming edges.
+ double BBWeight = 0;
+ std::set<BasicBlock*> ProcessedPreds;
+ for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ // If this block was not considered already, add weight.
+ Edge edge = getEdge(*bbi,BB);
+ double w = getEdgeWeight(edge);
+ if (ProcessedPreds.insert(*bbi).second) {
+ BBWeight += ignoreMissing(w);
+ }
+ // If this block is a loop header and the predecessor is contained in this
+ // loop, thus the edge is a backedge, continue and do not check if the
+ // value is valid.
+ if (BBisHeader && BBLoop->contains(*bbi)) {
+ printEdgeError(edge, "but is backedge, continuing");
+ continue;
+ }
+ // If the edges value is missing (and this is no loop header, and this is
+ // no backedge) return, this block is currently non estimatable.
+ if (w == MissingValue) {
+ printEdgeError(edge, "returning");
+ return;
+ }
+ }
+ if (getExecutionCount(BB) != MissingValue) {
+ BBWeight = getExecutionCount(BB);
+ }
+
+ // Fetch all necessary information for current block.
+ SmallVector<Edge, 8> ExitEdges;
+ SmallVector<Edge, 8> Edges;
+ if (BBLoop) {
+ BBLoop->getExitEdges(ExitEdges);
+ }
+
+ // If this is a loop header, consider the following:
+ // Exactly the flow that is entering this block, must exit this block too. So
+ // do the following:
+ // *) get all the exit edges, read the flow that is already leaving this
+ // loop, remember the edges that do not have any flow on them right now.
+ // (The edges that have already flow on them are most likely exiting edges of
+ // other loops, do not touch those flows because the previously caclulated
+ // loopheaders would not be exact anymore.)
+ // *) In case there is not a single exiting edge left, create one at the loop
+ // latch to prevent the flow from building up in the loop.
+ // *) Take the flow that is not leaving the loop already and distribute it on
+ // the remaining exiting edges.
+ // (This ensures that all flow that enters the loop also leaves it.)
+ // *) Increase the flow into the loop by increasing the weight of this block.
+ // There is at least one incoming backedge that will bring us this flow later
+ // on. (So that the flow condition in this node is valid again.)
+ if (BBisHeader) {
+ double incoming = BBWeight;
+ // Subtract the flow leaving the loop.
+ std::set<Edge> ProcessedExits;
+ for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+ ee = ExitEdges.end(); ei != ee; ++ei) {
+ if (ProcessedExits.insert(*ei).second) {
+ double w = getEdgeWeight(*ei);
+ if (w == MissingValue) {
+ Edges.push_back(*ei);
+ // Check if there is a necessary minimal weight, if yes, subtract it
+ // from weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ incoming -= MinimalWeight[*ei];
+ DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ } else {
+ incoming -= w;
+ }
+ }
+ }
+ // If no exit edges, create one:
+ if (Edges.size() == 0) {
+ BasicBlock *Latch = BBLoop->getLoopLatch();
+ if (Latch) {
+ Edge edge = getEdge(Latch,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ edge = getEdge(Latch, BB);
+ EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+ printEdgeWeight(edge);
+ }
+ }
+
+ // Distribute remaining weight to the exting edges. To prevent fractions
+ // from building up and provoking precision problems the weight which is to
+ // be distributed is split and the rounded, the last edge gets a somewhat
+ // bigger value, but we are close enough for an estimation.
+ double fraction = floor(incoming/Edges.size());
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ double w = 0;
+ if (ei != (ee-1)) {
+ w = fraction;
+ incoming -= fraction;
+ } else {
+ w = incoming;
+ }
+ EdgeInformation[BB->getParent()][*ei] += w;
+ // Read necessary minimal weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ printEdgeWeight(*ei);
+
+ // Add minimal weight to paths to all exit edges, this is used to ensure
+ // that enough flow is reaching this edges.
+ Path p;
+ const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
+ while (Dest != BB) {
+ const BasicBlock *Parent = p.find(Dest)->second;
+ Edge e = getEdge(Parent, Dest);
+ if (MinimalWeight.find(e) == MinimalWeight.end()) {
+ MinimalWeight[e] = 0;
+ }
+ MinimalWeight[e] += w;
+ DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
+ Dest = Parent;
+ }
+ }
+ // Increase flow into the loop.
+ BBWeight *= (ExecCount+1);
+ }
+
+ BlockInformation[BB->getParent()][BB] = BBWeight;
+ // Up until now we considered only the loop exiting edges, now we have a
+ // definite block weight and must distribute this onto the outgoing edges.
+ // Since there may be already flow attached to some of the edges, read this
+ // flow first and remember the edges that have still now flow attached.
+ Edges.clear();
+ std::set<BasicBlock*> ProcessedSuccs;
+
+ succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // Also check for (BB,0) edges that may already contain some flow. (But only
+ // in case there are no successors.)
+ if (bbi == bbe) {
+ Edge edge = getEdge(BB,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ }
+ for ( ; bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ Edge edge = getEdge(BB,*bbi);
+ double w = getEdgeWeight(edge);
+ if (w != MissingValue) {
+ BBWeight -= getEdgeWeight(edge);
+ } else {
+ Edges.push_back(edge);
+ // If minimal weight is necessary, reserve weight by subtracting weight
+ // from block weight, this is readded later on.
+ if (MinimalWeight.find(edge) != MinimalWeight.end()) {
+ BBWeight -= MinimalWeight[edge];
+ DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
+ }
+ }
+ }
+ }
+
+ double fraction = floor(BBWeight/Edges.size());
+ // Finally we know what flow is still not leaving the block, distribute this
+ // flow onto the empty edges.
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ if (ei != (ee-1)) {
+ EdgeInformation[BB->getParent()][*ei] += fraction;
+ BBWeight -= fraction;
+ } else {
+ EdgeInformation[BB->getParent()][*ei] += BBWeight;
+ }
+ // Readd minial necessary weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ printEdgeWeight(*ei);
+ }
+
+ // This block is visited, mark this before the recursion.
+ BBToVisit.erase(BB);
+
+ // Recurse into successors.
+ for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+
+ // Fetch LoopInfo and clear ProfileInfo for this function.
+ LI = &getAnalysis<LoopInfo>();
+ FunctionInformation.erase(&F);
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ BBToVisit.clear();
+
+ // Mark all blocks as to visit.
+ for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+ BBToVisit.insert(bi);
+
+ // Clear Minimal Edges.
+ MinimalWeight.clear();
+
+ DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n");
+
+ // Since the entry block is the first one and has no predecessors, the edge
+ // (0,entry) is inserted with the starting weight of 1.
+ BasicBlock *entry = &F.getEntryBlock();
+ BlockInformation[&F][entry] = pow(2.0, 32.0);
+ Edge edge = getEdge(0,entry);
+ EdgeInformation[&F][edge] = BlockInformation[&F][entry];
+ printEdgeWeight(edge);
+
+ // Since recurseBasicBlock() maybe returns with a block which was not fully
+ // estimated, use recurseBasicBlock() until everything is calculated.
+ bool cleanup = false;
+ recurseBasicBlock(entry);
+ while (BBToVisit.size() > 0 && !cleanup) {
+ // Remember number of open blocks, this is later used to check if progress
+ // was made.
+ unsigned size = BBToVisit.size();
+
+ // Try to calculate all blocks in turn.
+ for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+ be = BBToVisit.end(); bi != be; ++bi) {
+ recurseBasicBlock(*bi);
+ // If at least one block was finished, break because iterator may be
+ // invalid.
+ if (BBToVisit.size() < size) break;
+ }
+
+ // If there was not a single block resolved, make some assumptions.
+ if (BBToVisit.size() == size) {
+ bool found = false;
+ for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
+ (BBI != BBE) && (!found); ++BBI) {
+ BasicBlock *BB = *BBI;
+ // Try each predecessor if it can be assumend.
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ (bbi != bbe) && (!found); ++bbi) {
+ Edge e = getEdge(*bbi,BB);
+ double w = getEdgeWeight(e);
+ // Check that edge from predecessor is still free.
+ if (w == MissingValue) {
+ // Check if there is a circle from this block to predecessor.
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
+ if (Dest != *bbi) {
+ // If there is no circle, just set edge weight to 0
+ EdgeInformation[&F][e] = 0;
+ DEBUG(dbgs() << "Assuming edge weight: ");
+ printEdgeWeight(e);
+ found = true;
+ }
+ }
+ }
+ }
+ if (!found) {
+ cleanup = true;
+ DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
+ }
+ }
+ }
+ // In case there was no safe way to assume edges, set as a last measure,
+ // set _everything_ to zero.
+ if (cleanup) {
+ FunctionInformation[&F] = 0;
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ const BasicBlock *BB = &(*FI);
+ BlockInformation[&F][BB] = 0;
+ const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
+ if (predi == prede) {
+ Edge e = getEdge(0,BB);
+ setEdgeWeight(e,0);
+ }
+ for (;predi != prede; ++predi) {
+ Edge e = getEdge(*predi,BB);
+ setEdgeWeight(e,0);
+ }
+ succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
+ if (succi == succe) {
+ Edge e = getEdge(BB,0);
+ setEdgeWeight(e,0);
+ }
+ for (;succi != succe; ++succi) {
+ Edge e = getEdge(*succi,BB);
+ setEdgeWeight(e,0);
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
new file mode 100644
index 000000000000..173de2c02791
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
@@ -0,0 +1,1105 @@
+//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract ProfileInfo interface, and the default
+// "no profile" implementation.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-info"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+#include <queue>
+#include <limits>
+using namespace llvm;
+
+namespace llvm {
+ template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
+}
+
+// Register the ProfileInfo interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
+
+namespace llvm {
+
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
+
+template <>
+ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
+ MachineProfile = 0;
+}
+template <>
+ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
+ if (MachineProfile) delete MachineProfile;
+}
+
+template<>
+char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
+
+template<>
+const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
+
+template<> const
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
+
+template<> double
+ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(BB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ double Count = MissingValue;
+
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // Are there zero predecessors of this block?
+ if (PI == PE) {
+ Edge e = getEdge(0, BB);
+ Count = getEdgeWeight(e);
+ } else {
+ // Otherwise, if there are predecessors, the execution count of this block is
+ // the sum of the edge frequencies from the incoming edges.
+ std::set<const BasicBlock*> ProcessedPreds;
+ Count = 0;
+ for (; PI != PE; ++PI) {
+ const BasicBlock *P = *PI;
+ if (ProcessedPreds.insert(P).second) {
+ double w = getEdgeWeight(getEdge(P, BB));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+ }
+ }
+
+ // If the predecessors did not suffice to get block weight, try successors.
+ if (Count == MissingValue) {
+
+ succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+
+ // Are there zero successors of this block?
+ if (SI == SE) {
+ Edge e = getEdge(BB,0);
+ Count = getEdgeWeight(e);
+ } else {
+ std::set<const BasicBlock*> ProcessedSuccs;
+ Count = 0;
+ for (; SI != SE; ++SI)
+ if (ProcessedSuccs.insert(*SI).second) {
+ double w = getEdgeWeight(getEdge(BB, *SI));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+ }
+ }
+
+ if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+ return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineBasicBlock *MBB) {
+ std::map<const MachineFunction*, BlockCounts>::iterator J =
+ BlockInformation.find(MBB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(MBB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ return MissingValue;
+}
+
+template<>
+double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
+ std::map<const Function*, double>::iterator J =
+ FunctionInformation.find(F);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ // isDeclaration() is checked here and not at start of function to allow
+ // functions without a body still to have a execution count.
+ if (F->isDeclaration()) return MissingValue;
+
+ double Count = getExecutionCount(&F->getEntryBlock());
+ if (Count != MissingValue) FunctionInformation[F] = Count;
+ return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineFunction *MF) {
+ std::map<const MachineFunction*, double>::iterator J =
+ FunctionInformation.find(MF);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ double Count = getExecutionCount(&MF->front());
+ if (Count != MissingValue) FunctionInformation[MF] = Count;
+ return Count;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ setExecutionCount(const BasicBlock *BB, double w) {
+ DEBUG(dbgs() << "Creating Block " << BB->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = w;
+}
+
+template<>
+void ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ setExecutionCount(const MachineBasicBlock *MBB, double w) {
+ DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[MBB->getParent()][MBB] = w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
+ double oldw = getEdgeWeight(e);
+ assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
+ DEBUG(dbgs() << "Adding to Edge " << e
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ EdgeInformation[getFunction(e)][e] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ addExecutionCount(const BasicBlock *BB, double w) {
+ double oldw = getExecutionCount(BB);
+ assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
+ DEBUG(dbgs() << "Adding to Block " << BB->getName()
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J == BlockInformation.end()) return;
+
+ DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
+ J->second.erase(BB);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(getFunction(e));
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Deleting" << e << "\n");
+ J->second.erase(e);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceEdge(const Edge &oldedge, const Edge &newedge) {
+ double w;
+ if ((w = getEdgeWeight(newedge)) == MissingValue) {
+ w = getEdgeWeight(oldedge);
+ DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
+ } else {
+ w += getEdgeWeight(oldedge);
+ DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
+ }
+ setEdgeWeight(newedge,w);
+ removeEdge(oldedge);
+}
+
+template<>
+const BasicBlock *ProfileInfoT<Function,BasicBlock>::
+ GetPath(const BasicBlock *Src, const BasicBlock *Dest,
+ Path &P, unsigned Mode) {
+ const BasicBlock *BB = 0;
+ bool hasFoundPath = false;
+
+ std::queue<const BasicBlock *> BFS;
+ BFS.push(Src);
+
+ while(BFS.size() && !hasFoundPath) {
+ BB = BFS.front();
+ BFS.pop();
+
+ succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ if (Succ == End) {
+ P[0] = BB;
+ if (Mode & GetPathToExit) {
+ hasFoundPath = true;
+ BB = 0;
+ }
+ }
+ for(;Succ != End; ++Succ) {
+ if (P.find(*Succ) != P.end()) continue;
+ Edge e = getEdge(BB,*Succ);
+ if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
+ P[*Succ] = BB;
+ BFS.push(*Succ);
+ if ((Mode & GetPathToDest) && *Succ == Dest) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ }
+ }
+
+ return BB;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ divertFlow(const Edge &oldedge, const Edge &newedge) {
+ DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
+
+ // First check if the old edge was taken, if not, just delete it...
+ if (getEdgeWeight(oldedge) == 0) {
+ removeEdge(oldedge);
+ return;
+ }
+
+ Path P;
+ P[newedge.first] = 0;
+ P[newedge.second] = newedge.first;
+ const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
+
+ double w = getEdgeWeight (oldedge);
+ DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
+ do {
+ const BasicBlock *Parent = P.find(BB)->second;
+ Edge e = getEdge(Parent,BB);
+ double oldw = getEdgeWeight(e);
+ double oldc = getExecutionCount(e.first);
+ setEdgeWeight(e, w+oldw);
+ if (Parent != oldedge.first) {
+ setExecutionCount(e.first, w+oldc);
+ }
+ BB = Parent;
+ } while (BB != newedge.first);
+ removeEdge(oldedge);
+}
+
+/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces the
+/// occurrences of RmBB with DestBB.
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
+ DEBUG(dbgs() << "Replacing " << RmBB->getName()
+ << " with " << DestBB->getName() << "\n");
+ const Function *F = DestBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ Edge e, newedge;
+ bool erasededge = false;
+ EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+ while(I != E) {
+ e = (I++)->first;
+ bool foundedge = false; bool eraseedge = false;
+ if (e.first == RmBB) {
+ if (e.second == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(DestBB, e.second);
+ foundedge = true;
+ }
+ }
+ if (e.second == RmBB) {
+ if (e.first == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(e.first, DestBB);
+ foundedge = true;
+ }
+ }
+ if (foundedge) {
+ replaceEdge(e, newedge);
+ }
+ if (eraseedge) {
+ if (erasededge) {
+ Edge newedge = getEdge(DestBB, DestBB);
+ replaceEdge(e, newedge);
+ } else {
+ removeEdge(e);
+ erasededge = true;
+ }
+ }
+ }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since its possible that there is more than one edge in the CFG from FristBB
+/// to SecondBB its necessary to redirect the flow proporionally.
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
+ const BasicBlock *SecondBB,
+ const BasicBlock *NewBB,
+ bool MergeIdenticalEdges) {
+ const Function *F = FirstBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ // Generate edges and read current weight.
+ Edge e = getEdge(FirstBB, SecondBB);
+ Edge n1 = getEdge(FirstBB, NewBB);
+ Edge n2 = getEdge(NewBB, SecondBB);
+ EdgeWeights &ECs = J->second;
+ double w = ECs[e];
+
+ int succ_count = 0;
+ if (!MergeIdenticalEdges) {
+ // First count the edges from FristBB to SecondBB, if there is more than
+ // one, only slice out a proporional part for NewBB.
+ for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+ BBI != BBE; ++BBI) {
+ if (*BBI == SecondBB) succ_count++;
+ }
+ // When the NewBB is completely new, increment the count by one so that
+ // the counts are properly distributed.
+ if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+ } else {
+ // When the edges are merged anyway, then redirect all flow.
+ succ_count = 1;
+ }
+
+ // We know now how many edges there are from FirstBB to SecondBB, reroute a
+ // proportional part of the edge weight over NewBB.
+ double neww = floor(w / succ_count);
+ ECs[n1] += neww;
+ ECs[n2] += neww;
+ BlockInformation[F][NewBB] += neww;
+ if (succ_count == 1) {
+ ECs.erase(e);
+ } else {
+ ECs[e] -= neww;
+ }
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
+ const BasicBlock* New) {
+ const Function *F = Old->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
+
+ std::set<Edge> Edges;
+ for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
+ ewi != ewe; ++ewi) {
+ Edge old = ewi->first;
+ if (old.first == Old) {
+ Edges.insert(old);
+ }
+ }
+ for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
+ EI != EE; ++EI) {
+ Edge newedge = getEdge(New, EI->second);
+ replaceEdge(*EI, newedge);
+ }
+
+ double w = getExecutionCount(Old);
+ setEdgeWeight(getEdge(Old, New), w);
+ setExecutionCount(New, w);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
+ const BasicBlock* NewBB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds) {
+ const Function *F = BB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
+ << " to " << NewBB->getName() << "\n");
+
+ // Collect weight that was redirected over NewBB.
+ double newweight = 0;
+
+ std::set<const BasicBlock *> ProcessedPreds;
+ // For all requestes Predecessors.
+ for (unsigned pred = 0; pred < NumPreds; ++pred) {
+ const BasicBlock * Pred = Preds[pred];
+ if (ProcessedPreds.insert(Pred).second) {
+ // Create edges and read old weight.
+ Edge oldedge = getEdge(Pred, BB);
+ Edge newedge = getEdge(Pred, NewBB);
+
+ // Remember how much weight was redirected.
+ newweight += getEdgeWeight(oldedge);
+
+ replaceEdge(oldedge,newedge);
+ }
+ }
+
+ Edge newedge = getEdge(NewBB,BB);
+ setEdgeWeight(newedge, newweight);
+ setExecutionCount(NewBB, newweight);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
+ const Function *New) {
+ DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
+ << New->getName() << "\n");
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(Old);
+ if(J != EdgeInformation.end()) {
+ EdgeInformation[New] = J->second;
+ }
+ EdgeInformation.erase(Old);
+ BlockInformation.erase(Old);
+ FunctionInformation.erase(Old);
+}
+
+static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
+ ProfileInfo::Edge &tocalc, unsigned &uncalc) {
+ if (w == ProfileInfo::MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ return 0;
+ } else {
+ return w;
+ }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::
+ CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
+ bool assumeEmptySelf) {
+ Edge edgetocalc;
+ unsigned uncalculated = 0;
+
+ // collect weights of all incoming and outgoing edges, rememer edges that
+ // have no value
+ double incount = 0;
+ SmallSet<const BasicBlock*,8> pred_visited;
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi==bbe) {
+ Edge e = getEdge(0,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (pred_visited.insert(*bbi)) {
+ Edge e = getEdge(*bbi,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ }
+
+ double outcount = 0;
+ SmallSet<const BasicBlock*,8> succ_visited;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi==sbbe) {
+ Edge e = getEdge(BB,0);
+ if (getEdgeWeight(e) == MissingValue) {
+ double w = getExecutionCount(BB);
+ if (w != MissingValue) {
+ setEdgeWeight(e,w);
+ removed = e;
+ }
+ }
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ for (;sbbi != sbbe; ++sbbi) {
+ if (succ_visited.insert(*sbbi)) {
+ Edge e = getEdge(BB,*sbbi);
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ }
+
+ // if exactly one edge weight was missing, calculate it and remove it from
+ // spanning tree
+ if (uncalculated == 0 ) {
+ return true;
+ } else
+ if (uncalculated == 1) {
+ if (incount < outcount) {
+ EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+ } else {
+ EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+ }
+ DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
+ << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
+ removed = edgetocalc;
+ return true;
+ } else
+ if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
+ setEdgeWeight(edgetocalc, incount * 10);
+ removed = edgetocalc;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
+ double w = PI->getEdgeWeight(e);
+ if (w != ProfileInfo::MissingValue) {
+ calcw += w;
+ } else {
+ misscount.insert(e);
+ }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
+ double inWeight = 0;
+ std::set<Edge> inMissing;
+ std::set<const BasicBlock*> ProcessedPreds;
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi == bbe) {
+ readEdge(this,getEdge(0,BB),inWeight,inMissing);
+ }
+ for( ; bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
+ }
+ }
+
+ double outWeight = 0;
+ std::set<Edge> outMissing;
+ std::set<const BasicBlock*> ProcessedSuccs;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi == sbbe)
+ readEdge(this,getEdge(BB,0),outWeight,outMissing);
+ for ( ; sbbi != sbbe; ++sbbi ) {
+ if (ProcessedSuccs.insert(*sbbi).second) {
+ readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
+ }
+ }
+
+ double share;
+ std::set<Edge>::iterator ei,ee;
+ if (inMissing.size() == 0 && outMissing.size() > 0) {
+ ei = outMissing.begin();
+ ee = outMissing.end();
+ share = inWeight/outMissing.size();
+ setExecutionCount(BB,inWeight);
+ } else
+ if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
+ ei = inMissing.begin();
+ ee = inMissing.end();
+ share = 0;
+ setExecutionCount(BB,0);
+ } else
+ if (inMissing.size() == 0 && outMissing.size() == 0) {
+ setExecutionCount(BB,outWeight);
+ return true;
+ } else {
+ return false;
+ }
+ for ( ; ei != ee; ++ei ) {
+ setEdgeWeight(*ei,share);
+ }
+ return true;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
+// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
+// for (Function::const_iterator FI = F->begin(), FE = F->end();
+// FI != FE; ++FI) {
+// const BasicBlock* BB = &(*FI);
+// {
+// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// {
+// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// }
+// return;
+// }
+ // The set of BasicBlocks that are still unvisited.
+ std::set<const BasicBlock*> Unvisited;
+
+ // The set of return edges (Edges with no successors).
+ std::set<Edge> ReturnEdges;
+ double ReturnWeight = 0;
+
+ // First iterate over the whole function and collect:
+ // 1) The blocks in this function in the Unvisited set.
+ // 2) The return edges in the ReturnEdges set.
+ // 3) The flow that is leaving the function already via return edges.
+
+ // Data structure for searching the function.
+ std::queue<const BasicBlock *> BFS;
+ const BasicBlock *BB = &(F->getEntryBlock());
+ BFS.push(BB);
+ Unvisited.insert(BB);
+
+ while (BFS.size()) {
+ BB = BFS.front(); BFS.pop();
+ succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ if (NBB == End) {
+ Edge e = getEdge(BB,0);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ // If the return edge has no value, try to read value from block.
+ double bw = getExecutionCount(BB);
+ if (bw != MissingValue) {
+ setEdgeWeight(e,bw);
+ ReturnWeight += bw;
+ } else {
+ // If both return edge and block provide no value, collect edge.
+ ReturnEdges.insert(e);
+ }
+ } else {
+ // If the return edge has a proper value, collect it.
+ ReturnWeight += w;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (Unvisited.insert(*NBB).second) {
+ BFS.push(*NBB);
+ }
+ }
+ }
+
+ while (Unvisited.size() > 0) {
+ unsigned oldUnvisitedCount = Unvisited.size();
+ bool FoundPath = false;
+
+ // If there is only one edge left, calculate it.
+ if (ReturnEdges.size() == 1) {
+ ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
+
+ Edge e = *ReturnEdges.begin();
+ setEdgeWeight(e,ReturnWeight);
+ setExecutionCount(e.first,ReturnWeight);
+
+ Unvisited.erase(e.first);
+ ReturnEdges.erase(e);
+ continue;
+ }
+
+ // Calculate all blocks where only one edge is missing, this may also
+ // resolve furhter return edges.
+ std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ Edge e;
+ if(CalculateMissingEdge(BB,e,true)) {
+ if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
+ setExecutionCount(BB,getExecutionCount(BB));
+ }
+ Unvisited.erase(BB);
+ if (e.first != 0 && e.second == 0) {
+ ReturnEdges.erase(e);
+ ReturnWeight += getEdgeWeight(e);
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Estimate edge weights by dividing the flow proportionally.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ bool AllEdgesHaveSameReturn = true;
+ // Check each Successor, these must all end up in the same or an empty
+ // return block otherwise its dangerous to do an estimation on them.
+ for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ Succ != End; ++Succ) {
+ Path P;
+ GetPath(*Succ, 0, P, GetPathToExit);
+ if (Dest && Dest != P[0]) {
+ AllEdgesHaveSameReturn = false;
+ }
+ Dest = P[0];
+ }
+ if (AllEdgesHaveSameReturn) {
+ if(EstimateMissingEdges(BB)) {
+ Unvisited.erase(BB);
+ break;
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Check if there is a path to an block that has a known value and redirect
+ // flow accordingly.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ // Fetch path.
+ const BasicBlock *BB = *FI; ++FI;
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
+
+ // Calculate incoming flow.
+ double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ invalid++;
+ } else {
+ // If the path contains the successor, this means its a backedge,
+ // do not count as missing.
+ if (P.find(*NBB) == P.end())
+ inmissing++;
+ }
+ incount++;
+ }
+ }
+ if (inmissing == incount) continue;
+ if (invalid == 0) continue;
+
+ // Subtract (already) outgoing flow.
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw -= ew;
+ }
+ }
+ }
+ if (iw < 0) continue;
+
+ // Check the receiving end of the path if it can handle the flow.
+ double ow = getExecutionCount(Dest);
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ ow -= ew;
+ }
+ }
+ }
+ if (ow < 0) continue;
+
+ // Determine how much flow shall be used.
+ double ew = getEdgeWeight(getEdge(P[Dest],Dest));
+ if (ew != MissingValue) {
+ ew = ew<ow?ew:ow;
+ ew = ew<iw?ew:iw;
+ } else {
+ if (inmissing == 0)
+ ew = iw;
+ }
+
+ // Create flow.
+ if (ew != MissingValue) {
+ do {
+ Edge e = getEdge(P[Dest],Dest);
+ if (getEdgeWeight(e) == MissingValue) {
+ setEdgeWeight(e,ew);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Calculate a block with self loop.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ bool SelfEdgeFound = false;
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (*NBB == BB) {
+ SelfEdgeFound = true;
+ break;
+ }
+ }
+ if (SelfEdgeFound) {
+ Edge e = getEdge(BB,BB);
+ if (getEdgeWeight(e) == MissingValue) {
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
+ setEdgeWeight(e,iw * 10);
+ FoundPath = true;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ // Determine backedges, set them to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ Path P;
+ bool BackEdgeFound = false;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
+ if (Dest == *NBB) {
+ BackEdgeFound = true;
+ break;
+ }
+ }
+ if (BackEdgeFound) {
+ Edge e = getEdge(Dest,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Channel flow to return block.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
+ Dest = P[0];
+ if (!Dest) continue;
+
+ if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
+ // Calculate incoming flow.
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,iw);
+ FoundPath = true;
+ } else {
+ assert(0 && "Edge should not have value already!");
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Speculatively set edges to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Edge e = getEdge(*NBB,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ break;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ errs() << "{";
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ dbgs() << BB->getName();
+ if (FI != FE)
+ dbgs() << ",";
+ }
+ errs() << "}";
+
+ errs() << "ASSERT: could not repair function";
+ assert(0 && "could not repair function");
+ }
+
+ EdgeWeights J = EdgeInformation[F];
+ for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
+ Edge e = EI->first;
+
+ bool SuccFound = false;
+ if (e.first != 0) {
+ succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
+ if (NBB == End) {
+ if (0 == e.second) {
+ SuccFound = true;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (*NBB == e.second) {
+ SuccFound = true;
+ break;
+ }
+ }
+ if (!SuccFound) {
+ removeEdge(e);
+ }
+ }
+ }
+}
+
+raw_ostream& operator<<(raw_ostream &O, const Function *F) {
+ return O << F->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
+ return O << MF->getFunction()->getName() << "(MF)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) {
+ return O << BB->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
+ return O << MBB->getBasicBlock()->getName() << "(MB)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+} // namespace llvm
+
+//===----------------------------------------------------------------------===//
+// NoProfile ProfileInfo implementation
+//
+
+namespace {
+ struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoProfileInfo() : ImmutablePass(ID) {
+ initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
+ "No Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
new file mode 100644
index 000000000000..eaa38dad16a1
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
@@ -0,0 +1,157 @@
+//===- ProfileInfoLoad.cpp - Load profile information from disk -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ProfileInfoLoader class is used to load and represent profiling
+// information read in from the dump file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+// ByteSwap - Byteswap 'Var' if 'Really' is true.
+//
+static inline unsigned ByteSwap(unsigned Var, bool Really) {
+ if (!Really) return Var;
+ return ((Var & (255U<< 0U)) << 24U) |
+ ((Var & (255U<< 8U)) << 8U) |
+ ((Var & (255U<<16U)) >> 8U) |
+ ((Var & (255U<<24U)) >> 24U);
+}
+
+static unsigned AddCounts(unsigned A, unsigned B) {
+ // If either value is undefined, use the other.
+ if (A == ProfileInfoLoader::Uncounted) return B;
+ if (B == ProfileInfoLoader::Uncounted) return A;
+ return A + B;
+}
+
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+ bool ShouldByteSwap,
+ std::vector<unsigned> &Data) {
+ // Read the number of entries...
+ unsigned NumEntries;
+ if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
+ errs() << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
+
+ // Read the counts...
+ std::vector<unsigned> TempSpace(NumEntries);
+
+ // Read in the block of data...
+ if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
+ errs() << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+
+ // Make sure we have enough space... The space is initialised to -1 to
+ // facitiltate the loading of missing values for OptimalEdgeProfiling.
+ if (Data.size() < NumEntries)
+ Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
+
+ // Accumulate the data we just read into the data.
+ if (!ShouldByteSwap) {
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(TempSpace[i], Data[i]);
+ }
+ } else {
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
+ }
+ }
+}
+
+const unsigned ProfileInfoLoader::Uncounted = ~0U;
+
+// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
+// program if the file is invalid or broken.
+//
+ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
+ const std::string &Filename,
+ Module &TheModule) :
+ Filename(Filename),
+ M(TheModule), Warned(false) {
+ FILE *F = fopen(Filename.c_str(), "rb");
+ if (F == 0) {
+ errs() << ToolName << ": Error opening '" << Filename << "': ";
+ perror(0);
+ exit(1);
+ }
+
+ // Keep reading packets until we run out of them.
+ unsigned PacketType;
+ while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
+ // If the low eight bits of the packet are zero, we must be dealing with an
+ // endianness mismatch. Byteswap all words read from the profiling
+ // information.
+ bool ShouldByteSwap = (char)PacketType == 0;
+ PacketType = ByteSwap(PacketType, ShouldByteSwap);
+
+ switch (PacketType) {
+ case ArgumentInfo: {
+ unsigned ArgLength;
+ if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
+ errs() << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
+
+ // Read in the arguments...
+ std::vector<char> Chars(ArgLength+4);
+
+ if (ArgLength)
+ if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
+ errs() << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
+ break;
+ }
+
+ case FunctionInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
+ break;
+
+ case BlockInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
+ break;
+
+ case EdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
+ break;
+
+ case OptEdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
+ break;
+
+ case BBTraceInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
+ break;
+
+ default:
+ errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
+ exit(1);
+ }
+ }
+
+ fclose(F);
+}
+
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
new file mode 100644
index 000000000000..098079bcffc4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -0,0 +1,267 @@
+//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// loads the information from a profile dump file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-loader"
+#include "llvm/BasicBlock.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesRead, "The # of edges read.");
+
+static cl::opt<std::string>
+ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -profile-loader"));
+
+namespace {
+ class LoaderPass : public ModulePass, public ProfileInfo {
+ std::string Filename;
+ std::set<Edge> SpanningTree;
+ std::set<const BasicBlock*> BBisUnvisited;
+ unsigned ReadCount;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit LoaderPass(const std::string &filename = "")
+ : ModulePass(ID), Filename(filename) {
+ initializeLoaderPassPass(*PassRegistry::getPassRegistry());
+ if (filename.empty()) Filename = ProfileInfoFilename;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information loader";
+ }
+
+ // recurseBasicBlock() - Calculates the edge weights for as much basic
+ // blocks as possbile.
+ virtual void recurseBasicBlock(const BasicBlock *BB);
+ virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
+ virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ /// run - Load the profile information from the specified file.
+ virtual bool runOnModule(Module &M);
+ };
+} // End of anonymous namespace
+
+char LoaderPass::ID = 0;
+INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
+ "Load profile information from llvmprof.out", false, true, false)
+
+char &llvm::ProfileLoaderPassID = LoaderPass::ID;
+
+ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
+
+/// createProfileLoaderPass - This function returns a Pass that loads the
+/// profiling information for the module from the specified filename, making it
+/// available to the optimizers.
+Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
+ return new LoaderPass(Filename);
+}
+
+void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
+ unsigned &uncalc, double &count) {
+ double w;
+ if ((w = getEdgeWeight(edge)) == MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ } else {
+ count+=w;
+ }
+}
+
+// recurseBasicBlock - Visits all neighbours of a block and then tries to
+// calculate the missing edge values.
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
+
+ // break recursion if already visited
+ if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
+ BBisUnvisited.erase(BB);
+ if (!BB) return;
+
+ for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+
+ Edge tocalc;
+ if (CalculateMissingEdge(BB, tocalc)) {
+ SpanningTree.erase(tocalc);
+ }
+}
+
+void LoaderPass::readEdge(ProfileInfo::Edge e,
+ std::vector<unsigned> &ECs) {
+ if (ReadCount < ECs.size()) {
+ double weight = ECs[ReadCount++];
+ if (weight != ProfileInfoLoader::Uncounted) {
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is save because we know
+ // that everything thats representable in unsinged is also representable
+ // in double.
+ EdgeInformation[getFunction(e)][e] += (double)weight;
+
+ DEBUG(dbgs() << "--Read Edge Counter for " << e
+ << " (# "<< (ReadCount-1) << "): "
+ << (unsigned)getEdgeWeight(e) << "\n");
+ } else {
+ // This happens only if reading optimal profiling information, not when
+ // reading regular profiling information.
+ SpanningTree.insert(e);
+ }
+ }
+}
+
+bool LoaderPass::runOnModule(Module &M) {
+ ProfileInfoLoader PIL("profile-loader", Filename, M);
+
+ EdgeInformation.clear();
+ std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ Counters = PIL.getRawOptimalEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ readEdge(getEdge(BB,0), Counters);
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
+ while (SpanningTree.size() > 0) {
+
+ unsigned size = SpanningTree.size();
+
+ BBisUnvisited.clear();
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ BBisUnvisited.insert(ei->first);
+ BBisUnvisited.insert(ei->second);
+ }
+ while (BBisUnvisited.size() > 0) {
+ recurseBasicBlock(*BBisUnvisited.begin());
+ }
+
+ if (SpanningTree.size() == size) {
+ DEBUG(dbgs()<<"{");
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ DEBUG(dbgs()<< *ei <<",");
+ }
+ assert(0 && "No edge calculated!");
+ }
+
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ BlockInformation.clear();
+ Counters = PIL.getRawBlockCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReadCount < Counters.size())
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is save because we know
+ // that everything thats representable in unsinged is also
+ // representable in double.
+ BlockInformation[F][BB] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ FunctionInformation.clear();
+ Counters = PIL.getRawFunctionCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ if (ReadCount < Counters.size())
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is save because we know
+ // that everything thats representable in unsinged is also
+ // representable in double.
+ FunctionInformation[F] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
new file mode 100644
index 000000000000..a01751849c51
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
@@ -0,0 +1,382 @@
+//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that checks profiling information for
+// plausibility.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-verifier"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+static cl::opt<bool,false>
+ProfileVerifierDisableAssertions("profile-verifier-noassert",
+ cl::desc("Disable assertions"));
+
+namespace llvm {
+ template<class FType, class BType>
+ class ProfileVerifierPassT : public FunctionPass {
+
+ struct DetailedBlockInfo {
+ const BType *BB;
+ double BBWeight;
+ double inWeight;
+ int inCount;
+ double outWeight;
+ int outCount;
+ };
+
+ ProfileInfoT<FType, BType> *PI;
+ std::set<const BType*> BBisVisited;
+ std::set<const FType*> FisVisited;
+ bool DisableAssertions;
+
+ // When debugging is enabled, the verifier prints a whole slew of debug
+ // information, otherwise its just the assert. These are all the helper
+ // functions.
+ bool PrintedDebugTree;
+ std::set<const BType*> BBisPrinted;
+ void debugEntry(DetailedBlockInfo*);
+ void printDebugInfo(const BType *BB);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ explicit ProfileVerifierPassT () : FunctionPass(ID) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
+ DisableAssertions = ProfileVerifierDisableAssertions;
+ }
+ explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
+ DisableAssertions(da) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ProfileInfoT<FType, BType> >();
+ }
+
+ const char *getPassName() const {
+ return "Profiling information verifier";
+ }
+
+ /// run - Verify the profile information.
+ bool runOnFunction(FType &F);
+ void recurseBasicBlock(const BType*);
+
+ bool exitReachable(const FType*);
+ double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
+ void CheckValue(bool, const char*, DetailedBlockInfo*);
+ };
+
+ typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
+
+ if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+ double BBWeight = PI->getExecutionCount(BB);
+ if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
+ double inWeight = 0;
+ int inCount = 0;
+ std::set<const BType*> ProcessedPreds;
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ dbgs() << "calculated in-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ inWeight += EdgeWeight;
+ inCount++;
+ }
+ }
+ double outWeight = 0;
+ int outCount = 0;
+ std::set<const BType*> ProcessedSuccs;
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ dbgs() << "calculated out-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ outWeight += EdgeWeight;
+ outCount++;
+ }
+ }
+ dbgs() << "Block " << BB->getNameStr() << " in "
+ << BB->getParent()->getNameStr() << ":"
+ << "BBWeight=" << format("%20.20g",BBWeight) << ","
+ << "inWeight=" << format("%20.20g",inWeight) << ","
+ << "inCount=" << inCount << ","
+ << "outWeight=" << format("%20.20g",outWeight) << ","
+ << "outCount" << outCount << "\n";
+
+ // mark as visited and recurse into subnodes
+ BBisPrinted.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ printDebugInfo(*bbi);
+ }
+ }
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
+ dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
+ << DI->BB->getParent()->getNameStr() << ":"
+ << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
+ << "inWeight=" << format("%20.20g",DI->inWeight) << ","
+ << "inCount=" << DI->inCount << ","
+ << "outWeight=" << format("%20.20g",DI->outWeight) << ","
+ << "outCount=" << DI->outCount << "\n";
+ if (!PrintedDebugTree) {
+ PrintedDebugTree = true;
+ printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+ }
+ }
+
+ // This compares A and B for equality.
+ static bool Equals(double A, double B) {
+ return A == B;
+ }
+
+ // This checks if the function "exit" is reachable from an given function
+ // via calls, this is necessary to check if a profile is valid despite the
+ // counts not fitting exactly.
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
+ if (!F) return false;
+
+ if (FisVisited.count(F)) return false;
+
+ FType *Exit = F->getParent()->getFunction("exit");
+ if (Exit == F) {
+ return true;
+ }
+
+ FisVisited.insert(F);
+ bool exits = false;
+ for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ exits |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ exits = true;
+ }
+ if (exits) break;
+ }
+ }
+ return exits;
+ }
+
+ #define ASSERTMESSAGE(M) \
+ { dbgs() << "ASSERT:" << (M) << "\n"; \
+ if (!DisableAssertions) assert(0 && (M)); }
+
+ template<class FType, class BType>
+ double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
+ dbgs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("Edge has missing value");
+ return 0;
+ } else {
+ if (EdgeWeight < 0) {
+ dbgs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("Edge has negative value");
+ }
+ return EdgeWeight;
+ }
+ }
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
+ const char *Message,
+ DetailedBlockInfo *DI) {
+ if (Error) {
+ DEBUG(debugEntry(DI));
+ dbgs() << "Block " << DI->BB->getNameStr() << " in Function "
+ << DI->BB->getParent()->getNameStr() << ": ";
+ ASSERTMESSAGE(Message);
+ }
+ return;
+ }
+
+ // This calculates the Information for a block and then recurses into the
+ // successors.
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
+
+ // Break the recursion by remembering all visited blocks.
+ if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+ // Use a data structure to store all the information, this can then be handed
+ // to debug printers.
+ DetailedBlockInfo DI;
+ DI.BB = BB;
+ DI.outCount = DI.inCount = 0;
+ DI.inWeight = DI.outWeight = 0;
+
+ // Read predecessors.
+ std::set<const BType*> ProcessedPreds;
+ const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+ // If there are none, check for (0,BB) edge.
+ if (bpi == bpe) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
+ DI.inCount++;
+ }
+ for (;bpi != bpe; ++bpi) {
+ if (ProcessedPreds.insert(*bpi).second) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+ DI.inCount++;
+ }
+ }
+
+ // Read successors.
+ std::set<const BType*> ProcessedSuccs;
+ succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // If there is an (0,BB) edge, consider it too. (This is done not only when
+ // there are no successors, but every time; not every function contains
+ // return blocks with no successors (think loop latch as return block)).
+ double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+ if (w != ProfileInfoT<FType, BType>::MissingValue) {
+ DI.outWeight += w;
+ DI.outCount++;
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+ DI.outCount++;
+ }
+ }
+
+ // Read block weight.
+ DI.BBWeight = PI->getExecutionCount(BB);
+ CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
+ "BasicBlock has missing value", &DI);
+ CheckValue(DI.BBWeight < 0,
+ "BasicBlock has negative value", &DI);
+
+ // Check if this block is a setjmp target.
+ bool isSetJmpTarget = false;
+ if (DI.outWeight > DI.inWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F && (F->getName() == "_setjmp")) {
+ isSetJmpTarget = true; break;
+ }
+ }
+ }
+ }
+ // Check if this block is eventually reaching exit.
+ bool isExitReachable = false;
+ if (DI.inWeight > DI.outWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ FisVisited.clear();
+ isExitReachable |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ isExitReachable = true;
+ }
+ if (isExitReachable) break;
+ }
+ }
+ }
+
+ if (DI.inCount > 0 && DI.outCount == 0) {
+ // If this is a block with no successors.
+ if (!isSetJmpTarget) {
+ CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+ "inWeight and BBWeight do not match", &DI);
+ }
+ } else if (DI.inCount == 0 && DI.outCount > 0) {
+ // If this is a block with no predecessors.
+ if (!isExitReachable)
+ CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+ "BBWeight and outWeight do not match", &DI);
+ } else {
+ // If this block has successors and predecessors.
+ if (DI.inWeight > DI.outWeight && !isExitReachable)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ }
+
+
+ // Mark this block as visited, rescurse into successors.
+ BBisVisited.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ recurseBasicBlock(*bbi);
+ }
+ }
+
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
+ PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
+ if (!PI)
+ ASSERTMESSAGE("No ProfileInfo available");
+
+ // Prepare global variables.
+ PrintedDebugTree = false;
+ BBisVisited.clear();
+
+ // Fetch entry block and recurse into it.
+ const BType *entry = &F.getEntryBlock();
+ recurseBasicBlock(entry);
+
+ if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
+ ASSERTMESSAGE("Function count and entry block count do not match");
+
+ return false;
+ }
+
+ template<class FType, class BType>
+ char ProfileVerifierPassT<FType, BType>::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+
+namespace llvm {
+ FunctionPass *createProfileVerifierPass() {
+ return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
new file mode 100644
index 000000000000..52753cbe85af
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -0,0 +1,851 @@
+//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Detects single entry single exit regions in the control flow graph.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Assembly/Writer.h"
+
+#define DEBUG_TYPE "region"
+#include "llvm/Support/Debug.h"
+
+#include <set>
+#include <algorithm>
+
+using namespace llvm;
+
+// Always verify if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyRegionInfo = true;
+#else
+static bool VerifyRegionInfo = false;
+#endif
+
+static cl::opt<bool,true>
+VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
+ cl::desc("Verify region info (time consuming)"));
+
+STATISTIC(numRegions, "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
+static cl::opt<enum Region::PrintStyle> printStyle("print-region-style",
+ cl::Hidden,
+ cl::desc("style of printing regions"),
+ cl::values(
+ clEnumValN(Region::PrintNone, "none", "print no details"),
+ clEnumValN(Region::PrintBB, "bb",
+ "print regions in detail with block_iterator"),
+ clEnumValN(Region::PrintRN, "rn",
+ "print regions in detail with element_iterator"),
+ clEnumValEnd));
+//===----------------------------------------------------------------------===//
+/// Region Implementation
+Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo,
+ DominatorTree *dt, Region *Parent)
+ : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {}
+
+Region::~Region() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator it = BBNodeMap.begin(),
+ ie = BBNodeMap.end(); it != ie; ++it)
+ delete it->second;
+
+ // Only clean the cache for this Region. Caches of child Regions will be
+ // cleaned when the child Regions are deleted.
+ BBNodeMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+}
+
+void Region::replaceEntry(BasicBlock *BB) {
+ entry.setPointer(BB);
+}
+
+void Region::replaceExit(BasicBlock *BB) {
+ assert(exit && "No exit to replace!");
+ exit = BB;
+}
+
+bool Region::contains(const BasicBlock *B) const {
+ BasicBlock *BB = const_cast<BasicBlock*>(B);
+
+ assert(DT->getNode(BB) && "BB not part of the dominance tree");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ // Toplevel region.
+ if (!exit)
+ return true;
+
+ return (DT->dominates(entry, BB)
+ && !(DT->dominates(exit, BB) && DT->dominates(entry, exit)));
+}
+
+bool Region::contains(const Loop *L) const {
+ // BBs that are not part of any loop are element of the Loop
+ // described by the NULL pointer. This loop is not part of any region,
+ // except if the region describes the whole function.
+ if (L == 0)
+ return getExit() == 0;
+
+ if (!contains(L->getHeader()))
+ return false;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(),
+ BE = ExitingBlocks.end(); BI != BE; ++BI)
+ if (!contains(*BI))
+ return false;
+
+ return true;
+}
+
+Loop *Region::outermostLoopInRegion(Loop *L) const {
+ if (!contains(L))
+ return 0;
+
+ while (L && contains(L->getParentLoop())) {
+ L = L->getParentLoop();
+ }
+
+ return L;
+}
+
+Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
+ assert(LI && BB && "LI and BB cannot be null!");
+ Loop *L = LI->getLoopFor(BB);
+ return outermostLoopInRegion(L);
+}
+
+BasicBlock *Region::getEnteringBlock() const {
+ BasicBlock *entry = getEntry();
+ BasicBlock *Pred;
+ BasicBlock *enteringBlock = 0;
+
+ for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
+ ++PI) {
+ Pred = *PI;
+ if (DT->getNode(Pred) && !contains(Pred)) {
+ if (enteringBlock)
+ return 0;
+
+ enteringBlock = Pred;
+ }
+ }
+
+ return enteringBlock;
+}
+
+BasicBlock *Region::getExitingBlock() const {
+ BasicBlock *exit = getExit();
+ BasicBlock *Pred;
+ BasicBlock *exitingBlock = 0;
+
+ if (!exit)
+ return 0;
+
+ for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
+ ++PI) {
+ Pred = *PI;
+ if (contains(Pred)) {
+ if (exitingBlock)
+ return 0;
+
+ exitingBlock = Pred;
+ }
+ }
+
+ return exitingBlock;
+}
+
+bool Region::isSimple() const {
+ return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock();
+}
+
+std::string Region::getNameStr() const {
+ std::string exitName;
+ std::string entryName;
+
+ if (getEntry()->getName().empty()) {
+ raw_string_ostream OS(entryName);
+
+ WriteAsOperand(OS, getEntry(), false);
+ entryName = OS.str();
+ } else
+ entryName = getEntry()->getNameStr();
+
+ if (getExit()) {
+ if (getExit()->getName().empty()) {
+ raw_string_ostream OS(exitName);
+
+ WriteAsOperand(OS, getExit(), false);
+ exitName = OS.str();
+ } else
+ exitName = getExit()->getNameStr();
+ } else
+ exitName = "<Function Return>";
+
+ return entryName + " => " + exitName;
+}
+
+void Region::verifyBBInRegion(BasicBlock *BB) const {
+ if (!contains(BB))
+ llvm_unreachable("Broken region found!");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (!contains(*SI) && exit != *SI)
+ llvm_unreachable("Broken region found!");
+
+ if (entry != BB)
+ for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI)
+ if (!contains(*SI))
+ llvm_unreachable("Broken region found!");
+}
+
+void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const {
+ BasicBlock *exit = getExit();
+
+ visited->insert(BB);
+
+ verifyBBInRegion(BB);
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (*SI != exit && visited->find(*SI) == visited->end())
+ verifyWalk(*SI, visited);
+}
+
+void Region::verifyRegion() const {
+ // Only do verification when user wants to, otherwise this expensive
+ // check will be invoked by PassManager.
+ if (!VerifyRegionInfo) return;
+
+ std::set<BasicBlock*> visited;
+ verifyWalk(getEntry(), &visited);
+}
+
+void Region::verifyRegionNest() const {
+ for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->verifyRegionNest();
+
+ verifyRegion();
+}
+
+Region::block_iterator Region::block_begin() {
+ return GraphTraits<FlatIt<Region*> >::nodes_begin(this);
+}
+
+Region::block_iterator Region::block_end() {
+ return GraphTraits<FlatIt<Region*> >::nodes_end(this);
+}
+
+Region::const_block_iterator Region::block_begin() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_begin(this);
+}
+
+Region::const_block_iterator Region::block_end() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_end(this);
+}
+
+Region::element_iterator Region::element_begin() {
+ return GraphTraits<Region*>::nodes_begin(this);
+}
+
+Region::element_iterator Region::element_end() {
+ return GraphTraits<Region*>::nodes_end(this);
+}
+
+Region::const_element_iterator Region::element_begin() const {
+ return GraphTraits<const Region*>::nodes_begin(this);
+}
+
+Region::const_element_iterator Region::element_end() const {
+ return GraphTraits<const Region*>::nodes_end(this);
+}
+
+Region* Region::getSubRegionNode(BasicBlock *BB) const {
+ Region *R = RI->getRegionFor(BB);
+
+ if (!R || R == this)
+ return 0;
+
+ // If we pass the BB out of this region, that means our code is broken.
+ assert(contains(R) && "BB not in current region!");
+
+ while (contains(R->getParent()) && R->getParent() != this)
+ R = R->getParent();
+
+ if (R->getEntry() != BB)
+ return 0;
+
+ return R;
+}
+
+RegionNode* Region::getBBNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+
+ BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
+
+ if (at != BBNodeMap.end())
+ return at->second;
+
+ RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB);
+ BBNodeMap.insert(std::make_pair(BB, NewNode));
+ return NewNode;
+}
+
+RegionNode* Region::getNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+ if (Region* Child = getSubRegionNode(BB))
+ return Child->getNode();
+
+ return getBBNode(BB);
+}
+
+void Region::transferChildrenTo(Region *To) {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ (*I)->parent = To;
+ To->children.push_back(*I);
+ }
+ children.clear();
+}
+
+void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
+ assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+ assert(std::find(begin(), end(), SubRegion) == children.end()
+ && "Subregion already exists!");
+
+ SubRegion->parent = this;
+ children.push_back(SubRegion);
+
+ if (!moveChildren)
+ return;
+
+ assert(SubRegion->children.size() == 0
+ && "SubRegions that contain children are not supported");
+
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSubRegion()) {
+ BasicBlock *BB = (*I)->getNodeAs<BasicBlock>();
+
+ if (SubRegion->contains(BB))
+ RI->setRegionFor(BB, SubRegion);
+ }
+
+ std::vector<Region*> Keep;
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (SubRegion->contains(*I) && *I != SubRegion) {
+ SubRegion->children.push_back(*I);
+ (*I)->parent = SubRegion;
+ } else
+ Keep.push_back(*I);
+
+ children.clear();
+ children.insert(children.begin(), Keep.begin(), Keep.end());
+}
+
+
+Region *Region::removeSubRegion(Region *Child) {
+ assert(Child->parent == this && "Child is not a child of this region!");
+ Child->parent = 0;
+ RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+ assert(I != children.end() && "Region does not exit. Unable to remove.");
+ children.erase(children.begin()+(I-begin()));
+ return Child;
+}
+
+unsigned Region::getDepth() const {
+ unsigned Depth = 0;
+
+ for (Region *R = parent; R != 0; R = R->parent)
+ ++Depth;
+
+ return Depth;
+}
+
+Region *Region::getExpandedRegion() const {
+ unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
+
+ if (NumSuccessors == 0)
+ return NULL;
+
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(getEntry(), *PI))
+ return NULL;
+
+ Region *R = RI->getRegionFor(exit);
+
+ if (R->getEntry() != exit) {
+ if (exit->getTerminator()->getNumSuccessors() == 1)
+ return new Region(getEntry(), *succ_begin(exit), RI, DT);
+ else
+ return NULL;
+ }
+
+ while (R->getParent() && R->getParent()->getEntry() == exit)
+ R = R->getParent();
+
+ if (!DT->dominates(getEntry(), R->getExit()))
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(R->getExit(), *PI))
+ return NULL;
+
+ return new Region(getEntry(), R->getExit(), RI, DT);
+}
+
+void Region::print(raw_ostream &OS, bool print_tree, unsigned level,
+ enum PrintStyle Style) const {
+ if (print_tree)
+ OS.indent(level*2) << "[" << level << "] " << getNameStr();
+ else
+ OS.indent(level*2) << getNameStr();
+
+ OS << "\n";
+
+
+ if (Style != PrintNone) {
+ OS.indent(level*2) << "{\n";
+ OS.indent(level*2 + 2);
+
+ if (Style == PrintBB) {
+ for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ","
+ } else if (Style == PrintRN) {
+ for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ",
+ }
+
+ OS << "\n";
+ }
+
+ if (print_tree)
+ for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->print(OS, print_tree, level+1, Style);
+
+ if (Style != PrintNone)
+ OS.indent(level*2) << "} \n";
+}
+
+void Region::dump() const {
+ print(dbgs(), true, getDepth(), printStyle.getValue());
+}
+
+void Region::clearNodeCache() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator I = BBNodeMap.begin(),
+ IE = BBNodeMap.end(); I != IE; ++I)
+ delete I->second;
+
+ BBNodeMap.clear();
+ for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->clearNodeCache();
+}
+
+//===----------------------------------------------------------------------===//
+// RegionInfo implementation
+//
+
+bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry,
+ BasicBlock *exit) const {
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(entry, P) && !DT->dominates(exit, P))
+ return false;
+ }
+ return true;
+}
+
+bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+ typedef DominanceFrontier::DomSetType DST;
+
+ DST *entrySuccs = &DF->find(entry)->second;
+
+ // Exit is the header of a loop that contains the entry. In this case,
+ // the dominance frontier must only contain the exit.
+ if (!DT->dominates(entry, exit)) {
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI)
+ if (*SI != exit && *SI != entry)
+ return false;
+
+ return true;
+ }
+
+ DST *exitSuccs = &DF->find(exit)->second;
+
+ // Do not allow edges leaving the region.
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI) {
+ if (*SI == exit || *SI == entry)
+ continue;
+ if (exitSuccs->find(*SI) == exitSuccs->end())
+ return false;
+ if (!isCommonDomFrontier(*SI, entry, exit))
+ return false;
+ }
+
+ // Do not allow edges pointing into the region.
+ for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end();
+ SI != SE; ++SI)
+ if (DT->properlyDominates(entry, *SI) && *SI != exit)
+ return false;
+
+
+ return true;
+}
+
+void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit,
+ BBtoBBMap *ShortCut) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ BBtoBBMap::iterator e = ShortCut->find(exit);
+
+ if (e == ShortCut->end())
+ // No further region at exit available.
+ (*ShortCut)[entry] = exit;
+ else {
+ // We found a region e that starts at exit. Therefore (entry, e->second)
+ // is also a region, that is larger than (entry, exit). Insert the
+ // larger one.
+ BasicBlock *BB = e->second;
+ (*ShortCut)[entry] = BB;
+ }
+}
+
+DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N,
+ BBtoBBMap *ShortCut) const {
+ BBtoBBMap::iterator e = ShortCut->find(N->getBlock());
+
+ if (e == ShortCut->end())
+ return N->getIDom();
+
+ return PDT->getNode(e->second)->getIDom();
+}
+
+bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ unsigned num_successors = succ_end(entry) - succ_begin(entry);
+
+ if (num_successors <= 1 && exit == *(succ_begin(entry)))
+ return true;
+
+ return false;
+}
+
+void RegionInfo::updateStatistics(Region *R) {
+ ++numRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple()) ++numSimpleRegions;
+}
+
+Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ if (isTrivialRegion(entry, exit))
+ return 0;
+
+ Region *region = new Region(entry, exit, this, DT);
+ BBtoRegion.insert(std::make_pair(entry, region));
+
+ #ifdef XDEBUG
+ region->verifyRegion();
+ #else
+ DEBUG(region->verifyRegion());
+ #endif
+
+ updateStatistics(region);
+ return region;
+}
+
+void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) {
+ assert(entry);
+
+ DomTreeNode *N = PDT->getNode(entry);
+
+ if (!N)
+ return;
+
+ Region *lastRegion= 0;
+ BasicBlock *lastExit = entry;
+
+ // As only a BasicBlock that postdominates entry can finish a region, walk the
+ // post dominance tree upwards.
+ while ((N = getNextPostDom(N, ShortCut))) {
+ BasicBlock *exit = N->getBlock();
+
+ if (!exit)
+ break;
+
+ if (isRegion(entry, exit)) {
+ Region *newRegion = createRegion(entry, exit);
+
+ if (lastRegion)
+ newRegion->addSubRegion(lastRegion);
+
+ lastRegion = newRegion;
+ lastExit = exit;
+ }
+
+ // This can never be a region, so stop the search.
+ if (!DT->dominates(entry, exit))
+ break;
+ }
+
+ // Tried to create regions from entry to lastExit. Next time take a
+ // shortcut from entry to lastExit.
+ if (lastExit != entry)
+ insertShortCut(entry, lastExit, ShortCut);
+}
+
+void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) {
+ BasicBlock *entry = &(F.getEntryBlock());
+ DomTreeNode *N = DT->getNode(entry);
+
+ // Iterate over the dominance tree in post order to start with the small
+ // regions from the bottom of the dominance tree. If the small regions are
+ // detected first, detection of bigger regions is faster, as we can jump
+ // over the small regions.
+ for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE;
+ ++FI) {
+ findRegionsWithEntry(FI->getBlock(), ShortCut);
+ }
+}
+
+Region *RegionInfo::getTopMostParent(Region *region) {
+ while (region->parent)
+ region = region->getParent();
+
+ return region;
+}
+
+void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
+ BasicBlock *BB = N->getBlock();
+
+ // Passed region exit
+ while (BB == region->getExit())
+ region = region->getParent();
+
+ BBtoRegionMap::iterator it = BBtoRegion.find(BB);
+
+ // This basic block is a start block of a region. It is already in the
+ // BBtoRegion relation. Only the child basic blocks have to be updated.
+ if (it != BBtoRegion.end()) {
+ Region *newRegion = it->second;;
+ region->addSubRegion(getTopMostParent(newRegion));
+ region = newRegion;
+ } else {
+ BBtoRegion[BB] = region;
+ }
+
+ for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI)
+ buildRegionsTree(*CI, region);
+}
+
+void RegionInfo::releaseMemory() {
+ BBtoRegion.clear();
+ if (TopLevelRegion)
+ delete TopLevelRegion;
+ TopLevelRegion = 0;
+}
+
+RegionInfo::RegionInfo() : FunctionPass(ID) {
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ TopLevelRegion = 0;
+}
+
+RegionInfo::~RegionInfo() {
+ releaseMemory();
+}
+
+void RegionInfo::Calculate(Function &F) {
+ // ShortCut a function where for every BB the exit of the largest region
+ // starting with BB is stored. These regions can be threated as single BBS.
+ // This improves performance on linear CFGs.
+ BBtoBBMap ShortCut;
+
+ scanForRegions(F, &ShortCut);
+ BasicBlock *BB = &F.getEntryBlock();
+ buildRegionsTree(DT->getNode(BB), TopLevelRegion);
+}
+
+bool RegionInfo::runOnFunction(Function &F) {
+ releaseMemory();
+
+ DT = &getAnalysis<DominatorTree>();
+ PDT = &getAnalysis<PostDominatorTree>();
+ DF = &getAnalysis<DominanceFrontier>();
+
+ TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+ updateStatistics(TopLevelRegion);
+
+ Calculate(F);
+
+ return false;
+}
+
+void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void RegionInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "Region tree:\n";
+ TopLevelRegion->print(OS, true, 0, printStyle.getValue());
+ OS << "End region tree\n";
+}
+
+void RegionInfo::verifyAnalysis() const {
+ // Only do verification when user wants to, otherwise this expensive check
+ // will be invoked by PMDataManager::verifyPreservedAnalysis when
+ // a regionpass (marked PreservedAll) finish.
+ if (!VerifyRegionInfo) return;
+
+ TopLevelRegion->verifyRegionNest();
+}
+
+// Region pass manager support.
+Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
+ BBtoRegionMap::const_iterator I=
+ BBtoRegion.find(BB);
+ return I != BBtoRegion.end() ? I->second : 0;
+}
+
+void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
+ BBtoRegion[BB] = R;
+}
+
+Region *RegionInfo::operator[](BasicBlock *BB) const {
+ return getRegionFor(BB);
+}
+
+BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
+ BasicBlock *Exit = NULL;
+
+ while (true) {
+ // Get largest region that starts at BB.
+ Region *R = getRegionFor(BB);
+ while (R && R->getParent() && R->getParent()->getEntry() == BB)
+ R = R->getParent();
+
+ // Get the single exit of BB.
+ if (R && R->getEntry() == BB)
+ Exit = R->getExit();
+ else if (++succ_begin(BB) == succ_end(BB))
+ Exit = *succ_begin(BB);
+ else // No single exit exists.
+ return Exit;
+
+ // Get largest region that starts at Exit.
+ Region *ExitR = getRegionFor(Exit);
+ while (ExitR && ExitR->getParent()
+ && ExitR->getParent()->getEntry() == Exit)
+ ExitR = ExitR->getParent();
+
+ for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
+ ++PI)
+ if (!R->contains(*PI) && !ExitR->contains(*PI))
+ break;
+
+ // This stops infinite cycles.
+ if (DT->dominates(Exit, BB))
+ break;
+
+ BB = Exit;
+ }
+
+ return Exit;
+}
+
+Region*
+RegionInfo::getCommonRegion(Region *A, Region *B) const {
+ assert (A && B && "One of the Regions is NULL");
+
+ if (A->contains(B)) return A;
+
+ while (!B->contains(A))
+ B = B->getParent();
+
+ return B;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const {
+ Region* ret = Regions.back();
+ Regions.pop_back();
+
+ for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(),
+ E = Regions.end(); I != E; ++I)
+ ret = getCommonRegion(ret, *I);
+
+ return ret;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
+ Region* ret = getRegionFor(BBs.back());
+ BBs.pop_back();
+
+ for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(),
+ E = BBs.end(); I != E; ++I)
+ ret = getCommonRegion(ret, getRegionFor(*I));
+
+ return ret;
+}
+
+void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
+{
+ Region *R = getRegionFor(OldBB);
+
+ setRegionFor(NewBB, R);
+
+ while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
+ R->replaceEntry(NewBB);
+ R = R->getParent();
+ }
+
+ setRegionFor(OldBB, R);
+}
+
+char RegionInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file, to use them
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+namespace llvm {
+ FunctionPass *createRegionInfoPass() {
+ return new RegionInfo();
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
new file mode 100644
index 000000000000..3269dcc63d5e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -0,0 +1,275 @@
+//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RegionPass and RGPassManager. All region optimization
+// and transformation passes are derived from RegionPass. RGPassManager is
+// responsible for managing RegionPasses.
+// most of these codes are COPY from LoopPass.cpp
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Timer.h"
+
+#define DEBUG_TYPE "regionpassmgr"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager(int Depth)
+ : FunctionPass(ID), PMDataManager(Depth) {
+ skipThisRegion = false;
+ redoThisRegion = false;
+ RI = NULL;
+ CurrentRegion = NULL;
+}
+
+// Recurse through all subregions and all regions into RQ.
+static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
+ RQ.push_back(R);
+ for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
+ addRegionIntoQueue(*I, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.addRequired<RegionInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+ RI = &getAnalysis<RegionInfo>();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+
+ if (RQ.empty()) // No regions, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
+ I != E; ++I) {
+ Region *R = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *RP = (RegionPass *)getContainedPass(Index);
+ Changed |= RP->doInitialization(R, *this);
+ }
+ }
+
+ // Walk Regions
+ while (!RQ.empty()) {
+
+ CurrentRegion = RQ.back();
+ skipThisRegion = false;
+ redoThisRegion = false;
+
+ // Run all passes on the current Region.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+ TimeRegion PassTimer(getPassTimer(P));
+ Changed |= P->runOnRegion(CurrentRegion, *this);
+ }
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+
+ if (!skipThisRegion) {
+ // Manually check that this region is still healthy. This is done
+ // instead of relying on RegionInfo::verifyRegion since RegionInfo
+ // is a function pass and it's really expensive to verify every
+ // Region in the function every time. That level of checking can be
+ // enabled with the -verify-region-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(P));
+ CurrentRegion->verifyRegion();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr(),
+ ON_REGION_MSG);
+
+ if (skipThisRegion)
+ // Do not run other passes on this region.
+ break;
+ }
+
+ // If the region was deleted, release all the region passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisRegion)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_REGION_MSG);
+ }
+
+ // Pop the region from queue after running all passes.
+ RQ.pop_back();
+
+ if (redoThisRegion)
+ RQ.push_back(CurrentRegion);
+
+ // Free all region nodes created in region passes.
+ RI->clearNodeCache();
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+ // Print the region tree after all pass.
+ DEBUG(
+ dbgs() << "\nRegion tree of function " << F.getName()
+ << " after all region Pass:\n";
+ RI->dump();
+ dbgs() << "\n";
+ );
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void RGPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Region Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// PrintRegionPass
+class PrintRegionPass : public RegionPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
+ PrintRegionPass(const std::string &B, raw_ostream &o)
+ : RegionPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+ Out << Banner;
+ for (Region::block_iterator I = R->block_begin(), E = R->block_end();
+ I != E; ++I)
+ (*I)->getEntry()->print(Out);
+
+ return false;
+ }
+};
+
+char PrintRegionPass::ID = 0;
+} //end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// RegionPass
+
+// Check if this pass is suitable for the current RGPassManager, if
+// available. This pass P is not suitable for a RGPassManager if P
+// is not preserving higher level analysis info used by other
+// RGPassManager passes. In such case, pop RGPassManager from the
+// stack. This will force assignPassManager() to create new
+// LPPassManger as expected.
+void RegionPass::preparePassManager(PMStack &PMS) {
+
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+
+ // If this pass is destroying high level information that is used
+ // by other passes that are managed by LPM then do not insert
+ // this pass in current LPM. Use new RGPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void RegionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+ RGPassManager *RGPM;
+
+ // Create new Region Pass Manager if it does not exist.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager)
+ RGPM = (RGPassManager*)PMS.top();
+ else {
+
+ assert (!PMS.empty() && "Unable to create Region Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Call Graph Pass Manager
+ RGPM = new RGPassManager(PMD->getDepth() + 1);
+ RGPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(RGPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ TPM->schedulePass(RGPM);
+
+ // [4] Push new manager into PMS
+ PMS.push(RGPM);
+ }
+
+ RGPM->add(this);
+}
+
+/// Get the printer pass
+Pass *RegionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintRegionPass(Banner, O);
+}
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
new file mode 100644
index 000000000000..a1730b0a3ca1
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -0,0 +1,220 @@
+//===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Print out the region tree of a function using dotty/graphviz.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPrinter.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// onlySimpleRegion - Show only the simple regions in the RegionViewer.
+static cl::opt<bool>
+onlySimpleRegions("only-simple-regions",
+ cl::desc("Show only simple regions in the graphviz viewer"),
+ cl::Hidden,
+ cl::init(false));
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+
+ if (!Node->isSubRegion()) {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+
+ return "Not implemented";
+ }
+};
+
+template<>
+struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<RegionNode*>(isSimple) {}
+
+ static std::string getGraphName(RegionInfo *DT) {
+ return "Region Graph";
+ }
+
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
+ G->getTopLevelRegion());
+ }
+
+ std::string getEdgeAttributes(RegionNode *srcNode,
+ GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) {
+
+ RegionNode *destNode = *CI;
+
+ if (srcNode->isSubRegion() || destNode->isSubRegion())
+ return "";
+
+ // In case of a backedge, do not use it to define the layout of the nodes.
+ BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
+ BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
+
+ Region *R = RI->getRegionFor(destBB);
+
+ while (R && R->getParent())
+ if (R->getParent()->getEntry() == destBB)
+ R = R->getParent();
+ else
+ break;
+
+ if (R->getEntry() == destBB && R->contains(srcBB))
+ return "constraint=false";
+
+ return "";
+ }
+
+ // Print the cluster of the subregions. This groups the single basic blocks
+ // and adds a different background color for each group.
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
+ unsigned depth = 0) {
+ raw_ostream &O = GW.getOStream();
+ O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R)
+ << " {\n";
+ O.indent(2 * (depth + 1)) << "label = \"\";\n";
+
+ if (!onlySimpleRegions || R->isSimple()) {
+ O.indent(2 * (depth + 1)) << "style = filled;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 1) << "\n";
+
+ } else {
+ O.indent(2 * (depth + 1)) << "style = solid;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 2) << "\n";
+ }
+
+ for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+ printRegionCluster(*RI, GW, depth + 1);
+
+ RegionInfo *RI = R->getRegionInfo();
+
+ for (Region::const_block_iterator BI = R->block_begin(),
+ BE = R->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = (*BI)->getNodeAs<BasicBlock>();
+ if (RI->getRegionFor(BB) == R)
+ O.indent(2 * (depth + 1)) << "Node"
+ << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
+ << ";\n";
+ }
+
+ O.indent(2 * depth) << "}\n";
+ }
+
+ static void addCustomGraphFeatures(const RegionInfo* RI,
+ GraphWriter<RegionInfo*> &GW) {
+ raw_ostream &O = GW.getOStream();
+ O << "\tcolorscheme = \"paired12\"\n";
+ printRegionCluster(RI->getTopLevelRegion(), GW, 4);
+ }
+};
+} //end namespace llvm
+
+namespace {
+
+struct RegionViewer
+ : public DOTGraphTraitsViewer<RegionInfo, false> {
+ static char ID;
+ RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){
+ initializeRegionViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionViewer::ID = 0;
+
+struct RegionOnlyViewer
+ : public DOTGraphTraitsViewer<RegionInfo, true> {
+ static char ID;
+ RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) {
+ initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionOnlyViewer::ID = 0;
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, false> {
+ static char ID;
+ RegionPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {
+ initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionPrinter::ID = 0;
+} //end anonymous namespace
+
+INITIALIZE_PASS(RegionPrinter, "dot-regions",
+ "Print regions of function to 'dot' file", true, true)
+
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+ true, true)
+
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+ "View regions of function (with no function bodies)",
+ true, true)
+
+namespace {
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, true> {
+ static char ID;
+ RegionOnlyPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {
+ initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+}
+
+char RegionOnlyPrinter::ID = 0;
+INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file "
+ "(with no function bodies)",
+ true, true)
+
+FunctionPass* llvm::createRegionViewerPass() {
+ return new RegionViewer();
+}
+
+FunctionPass* llvm::createRegionOnlyViewerPass() {
+ return new RegionOnlyViewer();
+}
+
+FunctionPass* llvm::createRegionPrinterPass() {
+ return new RegionPrinter();
+}
+
+FunctionPass* llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
new file mode 100644
index 000000000000..bab4619894c7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -0,0 +1,6361 @@
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution analysis
+// engine, which is used primarily to analyze expressions involving induction
+// variables in loops.
+//
+// There are several aspects to this library. First is the representation of
+// scalar expressions, which are represented as subclasses of the SCEV class.
+// These classes are used to represent certain types of subexpressions that we
+// can handle. We only create one SCEV of a particular shape, so
+// pointer-comparisons for equality are legal.
+//
+// One important aspect of the SCEV objects is that they are never cyclic, even
+// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
+// the PHI node is one of the idioms that we can represent (e.g., a polynomial
+// recurrence) then we represent it directly as a recurrence node, otherwise we
+// represent it as a SCEVUnknown node.
+//
+// In addition to being able to represent expressions of various types, we also
+// have folders that are used to build the *canonical* representation for a
+// particular expression. These folders are capable of using a variety of
+// rewrite rules to simplify the expressions.
+//
+// Once the folders are defined, we can implement the more interesting
+// higher-level code, such as the code that recognizes PHI nodes of various
+// types, computes the execution count of a loop, etc.
+//
+// TODO: We should use these routines and value representations to implement
+// dependence analysis!
+//
+//===----------------------------------------------------------------------===//
+//
+// There are several good references for the techniques used in this analysis.
+//
+// Chains of recurrences -- a method to expedite the evaluation
+// of closed-form functions
+// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
+//
+// On computational properties of chains of recurrences
+// Eugene V. Zima
+//
+// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
+// Robert A. van Engelen
+//
+// Efficient Symbolic Analysis for Optimizing Compilers
+// Robert A. van Engelen
+//
+// Using the chains of recurrences algebra for data dependence testing and
+// induction variable substitution
+// MS Thesis, Johnie Birch
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalar-evolution"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumArrayLenItCounts,
+ "Number of trip counts computed with array length");
+STATISTIC(NumTripCountsComputed,
+ "Number of loops with predictable loop counts");
+STATISTIC(NumTripCountsNotComputed,
+ "Number of loops without predictable loop counts");
+STATISTIC(NumBruteForceTripCountsComputed,
+ "Number of loops with trip counts computed by force");
+
+static cl::opt<unsigned>
+MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::desc("Maximum number of iterations SCEV will "
+ "symbolically execute a constant "
+ "derived loop"),
+ cl::init(100));
+
+INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+char ScalarEvolution::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// SCEV class definitions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Implementation of the SCEV class.
+//
+
+void SCEV::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void SCEV::print(raw_ostream &OS) const {
+ switch (getSCEVType()) {
+ case scConstant:
+ WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+ return;
+ case scTruncate: {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
+ const SCEV *Op = Trunc->getOperand();
+ OS << "(trunc " << *Op->getType() << " " << *Op << " to "
+ << *Trunc->getType() << ")";
+ return;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
+ const SCEV *Op = ZExt->getOperand();
+ OS << "(zext " << *Op->getType() << " " << *Op << " to "
+ << *ZExt->getType() << ")";
+ return;
+ }
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
+ const SCEV *Op = SExt->getOperand();
+ OS << "(sext " << *Op->getType() << " " << *Op << " to "
+ << *SExt->getType() << ")";
+ return;
+ }
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
+ OS << "{" << *AR->getOperand(0);
+ for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
+ OS << ",+," << *AR->getOperand(i);
+ OS << "}<";
+ if (AR->getNoWrapFlags(FlagNUW))
+ OS << "nuw><";
+ if (AR->getNoWrapFlags(FlagNSW))
+ OS << "nsw><";
+ if (AR->getNoWrapFlags(FlagNW) &&
+ !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
+ OS << "nw><";
+ WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+ OS << ">";
+ return;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
+ const char *OpStr = 0;
+ switch (NAry->getSCEVType()) {
+ case scAddExpr: OpStr = " + "; break;
+ case scMulExpr: OpStr = " * "; break;
+ case scUMaxExpr: OpStr = " umax "; break;
+ case scSMaxExpr: OpStr = " smax "; break;
+ }
+ OS << "(";
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ OS << **I;
+ if (llvm::next(I) != E)
+ OS << OpStr;
+ }
+ OS << ")";
+ return;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
+ OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
+ return;
+ }
+ case scUnknown: {
+ const SCEVUnknown *U = cast<SCEVUnknown>(this);
+ const Type *AllocTy;
+ if (U->isSizeOf(AllocTy)) {
+ OS << "sizeof(" << *AllocTy << ")";
+ return;
+ }
+ if (U->isAlignOf(AllocTy)) {
+ OS << "alignof(" << *AllocTy << ")";
+ return;
+ }
+
+ const Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo)) {
+ OS << "offsetof(" << *CTy << ", ";
+ WriteAsOperand(OS, FieldNo, false);
+ OS << ")";
+ return;
+ }
+
+ // Otherwise just print it normally.
+ WriteAsOperand(OS, U->getValue(), false);
+ return;
+ }
+ case scCouldNotCompute:
+ OS << "***COULDNOTCOMPUTE***";
+ return;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+const Type *SCEV::getType() const {
+ switch (getSCEVType()) {
+ case scConstant:
+ return cast<SCEVConstant>(this)->getType();
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return cast<SCEVCastExpr>(this)->getType();
+ case scAddRecExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ return cast<SCEVNAryExpr>(this)->getType();
+ case scAddExpr:
+ return cast<SCEVAddExpr>(this)->getType();
+ case scUDivExpr:
+ return cast<SCEVUDivExpr>(this)->getType();
+ case scUnknown:
+ return cast<SCEVUnknown>(this)->getType();
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return 0;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return 0;
+}
+
+bool SCEV::isZero() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isZero();
+ return false;
+}
+
+bool SCEV::isOne() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isOne();
+ return false;
+}
+
+bool SCEV::isAllOnesValue() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isAllOnesValue();
+ return false;
+}
+
+SCEVCouldNotCompute::SCEVCouldNotCompute() :
+ SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
+
+bool SCEVCouldNotCompute::classof(const SCEV *S) {
+ return S->getSCEVType() == scCouldNotCompute;
+}
+
+const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scConstant);
+ ID.AddPointer(V);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+ return getConstant(ConstantInt::get(getContext(), Val));
+}
+
+const SCEV *
+ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
+ const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ return getConstant(ConstantInt::get(ITy, V, isSigned));
+}
+
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
+ unsigned SCEVTy, const SCEV *op, const Type *ty)
+ : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
+
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scTruncate, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate non-integer value!");
+}
+
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scZeroExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot zero extend non-integer value!");
+}
+
+SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scSignExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot sign extend non-integer value!");
+}
+
+void SCEVUnknown::deleted() {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Release the value.
+ setValPtr(0);
+}
+
+void SCEVUnknown::allUsesReplacedWith(Value *New) {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Update this SCEVUnknown to point to the new value. This is needed
+ // because there may still be outstanding SCEVs which still point to
+ // this SCEVUnknown.
+ setValPtr(New);
+}
+
+bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getNumOperands() == 2)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
+ if (CI->isOne()) {
+ AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType();
+ return true;
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ const Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked() &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ AllocTy = STy->getElementType(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getOperand(1)->isNullValue()) {
+ const Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ // Ignore vector types here so that ScalarEvolutionExpander doesn't
+ // emit getelementptrs that index into vectors.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ CTy = Ty;
+ FieldNo = CE->getOperand(2);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
+ /// than the complexity of the RHS. This comparator is used to canonicalize
+ /// expressions.
+ class SCEVComplexityCompare {
+ const LoopInfo *const LI;
+ public:
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+
+ // Return true or false if LHS is less than, or at least RHS, respectively.
+ bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
+
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
+
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
+
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (LType) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
+ // Order pointer values after integer values. This helps SCEVExpander
+ // form GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
+
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
+ }
+
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
+
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ return 0;
+ }
+
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+
+ return 0;
+ }
+
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
+ }
+
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
+ }
+
+ default:
+ break;
+ }
+
+ llvm_unreachable("Unknown SCEV kind!");
+ return 0;
+ }
+ };
+}
+
+/// GroupByComplexity - Given a list of SCEV objects, order them by their
+/// complexity, and group objects of the same complexity together by value.
+/// When this routine is finished, we know that any duplicates in the vector are
+/// consecutive and that complexity is monotonically increasing.
+///
+/// Note that we go take special precautions to ensure that we get deterministic
+/// results from this routine. In other words, we don't want the results of
+/// this to depend on where the addresses of various SCEV objects happened to
+/// land in memory.
+///
+static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
+ LoopInfo *LI) {
+ if (Ops.size() < 2) return; // Noop
+ if (Ops.size() == 2) {
+ // This is the common case, which also happens to be trivially simple.
+ // Special case it.
+ const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
+ if (SCEVComplexityCompare(LI)(RHS, LHS))
+ std::swap(LHS, RHS);
+ return;
+ }
+
+ // Do the rough sort by complexity.
+ std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+
+ // Now that we are sorted by complexity, group elements of the same
+ // complexity. Note that this is, at worst, N^2, but the vector is likely to
+ // be extremely short in practice. Note that we take this approach because we
+ // do not want to depend on the addresses of the objects we are grouping.
+ for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
+ const SCEV *S = Ops[i];
+ unsigned Complexity = S->getSCEVType();
+
+ // If there are any objects of the same complexity and same value as this
+ // one, group them.
+ for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
+ if (Ops[j] == S) { // Found a duplicate.
+ // Move it to immediately after i'th element.
+ std::swap(Ops[i+1], Ops[j]);
+ ++i; // no need to rescan it.
+ if (i == e-2) return; // Done!
+ }
+ }
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Simple SCEV method implementations
+//===----------------------------------------------------------------------===//
+
+/// BinomialCoefficient - Compute BC(It, K). The result has width W.
+/// Assume, K > 0.
+static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
+ ScalarEvolution &SE,
+ const Type* ResultTy) {
+ // Handle the simplest case efficiently.
+ if (K == 1)
+ return SE.getTruncateOrZeroExtend(It, ResultTy);
+
+ // We are using the following formula for BC(It, K):
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
+ //
+ // Suppose, W is the bitwidth of the return value. We must be prepared for
+ // overflow. Hence, we must assure that the result of our computation is
+ // equal to the accurate one modulo 2^W. Unfortunately, division isn't
+ // safe in modular arithmetic.
+ //
+ // However, this code doesn't use exactly that formula; the formula it uses
+ // is something like the following, where T is the number of factors of 2 in
+ // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
+ // exponentiation:
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
+ //
+ // This formula is trivially equivalent to the previous formula. However,
+ // this formula can be implemented much more efficiently. The trick is that
+ // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
+ // arithmetic. To do exact division in modular arithmetic, all we have
+ // to do is multiply by the inverse. Therefore, this step can be done at
+ // width W.
+ //
+ // The next issue is how to safely do the division by 2^T. The way this
+ // is done is by doing the multiplication step at a width of at least W + T
+ // bits. This way, the bottom W+T bits of the product are accurate. Then,
+ // when we perform the division by 2^T (which is equivalent to a right shift
+ // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
+ // truncated out after the division by 2^T.
+ //
+ // In comparison to just directly using the first formula, this technique
+ // is much more efficient; using the first formula requires W * K bits,
+ // but this formula less than W + K bits. Also, the first formula requires
+ // a division step, whereas this formula only requires multiplies and shifts.
+ //
+ // It doesn't matter whether the subtraction step is done in the calculation
+ // width or the input iteration count's width; if the subtraction overflows,
+ // the result must be zero anyway. We prefer here to do it in the width of
+ // the induction variable because it helps a lot for certain cases; CodeGen
+ // isn't smart enough to ignore the overflow, which leads to much less
+ // efficient code if the width of the subtraction is wider than the native
+ // register width.
+ //
+ // (It's possible to not widen at all by pulling out factors of 2 before
+ // the multiplication; for example, K=2 can be calculated as
+ // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
+ // extra arithmetic, so it's not an obvious win, and it gets
+ // much more complicated for K > 3.)
+
+ // Protection from insane SCEVs; this bound is conservative,
+ // but it probably doesn't matter.
+ if (K > 1000)
+ return SE.getCouldNotCompute();
+
+ unsigned W = SE.getTypeSizeInBits(ResultTy);
+
+ // Calculate K! / 2^T and T; we divide out the factors of two before
+ // multiplying for calculating K! / 2^T to avoid overflow.
+ // Other overflow doesn't matter because we only care about the bottom
+ // W bits of the result.
+ APInt OddFactorial(W, 1);
+ unsigned T = 1;
+ for (unsigned i = 3; i <= K; ++i) {
+ APInt Mult(W, i);
+ unsigned TwoFactors = Mult.countTrailingZeros();
+ T += TwoFactors;
+ Mult = Mult.lshr(TwoFactors);
+ OddFactorial *= Mult;
+ }
+
+ // We need at least W + T bits for the multiplication step
+ unsigned CalculationBits = W + T;
+
+ // Calculate 2^T, at width T+W.
+ APInt DivFactor = APInt(CalculationBits, 1).shl(T);
+
+ // Calculate the multiplicative inverse of K! / 2^T;
+ // this multiplication factor will perform the exact division by
+ // K! / 2^T.
+ APInt Mod = APInt::getSignedMinValue(W+1);
+ APInt MultiplyFactor = OddFactorial.zext(W+1);
+ MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
+ MultiplyFactor = MultiplyFactor.trunc(W);
+
+ // Calculate the product, at width T+W
+ const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+ CalculationBits);
+ const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
+ for (unsigned i = 1; i != K; ++i) {
+ const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
+ Dividend = SE.getMulExpr(Dividend,
+ SE.getTruncateOrZeroExtend(S, CalculationTy));
+ }
+
+ // Divide by 2^T
+ const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
+
+ // Truncate the result, and divide by K! / 2^T.
+
+ return SE.getMulExpr(SE.getConstant(MultiplyFactor),
+ SE.getTruncateOrZeroExtend(DivResult, ResultTy));
+}
+
+/// evaluateAtIteration - Return the value of this chain of recurrences at
+/// the specified iteration number. We can evaluate this recurrence by
+/// multiplying each element in the chain by the binomial coefficient
+/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as:
+///
+/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
+///
+/// where BC(It, k) stands for binomial coefficient.
+///
+const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
+ ScalarEvolution &SE) const {
+ const SCEV *Result = getStart();
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ // The computation is correct in the face of overflow provided that the
+ // multiplication is performed _after_ the evaluation of the binomial
+ // coefficient.
+ const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
+ if (isa<SCEVCouldNotCompute>(Coeff))
+ return Coeff;
+
+ Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
+ }
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Expression folder implementations
+//===----------------------------------------------------------------------===//
+
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
+ "This is not a truncating conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scTruncate);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // trunc(trunc(x)) --> trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
+ return getTruncateExpr(ST->getOperand(), Ty);
+
+ // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getTruncateOrSignExtend(SS->getOperand(), Ty);
+
+ // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+
+ // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getAddExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getMulExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // If the input value is a chrec scev, truncate the chrec's operands.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
+ return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
+ }
+
+ // As a special case, fold trunc(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
+ // The cast wasn't folded; create an explicit cast node. We can reuse
+ // the existing insert position since if we get here, we won't have
+ // made any changes which would invalidate it.
+ SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // zext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scZeroExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // zext(trunc(x)) --> zext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all zero bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getUnsignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
+ CR.zextOrTrunc(NewBits)))
+ return getTruncateOrZeroExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can zero extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNUW))
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+ // in infinite recursion. In the later case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+ // Check whether the backedge-taken count can be losslessly casted to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no unsigned overflow.
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *Add = getAddExpr(Start, ZMul);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as signed.
+ // This covers loops that count down.
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
+ Add = getAddExpr(Start, SMul);
+ OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getSignExtendExpr(Step, WideTy)));
+ if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+ getUnsignedRange(Step).getUnsignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // sext(sext(x)) --> sext(x)
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getSignExtendExpr(SS->getOperand(), Ty);
+
+ // sext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSignExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // If the input value is provably positive, build a zext instead.
+ if (isKnownNonNegative(Op))
+ return getZeroExtendExpr(Op, Ty);
+
+ // sext(trunc(x)) --> sext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all sign bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getSignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).signExtend(NewBits).contains(
+ CR.sextOrTrunc(NewBits)))
+ return getTruncateOrSignExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can sign extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, SCEV::FlagNSW);
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+ // in infinite recursion. In the later case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+ // Check whether the backedge-taken count can be losslessly casted to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no signed overflow.
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *Add = getAddExpr(Start, SMul);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getSignExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as unsigned.
+ // This covers loops that count up with an unsigned step.
+ const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
+ Add = getAddExpr(Start, UMul);
+ OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
+ getSignedRange(Step).getSignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+/// getAnyExtendExpr - Return a SCEV for the given operand extended with
+/// unspecified bits out to the given type.
+///
+const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Sign-extend negative constants.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ if (SC->getValue()->getValue().isNegative())
+ return getSignExtendExpr(Op, Ty);
+
+ // Peel off a truncate cast.
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
+ const SCEV *NewOp = T->getOperand();
+ if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
+ return getAnyExtendExpr(NewOp, Ty);
+ return getTruncateOrNoop(NewOp, Ty);
+ }
+
+ // Next try a zext cast. If the cast is folded, use it.
+ const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
+ if (!isa<SCEVZeroExtendExpr>(ZExt))
+ return ZExt;
+
+ // Next try a sext cast. If the cast is folded, use it.
+ const SCEV *SExt = getSignExtendExpr(Op, Ty);
+ if (!isa<SCEVSignExtendExpr>(SExt))
+ return SExt;
+
+ // Force the cast to be folded into the operands of an addrec.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Ops;
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ Ops.push_back(getAnyExtendExpr(*I, Ty));
+ return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
+ }
+
+ // As a special case, fold anyext(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
+ // If the expression is obviously signed, use the sext cast value.
+ if (isa<SCEVSMaxExpr>(Op))
+ return SExt;
+
+ // Absent any other information, use the zext cast value.
+ return ZExt;
+}
+
+/// CollectAddOperandsWithScales - Process the given Ops list, which is
+/// a list of operands to be added under the given scale, update the given
+/// map. This is a helper function for getAddRecExpr. As an example of
+/// what it does, given a sequence of operands that would form an add
+/// expression like this:
+///
+/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///
+/// where A and B are constants, update the map with these values:
+///
+/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
+///
+/// and add 13 + A*B*29 to AccumulatedConstant.
+/// This will allow getAddRecExpr to produce this:
+///
+/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
+///
+/// This form often exposes folding opportunities that are hidden in
+/// the original operand list.
+///
+/// Return true iff it appears that any interesting folding opportunities
+/// may be exposed. This helps getAddRecExpr short-circuit extra work in
+/// the common case where no interesting opportunities are present, and
+/// is also used as a check to avoid infinite recursion.
+///
+static bool
+CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
+ SmallVector<const SCEV *, 8> &NewOps,
+ APInt &AccumulatedConstant,
+ const SCEV *const *Ops, size_t NumOperands,
+ const APInt &Scale,
+ ScalarEvolution &SE) {
+ bool Interesting = false;
+
+ // Iterate over the add operands. They are sorted, with constants first.
+ unsigned i = 0;
+ while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ ++i;
+ // Pull a buried constant out to the outside.
+ if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
+ Interesting = true;
+ AccumulatedConstant += Scale * C->getValue()->getValue();
+ }
+
+ // Next comes everything else. We're especially interested in multiplies
+ // here, but they're in the middle, so just visit the rest with one loop.
+ for (; i != NumOperands; ++i) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
+ if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
+ APInt NewScale =
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+ if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
+ // A multiplication of a constant with another add; recurse.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
+ Interesting |=
+ CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Add->op_begin(), Add->getNumOperands(),
+ NewScale, SE);
+ } else {
+ // A multiplication of a constant with some other value. Update
+ // the map.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *Key = SE.getMulExpr(MulOps);
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Key, NewScale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += NewScale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ } else {
+ // An ordinary operand. Update the map.
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Ops[i], Scale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += Scale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ }
+
+ return Interesting;
+}
+
+namespace {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+}
+
+/// getAddExpr - Get a canonical add expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty add!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVAddExpr operand types don't match!");
+#endif
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ Ops[0] = getConstant(LHSC->getValue()->getValue() +
+ RHSC->getValue()->getValue());
+ if (Ops.size() == 2) return Ops[0];
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant zero being added, strip it off.
+ if (LHSC->getValue()->isZero()) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Okay, check to see if the same value occurs in the operand list more than
+ // once. If so, merge them together into an multiply expression. Since we
+ // sorted the list, these values are required to be adjacent.
+ const Type *Ty = Ops[0]->getType();
+ bool FoundMatch = false;
+ for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
+ if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
+ // Scan ahead to count how many equal operands there are.
+ unsigned Count = 2;
+ while (i+Count != e && Ops[i+Count] == Ops[i])
+ ++Count;
+ // Merge the values into a multiply.
+ const SCEV *Scale = getConstant(Ty, Count);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ if (Ops.size() == Count)
+ return Mul;
+ Ops[i] = Mul;
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+ --i; e -= Count - 1;
+ FoundMatch = true;
+ }
+ if (FoundMatch)
+ return getAddExpr(Ops, Flags);
+
+ // Check for truncates. If all the operands are truncated from the same
+ // type, see if factoring out the truncate would permit the result to be
+ // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+ // if the contents of the resulting outer trunc fold to something simple.
+ for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
+ const Type *DstType = Trunc->getType();
+ const Type *SrcType = Trunc->getOperand()->getType();
+ SmallVector<const SCEV *, 8> LargeOps;
+ bool Ok = true;
+ // Check all the operands to see if they can be represented in the
+ // source type of the truncate.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ LargeOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+ SmallVector<const SCEV *, 8> LargeMulOps;
+ for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+ if (const SCEVTruncateExpr *T =
+ dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeMulOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C =
+ dyn_cast<SCEVConstant>(M->getOperand(j))) {
+ LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok)
+ LargeOps.push_back(getMulExpr(LargeMulOps));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok) {
+ // Evaluate the expression in the larger type.
+ const SCEV *Fold = getAddExpr(LargeOps, Flags);
+ // If it folds to something simple, use it. Otherwise, don't.
+ if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+ return getTruncateExpr(Fold, DstType);
+ }
+ }
+
+ // Skip past any other cast SCEVs.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+ ++Idx;
+
+ // If there are add operands they would be next.
+ if (Idx < Ops.size()) {
+ bool DeletedAdd = false;
+ while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+ // If we have an add, expand the add operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Add->op_begin(), Add->op_end());
+ DeletedAdd = true;
+ }
+
+ // If we deleted at least one add, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedAdd)
+ return getAddExpr(Ops);
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // Check to see if there are any folding opportunities present with
+ // operands multiplied by constant values.
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+ uint64_t BitWidth = getTypeSizeInBits(Ty);
+ DenseMap<const SCEV *, APInt> M;
+ SmallVector<const SCEV *, 8> NewOps;
+ APInt AccumulatedConstant(BitWidth, 0);
+ if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Ops.data(), Ops.size(),
+ APInt(BitWidth, 1), *this)) {
+ // Some interesting folding opportunity is present, so its worthwhile to
+ // re-generate the operands list. Group the operands by constant scale,
+ // to avoid multiplying by the same constant scale multiple times.
+ std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+ for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
+ E = NewOps.end(); I != E; ++I)
+ MulOpLists[M.find(*I)->second].push_back(*I);
+ // Re-generate the operands list.
+ Ops.clear();
+ if (AccumulatedConstant != 0)
+ Ops.push_back(getConstant(AccumulatedConstant));
+ for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
+ I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
+ if (I->first != 0)
+ Ops.push_back(getMulExpr(getConstant(I->first),
+ getAddExpr(I->second)));
+ if (Ops.empty())
+ return getConstant(Ty, 0);
+ if (Ops.size() == 1)
+ return Ops[0];
+ return getAddExpr(Ops);
+ }
+ }
+
+ // If we are adding something to a multiply expression, make sure the
+ // something is not already an operand of the multiply. If so, merge it into
+ // the multiply.
+ for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+ const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+ for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+ const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+ if (isa<SCEVConstant>(MulOpSCEV))
+ continue;
+ for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+ if (MulOpSCEV == Ops[AddOp]) {
+ // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
+ const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ // If the multiply has more than two operands, we must get the
+ // Y*Z term.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul = getMulExpr(MulOps);
+ }
+ const SCEV *One = getConstant(Ty, 1);
+ const SCEV *AddOne = getAddExpr(One, InnerMul);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+ if (Ops.size() == 2) return OuterMul;
+ if (AddOp < Idx) {
+ Ops.erase(Ops.begin()+AddOp);
+ Ops.erase(Ops.begin()+Idx-1);
+ } else {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+AddOp-1);
+ }
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+
+ // Check this multiply against other multiplies being added together.
+ for (unsigned OtherMulIdx = Idx+1;
+ OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+ ++OtherMulIdx) {
+ const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+ // If MulOp occurs in OtherMul, we can fold the two multiplies
+ // together.
+ for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+ OMulOp != e; ++OMulOp)
+ if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+ // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+ const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul1 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+ if (OtherMul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+ OtherMul->op_begin()+OMulOp);
+ MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
+ InnerMul2 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ if (Ops.size() == 2) return OuterMul;
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+OtherMulIdx-1);
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+ }
+ }
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this add and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
+ LIOps.push_back(AddRec->getStart());
+
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ AddRecOps[0] = getAddExpr(LIOps);
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer add and the inner addrec are guaranteed to have no overflow.
+ // Always propagate NW.
+ Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, add the folded AddRec by the non-liv parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getAddExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // added together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
+ }
+ AddRecOps[i] = getAddExpr(AddRecOps[i],
+ OtherAddRec->getOperand(i));
+ }
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ // Step size has changed, so we cannot guarantee no self-wraparound.
+ Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+ return getAddExpr(Ops);
+ }
+
+ // Otherwise couldn't fold anything into this recurrence. Move onto the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need an add expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ SCEVAddExpr *S =
+ static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+/// getMulExpr - Get a canonical multiply expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty mul!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVMulExpr operand types don't match!");
+#endif
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+
+ // C1*(C2+V) -> C1*C2 + C1*V
+ if (Ops.size() == 2)
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ if (Add->getNumOperands() == 2 &&
+ isa<SCEVConstant>(Add->getOperand(0)))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+ getMulExpr(LHSC, Add->getOperand(1)));
+
+ ++Idx;
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ LHSC->getValue()->getValue() *
+ RHSC->getValue()->getValue());
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant one being multiplied, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+ // If we have a multiply of zero, it will always be zero.
+ return Ops[0];
+ } else if (Ops[0]->isAllOnesValue()) {
+ // If we have a mul by -1 of an add, try distributing the -1 among the
+ // add operands.
+ if (Ops.size() == 2) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+ SmallVector<const SCEV *, 4> NewOps;
+ bool AnyFolded = false;
+ for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
+ E = Add->op_end(); I != E; ++I) {
+ const SCEV *Mul = getMulExpr(Ops[0], *I);
+ if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+ NewOps.push_back(Mul);
+ }
+ if (AnyFolded)
+ return getAddExpr(NewOps);
+ }
+ else if (const SCEVAddRecExpr *
+ AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+ // Negation preserves a recurrence's no self-wrap property.
+ SmallVector<const SCEV *, 4> Operands;
+ for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
+ E = AddRec->op_end(); I != E; ++I) {
+ Operands.push_back(getMulExpr(Ops[0], *I));
+ }
+ return getAddRecExpr(Operands, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ }
+ }
+ }
+
+ if (Ops.size() == 1)
+ return Ops[0];
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // If there are mul operands inline them all into this expression.
+ if (Idx < Ops.size()) {
+ bool DeletedMul = false;
+ while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+ // If we have an mul, expand the mul operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Mul->op_begin(), Mul->op_end());
+ DeletedMul = true;
+ }
+
+ // If we deleted at least one mul, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedMul)
+ return getMulExpr(Ops);
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this mul and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.reserve(AddRec->getNumOperands());
+ const SCEV *Scale = getMulExpr(LIOps);
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer mul and the inner addrec are guaranteed to have no overflow.
+ //
+ // No self-wrap cannot be guaranteed after changing the step size, but
+ // will be inferred if either NUW or NSW is true.
+ Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, multiply the folded AddRec by the non-liv parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getMulExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // multiplied together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> -->
+ // {A*C,+,F*D + G*B + B*D}<L>
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
+ const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
+ const SCEV *B = F->getStepRecurrence(*this);
+ const SCEV *D = G->getStepRecurrence(*this);
+ const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
+ getMulExpr(G, B),
+ getMulExpr(B, D));
+ const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
+ F->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ return getMulExpr(Ops);
+ }
+
+ // Otherwise couldn't fold anything into this recurrence. Move onto the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need an mul expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scMulExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ SCEVMulExpr *S =
+ static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
+const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ assert(getEffectiveSCEVType(LHS->getType()) ==
+ getEffectiveSCEVType(RHS->getType()) &&
+ "SCEVUDivExpr operand types don't match!");
+
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (RHSC->getValue()->equalsInt(1))
+ return LHS; // X udiv 1 --> x
+ // If the denominator is zero, the result of the udiv is undefined. Don't
+ // try to analyze it, because the resolution chosen here may differ from
+ // the resolution chosen in other parts of the compiler.
+ if (!RHSC->getValue()->isZero()) {
+ // Determine if the division can be folded into the operands of
+ // its operands.
+ // TODO: Generalize this to non-constants by using known-bits information.
+ const Type *Ty = LHS->getType();
+ unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
+ // For non-power-of-two values, effectively round the value up to the
+ // nearest power of two.
+ if (!RHSC->getValue()->getValue().isPowerOf2())
+ ++MaxShiftAmt;
+ const IntegerType *ExtTy =
+ IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
+ if (!Step->getValue()->getValue()
+ .urem(RHSC->getValue()->getValue()) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
+ Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
+ return getAddRecExpr(Operands, AR->getLoop(),
+ SCEV::FlagNW);
+ }
+ // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
+ // Find an operand that's safely divisible.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = M->getOperand(i);
+ const SCEV *Div = getUDivExpr(Op, RHSC);
+ if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
+ Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
+ M->op_end());
+ Operands[i] = Div;
+ return getMulExpr(Operands);
+ }
+ }
+ }
+ // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
+ Operands.clear();
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
+ if (isa<SCEVUDivExpr>(Op) ||
+ getMulExpr(Op, RHS) != A->getOperand(i))
+ break;
+ Operands.push_back(Op);
+ }
+ if (Operands.size() == A->getNumOperands())
+ return getAddExpr(Operands);
+ }
+ }
+
+ // Fold if both operands are constant.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ Constant *LHSCV = LHSC->getValue();
+ Constant *RHSCV = RHSC->getValue();
+ return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
+ RHSCV)));
+ }
+ }
+ }
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUDivExpr);
+ ID.AddPointer(LHS);
+ ID.AddPointer(RHS);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
+ LHS, RHS);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
+ const Loop *L,
+ SCEV::NoWrapFlags Flags) {
+ SmallVector<const SCEV *, 4> Operands;
+ Operands.push_back(Start);
+ if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
+ if (StepChrec->getLoop() == L) {
+ Operands.append(StepChrec->op_begin(), StepChrec->op_end());
+ return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
+ }
+
+ Operands.push_back(Step);
+ return getAddRecExpr(Operands, L, Flags);
+}
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ if (Operands.size() == 1) return Operands[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
+ "SCEVAddRecExpr operand types don't match!");
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ assert(isLoopInvariant(Operands[i], L) &&
+ "SCEVAddRecExpr operand is not loop-invariant!");
+#endif
+
+ if (Operands.back()->isZero()) {
+ Operands.pop_back();
+ return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
+ }
+
+ // It's tempting to want to call getMaxBackedgeTakenCount count here and
+ // use that information to infer NUW and NSW flags. However, computing a
+ // BE count requires calling getAddRecExpr, so we may not yet have a
+ // meaningful BE count at this point (and if we don't, we'd be stuck
+ // with a SCEVCouldNotCompute as the cached BE count).
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
+ E = Operands.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Canonicalize nested AddRecs in by nesting them in order of loop depth.
+ if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
+ const Loop *NestedLoop = NestedAR->getLoop();
+ if (L->contains(NestedLoop) ?
+ (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
+ (!NestedLoop->contains(L) &&
+ DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
+ SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
+ NestedAR->op_end());
+ Operands[0] = NestedAR->getStart();
+ // AddRecs require their operands be loop-invariant with respect to their
+ // loops. Don't perform this transformation if it would break this
+ // requirement.
+ bool AllInvariant = true;
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ if (!isLoopInvariant(Operands[i], L)) {
+ AllInvariant = false;
+ break;
+ }
+ if (AllInvariant) {
+ // Create a recurrence for the outer loop with the same step size.
+ //
+ // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
+ // inner recurrence has the same property.
+ SCEV::NoWrapFlags OuterFlags =
+ maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
+
+ NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
+ AllInvariant = true;
+ for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
+ if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
+ AllInvariant = false;
+ break;
+ }
+ if (AllInvariant) {
+ // Ok, both add recurrences are valid after the transformation.
+ //
+ // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
+ // the outer recurrence has the same property.
+ SCEV::NoWrapFlags InnerFlags =
+ maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
+ return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
+ }
+ }
+ // Reset Operands to its original state.
+ Operands[0] = NestedAR;
+ }
+ }
+
+ // Okay, it looks like we really DO need an addrec expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ ID.AddPointer(Operands[i]);
+ ID.AddPointer(L);
+ void *IP = 0;
+ SCEVAddRecExpr *S =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
+ std::uninitialized_copy(Operands.begin(), Operands.end(), O);
+ S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
+ O, Operands.size(), L);
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getSMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty smax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVSMaxExpr operand types don't match!");
+#endif
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ APIntOps::smax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
+ // If we have an smax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first SMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is an SMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedSMax = false;
+ while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(SMax->op_begin(), SMax->op_end());
+ DeletedSMax = true;
+ }
+
+ if (DeletedSMax)
+ return getSMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X smax Y smax Y --> X smax Y
+ // X smax Y --> X, if X is always greater than Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced smax down to nothing!");
+
+ // Okay, it looks like we really DO need an smax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getUMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty umax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVUMaxExpr operand types don't match!");
+#endif
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ APIntOps::umax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
+ // If we have an umax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first UMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is a UMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedUMax = false;
+ while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(UMax->op_begin(), UMax->op_end());
+ DeletedUMax = true;
+ }
+
+ if (DeletedUMax)
+ return getUMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X umax Y umax Y --> X umax Y
+ // X umax Y --> X, if X is always greater than Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced umax down to nothing!");
+
+ // Okay, it looks like we really DO need a umax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~smax(~x, ~y) == smin(x, y).
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~umax(~x, ~y) == umin(x, y)
+ return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+ // If we have TargetData, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getTypeAllocSize(AllocTy));
+
+ Constant *C = ConstantExpr::getSizeOf(AllocTy);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
+ Constant *C = ConstantExpr::getAlignOf(AllocTy);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+ unsigned FieldNo) {
+ // If we have TargetData, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getStructLayout(STy)->getElementOffset(FieldNo));
+
+ Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+ Constant *FieldNo) {
+ Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
+ // Don't attempt to do anything other than create a SCEVUnknown object
+ // here. createSCEV only calls getUnknown after checking for all other
+ // interesting possibilities, and any other code that calls getUnknown
+ // is doing so in order to hide a value from SCEV canonicalization.
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUnknown);
+ ID.AddPointer(V);
+ void *IP = 0;
+ if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+ assert(cast<SCEVUnknown>(S)->getValue() == V &&
+ "Stale SCEVUnknown in uniquing map!");
+ return S;
+ }
+ SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+ FirstUnknown);
+ FirstUnknown = cast<SCEVUnknown>(S);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+// Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This primarily includes integer types, and it
+/// can optionally include pointer types if the ScalarEvolution class
+/// has access to target-specific information.
+bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+ // Integers and pointers are always SCEVable.
+ return Ty->isIntegerTy() || Ty->isPointerTy();
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ // If we have a TargetData, use it!
+ if (TD)
+ return TD->getTypeSizeInBits(Ty);
+
+ // Integer types have fixed sizes.
+ if (Ty->isIntegerTy())
+ return Ty->getPrimitiveSizeInBits();
+
+ // The only other support type is pointer. Without TargetData, conservatively
+ // assume pointers are 64-bit.
+ assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
+ return 64;
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ if (Ty->isIntegerTy())
+ return Ty;
+
+ // The only other support type is pointer.
+ assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
+ if (TD) return TD->getIntPtrType(getContext());
+
+ // Without TargetData, conservatively assume pointers are 64-bit.
+ return Type::getInt64Ty(getContext());
+}
+
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+ return &CouldNotCompute;
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+ ValueExprMapType::const_iterator I = ValueExprMap.find(V);
+ if (I != ValueExprMap.end()) return I->second;
+ const SCEV *S = createSCEV(V);
+
+ // The process of creating a SCEV for V may have caused other SCEVs
+ // to have been created, so it's necessary to insert the new entry
+ // from scratch, rather than trying to remember the insert position
+ // above.
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ return S;
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+
+ const Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ return getMulExpr(V,
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+
+ const Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ const SCEV *AllOnes =
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
+ return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+ SCEV::NoWrapFlags Flags) {
+ assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
+
+ // Fast path: X - X --> 0.
+ if (LHS == RHS)
+ return getConstant(LHS->getType(), 0);
+
+ // X - Y --> X + -Y
+ return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
+ const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or zero extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrZeroExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or sign extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrSignExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type. If the type must be extended,
+/// it is extended with unspecified bits. The conversion must not be
+/// narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or any extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrAnyExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getAnyExtendExpr(V, Ty);
+}
+
+/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. The conversion must not be widening.
+const SCEV *
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or noop with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
+ "getTruncateOrNoop cannot extend!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getTruncateExpr(V, Ty);
+}
+
+/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umax operation
+/// with them.
+const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMaxExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getUMinFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umin operation
+/// with them.
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMinExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getPointerBase - Transitively follow the chain of pointer-type operands
+/// until reaching a SCEV that does not have a single pointer operand. This
+/// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
+/// but corner cases do exist.
+const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
+ // A pointer operand may evaluate to a nonpointer expression, such as null.
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
+ return getPointerBase(Cast->getOperand());
+ }
+ else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+ const SCEV *PtrOp = 0;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ if ((*I)->getType()->isPointerTy()) {
+ // Cannot find the base of an expression with multiple pointer operands.
+ if (PtrOp)
+ return V;
+ PtrOp = *I;
+ }
+ }
+ if (!PtrOp)
+ return V;
+ return getPointerBase(PtrOp);
+ }
+ return V;
+}
+
+/// PushDefUseChildren - Push users of the given Instruction
+/// onto the given Worklist.
+static void
+PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist) {
+ // Push the def-use children onto the Worklist stack.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+}
+
+/// ForgetSymbolicValue - This looks up computed SCEV values for all
+/// instructions that depend on the given instruction and removes them from
+/// the ValueExprMapType map if they reference SymName. This is used during PHI
+/// resolution.
+void
+ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushDefUseChildren(PN, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ Visited.insert(PN);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // Short-circuit the def-use traversal if the symbolic name
+ // ceases to appear in expressions.
+ if (Old != SymName && !hasOperand(Old, SymName))
+ continue;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, it's a PHI that's in the progress of being computed
+ // by createNodeForPHI, or it's a single-value PHI. In the first case,
+ // additional loop trip count information isn't going to change anything.
+ // In the second case, createNodeForPHI will perform the necessary
+ // updates on its own when it gets to that point. In the third, we do
+ // want to forget the SCEVUnknown.
+ if (!isa<PHINode>(I) ||
+ !isa<SCEVUnknown>(Old) ||
+ (I != PN && Old == SymName)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
+/// a loop header, making it a potential recurrence, or it doesn't.
+///
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
+ if (const Loop *L = LI->getLoopFor(PN->getParent()))
+ if (L->getHeader() == PN->getParent()) {
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = 0, *StartValueV = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = 0;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = 0;
+ break;
+ }
+ }
+ if (BEValueV && StartValueV) {
+ // While we are analyzing this PHI node, handle its value symbolically.
+ const SCEV *SymbolicName = getUnknown(PN);
+ assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
+ "PHI node already processed?");
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
+
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (isLoopInvariant(Accum, L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+ // If the increment doesn't overflow, then neither the addrec nor
+ // the post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ } else if (const GEPOperator *GEP =
+ dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is an inbounds GEP, then we know the address
+ // space cannot be wrapped around. We cannot make any guarantee
+ // about signed or unsigned overflow because pointers are
+ // unsigned but we may have a negative index from the base
+ // pointer.
+ if (GEP->isInBounds())
+ Flags = setFlags(Flags, SCEV::FlagNW);
+ }
+
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+ // Since the no-wrap flags are on the increment, they apply to the
+ // post-incremented value as well.
+ if (isLoopInvariant(Accum, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum),
+ Accum, L, Flags);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ } else if (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(BEValue)) {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+ // Because the other in-value of i (0) fits the evolution of BEValue
+ // i really is an addrec evolution.
+ if (AddRec->getLoop() == L && AddRec->isAffine()) {
+ const SCEV *StartVal = getSCEV(StartValueV);
+
+ // If StartVal = j.start - j.stride, we can use StartVal as the
+ // initial step of the addrec evolution.
+ if (StartVal == getMinusSCEV(AddRec->getOperand(0),
+ AddRec->getOperand(1))) {
+ // FIXME: For constant StartVal, we should be able to infer
+ // no-wrap flags.
+ const SCEV *PHISCEV =
+ getAddRecExpr(StartVal, AddRec->getOperand(1), L,
+ SCEV::FlagAnyWrap);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ }
+ }
+ }
+
+ // If the PHI has a single incoming value, follow that value, unless the
+ // PHI's incoming blocks are in a different loop, in which case doing so
+ // risks breaking LCSSA form. Instcombine would normally zap these, but
+ // it doesn't have DominatorTree information, so it may miss cases.
+ if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (LI->replacementPreservesLCSSAForm(PN, V))
+ return getSCEV(V);
+
+ // If it's not a loop phi, we can't handle it yet.
+ return getUnknown(PN);
+}
+
+/// createNodeForGEP - Expand GEP instructions into add and multiply
+/// operations. This allows them to be analyzed by regular SCEV code.
+///
+const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+
+ // Don't blindly transfer the inbounds flag from the GEP instruction to the
+ // Add expression, because the Instruction may be guarded by control flow
+ // and the no-overflow bits may not be valid for the expression in any
+ // context.
+ bool isInBounds = GEP->isInBounds();
+
+ const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
+ return getUnknown(GEP);
+ const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+ E = GEP->op_end();
+ I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+
+ // Add the field offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+ } else {
+ // For an array, add the element offset, explicitly scaled.
+ const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *IndexS = getSCEV(Index);
+ // Getelementptr indices are signed.
+ IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
+
+ // Multiply the index by the element size to compute the element offset.
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
+ isInBounds ? SCEV::FlagNSW :
+ SCEV::FlagAnyWrap);
+
+ // Add the element offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ }
+ }
+
+ // Get the SCEV for the GEP base.
+ const SCEV *BaseS = getSCEV(Base);
+
+ // Add the total offset from all the GEP indices to the base.
+ return getAddExpr(BaseS, TotalOffset,
+ isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+}
+
+/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+/// guaranteed to end in (at every loop iteration). It is, at the same time,
+/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
+/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
+uint32_t
+ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return C->getValue()->getValue().countTrailingZeros();
+
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
+ return std::min(GetMinTrailingZeros(T->getOperand()),
+ (uint32_t)getTypeSizeInBits(T->getType()));
+
+ if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // The result is the min of all operands results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ // The result is the sum of all operands results.
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
+ uint32_t BitWidth = getTypeSizeInBits(M->getType());
+ for (unsigned i = 1, e = M->getNumOperands();
+ SumOpRes != BitWidth && i != e; ++i)
+ SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
+ BitWidth);
+ return SumOpRes;
+ }
+
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ // The result is the min of all operands results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
+ // The result is the min of all operands results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
+ // The result is the min of all operands results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+ return Zeros.countTrailingOnes();
+ }
+
+ // SCEVUDivExpr
+ return 0;
+}
+
+/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
+ if (I != UnsignedRanges.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum unsigned value will have those
+ // known zeros as well.
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0)
+ ConservativeResult =
+ ConstantRange(APInt::getMinValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getUnsignedRange(Add->getOperand(i)));
+ return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
+ return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getUnsignedRange(SMax->getOperand(i)));
+ return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getUnsignedRange(UMax->getOperand(i)));
+ return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UDiv->getLHS());
+ ConstantRange Y = getUnsignedRange(UDiv->getRHS());
+ return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(ZExt->getOperand());
+ return setUnsignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SExt->getOperand());
+ return setUnsignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Trunc->getOperand());
+ return setUnsignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no unsigned wrap, the value will never be less than its
+ // initial value.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+ if (!C->getValue()->isZero())
+ ConservativeResult =
+ ConservativeResult.intersectWith(
+ ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ ConstantRange StartRange = getUnsignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
+ return setUnsignedRange(AddRec, ConservativeResult);
+
+ APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
+ EndRange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
+ EndRange.getUnsignedMax());
+ if (Min.isMinValue() && Max.isMaxValue())
+ return setUnsignedRange(AddRec, ConservativeResult);
+ return setUnsignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ }
+ }
+
+ return setUnsignedRange(AddRec, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+ if (Ones == ~Zeros + 1)
+ return setUnsignedRange(U, ConservativeResult);
+ return setUnsignedRange(U,
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
+ }
+
+ return setUnsignedRange(S, ConservativeResult);
+}
+
+/// getSignedRange - Determine the signed range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getSignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
+ if (I != SignedRanges.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum signed value will have those
+ // known zeros as well.
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0)
+ ConservativeResult =
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getSignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getSignedRange(Add->getOperand(i)));
+ return setSignedRange(Add, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getSignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getSignedRange(Mul->getOperand(i)));
+ return setSignedRange(Mul, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getSignedRange(SMax->getOperand(i)));
+ return setSignedRange(SMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getSignedRange(UMax->getOperand(i)));
+ return setSignedRange(UMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getSignedRange(UDiv->getLHS());
+ ConstantRange Y = getSignedRange(UDiv->getRHS());
+ return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(ZExt->getOperand());
+ return setSignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(SExt->getOperand());
+ return setSignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getSignedRange(Trunc->getOperand());
+ return setSignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no signed wrap, and all the operands have the same sign or
+ // zero, the value won't ever change sign.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
+ bool AllNonNeg = true;
+ bool AllNonPos = true;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+ if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+ }
+ if (AllNonNeg)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt(BitWidth, 0),
+ APInt::getSignedMinValue(BitWidth)));
+ else if (AllNonPos)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt(BitWidth, 1)));
+ }
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ ConstantRange StartRange = getSignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
+ return setSignedRange(AddRec, ConservativeResult);
+
+ APInt Min = APIntOps::smin(StartRange.getSignedMin(),
+ EndRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartRange.getSignedMax(),
+ EndRange.getSignedMax());
+ if (Min.isMinSignedValue() && Max.isMaxSignedValue())
+ return setSignedRange(AddRec, ConservativeResult);
+ return setSignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ }
+ }
+
+ return setSignedRange(AddRec, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ if (!U->getValue()->getType()->isIntegerTy() && !TD)
+ return setSignedRange(U, ConservativeResult);
+ unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+ if (NS == 1)
+ return setSignedRange(U, ConservativeResult);
+ return setSignedRange(U, ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+ }
+
+ return setSignedRange(S, ConservativeResult);
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value.
+/// Analyze the expression.
+///
+const SCEV *ScalarEvolution::createSCEV(Value *V) {
+ if (!isSCEVable(V->getType()))
+ return getUnknown(V);
+
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+
+ // Don't attempt to analyze instructions in blocks that aren't
+ // reachable. Such instructions don't matter, and they aren't required
+ // to obey basic rules for definitions dominating uses which this
+ // analysis depends on.
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return getUnknown(V);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ Opcode = CE->getOpcode();
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return getConstant(CI);
+ else if (isa<ConstantPointerNull>(V))
+ return getConstant(V->getType(), 0);
+ else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
+ else
+ return getUnknown(V);
+
+ Operator *U = cast<Operator>(V);
+ switch (Opcode) {
+ case Instruction::Add: {
+ // The simple thing to do would be to just call getSCEV on both operands
+ // and call getAddExpr with the result. However if we're looking at a
+ // bunch of things all added together, this can be quite inefficient,
+ // because it leads to N-1 getAddExpr calls for N ultimate operands.
+ // Instead, gather up all the operands and make a single getAddExpr call.
+ // LLVM IR canonical form means we need only traverse the left operands.
+ SmallVector<const SCEV *, 4> AddOps;
+ AddOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
+ unsigned Opcode = Op->getValueID() - Value::InstructionVal;
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ break;
+ U = cast<Operator>(Op);
+ const SCEV *Op1 = getSCEV(U->getOperand(1));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getNegativeSCEV(Op1));
+ else
+ AddOps.push_back(Op1);
+ }
+ AddOps.push_back(getSCEV(U->getOperand(0)));
+ return getAddExpr(AddOps);
+ }
+ case Instruction::Mul: {
+ // See the Add code above.
+ SmallVector<const SCEV *, 4> MulOps;
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0);
+ Op->getValueID() == Instruction::Mul + Value::InstructionVal;
+ Op = U->getOperand(0)) {
+ U = cast<Operator>(Op);
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ }
+ MulOps.push_back(getSCEV(U->getOperand(0)));
+ return getMulExpr(MulOps);
+ }
+ case Instruction::UDiv:
+ return getUDivExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::Sub:
+ return getMinusSCEV(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::And:
+ // For an expression like x&255 that merely masks off the high bits,
+ // use zext(trunc(x)) as the SCEV expression.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ if (CI->isNullValue())
+ return getSCEV(U->getOperand(1));
+ if (CI->isAllOnesValue())
+ return getSCEV(U->getOperand(0));
+ const APInt &A = CI->getValue();
+
+ // Instcombine's ShrinkDemandedConstant may strip bits out of
+ // constants, obscuring what would otherwise be a low-bits mask.
+ // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
+ // knew about to reconstruct a low-bits mask value.
+ unsigned LZ = A.countLeadingZeros();
+ unsigned BitWidth = A.getBitWidth();
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+
+ APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
+
+ if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
+ return
+ getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
+ IntegerType::get(getContext(), BitWidth - LZ)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Or:
+ // If the RHS of the Or is a constant, we may have something like:
+ // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
+ // optimizations will transparently handle this case.
+ //
+ // In order for this transformation to be safe, the LHS must be of the
+ // form X*(2^n) and the Or constant must be less than 2^n.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ const APInt &CIVal = CI->getValue();
+ if (GetMinTrailingZeros(LHS) >=
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+ // Build a plain add SCEV.
+ const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+ // If the LHS of the add was an addrec and it has no-wrap flags,
+ // transfer the no-wrap flags, since an or won't introduce a wrap.
+ if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+ const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
+ OldAR->getNoWrapFlags());
+ }
+ return S;
+ }
+ }
+ break;
+ case Instruction::Xor:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // If the RHS of the xor is a signbit, then this is just an add.
+ // Instcombine turns add of signbit into xor as a strength reduction step.
+ if (CI->getValue().isSignBit())
+ return getAddExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+
+ // If the RHS of xor is -1, then this is a not operation.
+ if (CI->isAllOnesValue())
+ return getNotSCEV(getSCEV(U->getOperand(0)));
+
+ // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
+ // This is a variant of the check for xor with -1, and it handles
+ // the case where instcombine has trimmed non-demanded bits out
+ // of an xor with -1.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
+ if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO->getOpcode() == Instruction::And &&
+ LCI->getValue() == CI->getValue())
+ if (const SCEVZeroExtendExpr *Z =
+ dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
+ const Type *UTy = U->getType();
+ const SCEV *Z0 = Z->getOperand();
+ const Type *Z0Ty = Z0->getType();
+ unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
+
+ // If C is a low-bits mask, the zero extend is serving to
+ // mask off the high bits. Complement the operand and
+ // re-apply the zext.
+ if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ return getZeroExtendExpr(getNotSCEV(Z0), UTy);
+
+ // If C is a single bit, it may be in the sign-bit position
+ // before the zero-extend. In this case, represent the xor
+ // using an add, which is equivalent, and re-apply the zext.
+ APInt Trunc = CI->getValue().trunc(Z0TySize);
+ if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+ Trunc.isSignBit())
+ return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+ UTy);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ // Turn shift left of a constant amount into a multiply.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::LShr:
+ // Turn logical shift right of a constant into a unsigned divide.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::AShr:
+ // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
+ if (L->getOpcode() == Instruction::Shl &&
+ L->getOperand(1) == U->getOperand(1)) {
+ uint64_t BitWidth = getTypeSizeInBits(U->getType());
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (CI->getValue().uge(BitWidth))
+ break;
+
+ uint64_t Amt = BitWidth - CI->getZExtValue();
+ if (Amt == BitWidth)
+ return getSCEV(L->getOperand(0)); // shift by zero --> noop
+ return
+ getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+ IntegerType::get(getContext(),
+ Amt)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Trunc:
+ return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::ZExt:
+ return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::SExt:
+ return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::BitCast:
+ // BitCasts are no-op casts so we just eliminate the cast.
+ if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+ return getSCEV(U->getOperand(0));
+ break;
+
+ // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
+ // lead to pointer expressions which cannot safely be expanded to GEPs,
+ // because ScalarEvolution doesn't respect the GEP aliasing rules when
+ // simplifying integer expressions.
+
+ case Instruction::GetElementPtr:
+ return createNodeForGEP(cast<GEPOperator>(U));
+
+ case Instruction::PHI:
+ return createNodeForPHI(cast<PHINode>(U));
+
+ case Instruction::Select:
+ // This could be a smax or umax that was lowered earlier.
+ // Try to recover it.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ switch (ICI->getPredicate()) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ // a >s b ? a+x : b+x -> smax(a, b)+x
+ // a >s b ? b+x : a+x -> smin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // a >u b ? a+x : b+x -> umax(a, b)+x
+ // a >u b ? b+x : a+x -> umin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n+x : 1+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, One);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, One);
+ const SCEV *RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ default: // We cannot analyze this expression.
+ break;
+ }
+
+ return getUnknown(V);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Iteration Count Computation Code
+//
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
+///
+/// Note that it is not valid to call this method on a loop without a
+/// loop-invariant backedge-taken count (see
+/// hasLoopInvariantBackedgeTakenCount).
+///
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).Exact;
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).Max;
+}
+
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+ BasicBlock *Header = L->getHeader();
+
+ // Push all Loop-header PHIs onto the Worklist stack.
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ Worklist.push_back(PN);
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+ // Initially insert a CouldNotCompute for this loop. If the insertion
+ // succeeds, proceed to actually compute a backedge-taken count and
+ // update the value. The temporary CouldNotCompute value tells SCEV
+ // code elsewhere that it shouldn't attempt to request a new
+ // backedge-taken count, which could result in infinite recursion.
+ std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+ BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BackedgeTakenInfo Result = getCouldNotCompute();
+ BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L);
+ if (Computed.Exact != getCouldNotCompute()) {
+ assert(isLoopInvariant(Computed.Exact, L) &&
+ isLoopInvariant(Computed.Max, L) &&
+ "Computed backedge-taken count isn't loop invariant for loop!");
+ ++NumTripCountsComputed;
+
+ // Update the value in the map.
+ Result = Computed;
+ } else {
+ if (Computed.Max != getCouldNotCompute())
+ // Update the value in the map.
+ Result = Computed;
+ if (isa<PHINode>(L->getHeader()->begin()))
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
+ }
+
+ // Now that we know more about the trip count for this loop, forget any
+ // existing SCEV values for PHI nodes in this loop since they are only
+ // conservative estimates made without the benefit of trip count
+ // information. This is similar to the code in forgetLoop, except that
+ // it handles SCEVUnknown PHI nodes specially.
+ if (Computed.hasAnyInfo()) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, or it's a PHI that's in the progress of being computed
+ // by createNodeForPHI. In the former case, additional loop trip
+ // count information isn't going to change anything. In the later
+ // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+ }
+
+ // Re-lookup the insert position, since the call to
+ // ComputeBackedgeTakenCount above could result in a
+ // recusive call to getBackedgeTakenInfo (on a different
+ // loop), which would invalidate the iterator computed
+ // earlier.
+ return BackedgeTakenCounts.find(L)->second = Result;
+}
+
+/// forgetLoop - This method should be called by the client when it has
+/// changed a loop in a way that may effect ScalarEvolution's ability to
+/// compute a trip count, or if the loop is deleted.
+void ScalarEvolution::forgetLoop(const Loop *L) {
+ // Drop any stored trip count value.
+ BackedgeTakenCounts.erase(L);
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+
+ // Forget all contained loops too, to avoid dangling entries in the
+ // ValuesAtScopes map.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ forgetLoop(*I);
+}
+
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may effect its value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
+void ScalarEvolution::forgetValue(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return;
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ Worklist.push_back(I);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Examine all exits and pick the most conservative values.
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ bool CouldNotComputeBECount = false;
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BackedgeTakenInfo NewBTI =
+ ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
+
+ if (NewBTI.Exact == getCouldNotCompute()) {
+ // We couldn't compute an exact value for this exit, so
+ // we won't be able to compute an exact value for the loop.
+ CouldNotComputeBECount = true;
+ BECount = getCouldNotCompute();
+ } else if (!CouldNotComputeBECount) {
+ if (BECount == getCouldNotCompute())
+ BECount = NewBTI.Exact;
+ else
+ BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
+ }
+ if (MaxBECount == getCouldNotCompute())
+ MaxBECount = NewBTI.Max;
+ else if (NewBTI.Max != getCouldNotCompute())
+ MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+}
+
+/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
+/// of the specified loop will execute if it exits via the specified block.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
+ BasicBlock *ExitingBlock) {
+
+ // Okay, we've chosen an exiting block. See what condition causes us to
+ // exit at this block.
+ //
+ // FIXME: we should be able to handle switch instructions (with a single exit)
+ BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (ExitBr == 0) return getCouldNotCompute();
+ assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+
+ // At this point, we know we have a conditional branch that determines whether
+ // the loop is exited. However, we don't know if the branch is executed each
+ // time through the loop. If not, then the execution count of the branch will
+ // not be equal to the trip count of the loop.
+ //
+ // Currently we check for this by checking to see if the Exit branch goes to
+ // the loop header. If so, we know it will always execute the same number of
+ // times as the loop. We also handle the case where the exit block *is* the
+ // loop header. This is common for un-rotated loops.
+ //
+ // If both of those tests fail, walk up the unique predecessor chain to the
+ // header, stopping if there is an edge that doesn't exit the loop. If the
+ // header is reached, the execution count of the branch will be equal to the
+ // trip count of the loop.
+ //
+ // More extensive analysis could be done to handle more cases here.
+ //
+ if (ExitBr->getSuccessor(0) != L->getHeader() &&
+ ExitBr->getSuccessor(1) != L->getHeader() &&
+ ExitBr->getParent() != L->getHeader()) {
+ // The simple checks failed, try climbing the unique predecessor chain
+ // up to the header.
+ bool Ok = false;
+ for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred)
+ return getCouldNotCompute();
+ TerminatorInst *PredTerm = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+ if (PredSucc == BB)
+ continue;
+ // If the predecessor has a successor that isn't BB and isn't
+ // outside the loop, assume the worst.
+ if (L->contains(PredSucc))
+ return getCouldNotCompute();
+ }
+ if (Pred == L->getHeader()) {
+ Ok = true;
+ break;
+ }
+ BB = Pred;
+ }
+ if (!Ok)
+ return getCouldNotCompute();
+ }
+
+ // Proceed to the next level to examine the exit condition expression.
+ return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
+ ExitBr->getSuccessor(0),
+ ExitBr->getSuccessor(1));
+}
+
+/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+ // Check if the controlling expression for this loop is an And or Or.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+ if (BO->getOpcode() == Instruction::And) {
+ // Recurse on the operands of the and.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (L->contains(TBB)) {
+ // Both conditions must be true for the loop to continue executing.
+ // Choose the less conservative count.
+ if (BTI0.Exact == getCouldNotCompute() ||
+ BTI1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max == getCouldNotCompute())
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == getCouldNotCompute())
+ MaxBECount = BTI0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ } else {
+ // Both conditions must be true at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(FBB) && "Loop block has no successor in loop!");
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ if (BO->getOpcode() == Instruction::Or) {
+ // Recurse on the operands of the or.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (L->contains(FBB)) {
+ // Both conditions must be false for the loop to continue executing.
+ // Choose the less conservative count.
+ if (BTI0.Exact == getCouldNotCompute() ||
+ BTI1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max == getCouldNotCompute())
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == getCouldNotCompute())
+ MaxBECount = BTI0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ } else {
+ // Both conditions must be false at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(TBB) && "Loop block has no successor in loop!");
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ }
+
+ // With an icmp, it may be feasible to compute an exact backedge-taken count.
+ // Proceed to the next level to examine the icmp.
+ if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
+ return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+
+ // Check for a constant condition. These are normally stripped out by
+ // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
+ // preserve the CFG and is temporarily leaving constant conditions
+ // in place.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
+ if (L->contains(FBB) == !CI->getZExtValue())
+ // The backedge is always taken.
+ return getCouldNotCompute();
+ else
+ // The backedge is never taken.
+ return getConstant(CI->getType(), 0);
+ }
+
+ // If it's not an integer or pointer comparison then compute it the hard way.
+ return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+
+ // If the condition was exit on true, convert the condition to exit on false
+ ICmpInst::Predicate Cond;
+ if (!L->contains(FBB))
+ Cond = ExitCond->getPredicate();
+ else
+ Cond = ExitCond->getInversePredicate();
+
+ // Handle common loops like: for (X = "string"; *X; ++X)
+ if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
+ if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
+ BackedgeTakenInfo ItCnt =
+ ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+ if (ItCnt.hasAnyInfo())
+ return ItCnt;
+ }
+
+ const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
+ const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
+
+ // Try to evaluate any dependencies out of the loop.
+ LHS = getSCEVAtScope(LHS, L);
+ RHS = getSCEVAtScope(RHS, L);
+
+ // At this point, we would like to compute how many iterations of the
+ // loop the predicate will return true for these inputs.
+ if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
+ // If there is a loop-invariant, force it into the RHS.
+ std::swap(LHS, RHS);
+ Cond = ICmpInst::getSwappedPredicate(Cond);
+ }
+
+ // Simplify the operands before analyzing them.
+ (void)SimplifyICmpOperands(Cond, LHS, RHS);
+
+ // If we have a comparison of a chrec against a constant, try to use value
+ // ranges to answer this query.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (AddRec->getLoop() == L) {
+ // Form the constant range.
+ ConstantRange CompRange(
+ ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+
+ const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
+ }
+
+ switch (Cond) {
+ case ICmpInst::ICMP_NE: { // while (X != Y)
+ // Convert to: while (X-Y != 0)
+ BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_SLT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, true);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_ULT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, false);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ default:
+#if 0
+ dbgs() << "ComputeBackedgeTakenCount ";
+ if (ExitCond->getOperand(0)->getType()->isUnsigned())
+ dbgs() << "[unsigned] ";
+ dbgs() << *LHS << " "
+ << Instruction::getOpcodeName(Instruction::ICmp)
+ << " " << *RHS << "\n";
+#endif
+ break;
+ }
+ return
+ ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+static ConstantInt *
+EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
+ ScalarEvolution &SE) {
+ const SCEV *InVal = SE.getConstant(C);
+ const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
+ assert(isa<SCEVConstant>(Val) &&
+ "Evaluation of SCEV at constant didn't fold correctly?");
+ return cast<SCEVConstant>(Val)->getValue();
+}
+
+/// GetAddressedElementFromGlobal - Given a global variable with an initializer
+/// and a GEP expression (missing the pointer index) indexing into it, return
+/// the addressed element of the initializer or null if the index expression is
+/// invalid.
+static Constant *
+GetAddressedElementFromGlobal(GlobalVariable *GV,
+ const std::vector<ConstantInt*> &Indices) {
+ Constant *Init = GV->getInitializer();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ uint64_t Idx = Indices[i]->getZExtValue();
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+ assert(Idx < CS->getNumOperands() && "Bad struct index!");
+ Init = cast<Constant>(CS->getOperand(Idx));
+ } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ if (Idx >= CA->getNumOperands()) return 0; // Bogus program
+ Init = cast<Constant>(CA->getOperand(Idx));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ assert(Idx < STy->getNumElements() && "Bad struct index!");
+ Init = Constant::getNullValue(STy->getElementType(Idx));
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
+ if (Idx >= ATy->getNumElements()) return 0; // Bogus program
+ Init = Constant::getNullValue(ATy->getElementType());
+ } else {
+ llvm_unreachable("Unknown constant aggregate type!");
+ }
+ return 0;
+ } else {
+ return 0; // Unknown initializer type
+ }
+ }
+ return Init;
+}
+
+/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// 'icmp op load X, cst', try to see if we can compute the backedge
+/// execution count.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
+ LoadInst *LI,
+ Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate predicate) {
+ if (LI->isVolatile()) return getCouldNotCompute();
+
+ // Check to see if the loaded pointer is a getelementptr of a global.
+ // TODO: Use SCEV instead of manually grubbing with GEPs.
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+ if (!GEP) return getCouldNotCompute();
+
+ // Make sure that it is really a constant global we are gepping, with an
+ // initializer, and make sure the first IDX is really 0.
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
+ !cast<Constant>(GEP->getOperand(1))->isNullValue())
+ return getCouldNotCompute();
+
+ // Okay, we allow one non-constant index into the GEP instruction.
+ Value *VarIdx = 0;
+ std::vector<ConstantInt*> Indexes;
+ unsigned VarIdxNum = 0;
+ for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ Indexes.push_back(CI);
+ } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
+ if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
+ VarIdx = GEP->getOperand(i);
+ VarIdxNum = i-2;
+ Indexes.push_back(0);
+ }
+
+ // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
+ // Check to see if X is a loop variant variable value now.
+ const SCEV *Idx = getSCEV(VarIdx);
+ Idx = getSCEVAtScope(Idx, L);
+
+ // We can only recognize very limited forms of loop index expressions, in
+ // particular, only affine AddRec's like {C1,+,C2}.
+ const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
+ if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(1)))
+ return getCouldNotCompute();
+
+ unsigned MaxSteps = MaxBruteForceIterations;
+ for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
+ ConstantInt *ItCst = ConstantInt::get(
+ cast<IntegerType>(IdxExpr->getType()), IterationNum);
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
+
+ // Form the GEP offset.
+ Indexes[VarIdxNum] = Val;
+
+ Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+ if (Result == 0) break; // Cannot compute!
+
+ // Evaluate the condition for this iteration.
+ Result = ConstantExpr::getICmp(predicate, Result, RHS);
+ if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
+ if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+#if 0
+ dbgs() << "\n***\n*** Computed loop count " << *ItCst
+ << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
+ << "***\n";
+#endif
+ ++NumArrayLenItCounts;
+ return getConstant(ItCst); // Found terminating iteration!
+ }
+ }
+ return getCouldNotCompute();
+}
+
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+ if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+ return true;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ return canConstantFoldCallTo(F);
+ return false;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+ // If this is not an instruction, or if this is an instruction outside of the
+ // loop, it can't be derived from a loop PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !L->contains(I)) return 0;
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (L->getHeader() == I->getParent())
+ return PN;
+ else
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return 0;
+ }
+
+ // If we won't be able to constant fold this expression even if the operands
+ // are constants, return early.
+ if (!CanConstantFold(I)) return 0;
+
+ // Otherwise, we can evaluate this instruction if all of its operands are
+ // constant or derived from a PHI node themselves.
+ PHINode *PHI = 0;
+ for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
+ if (!isa<Constant>(I->getOperand(Op))) {
+ PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
+ if (P == 0) return 0; // Not evolving from PHI
+ if (PHI == 0)
+ PHI = P;
+ else if (PHI != P)
+ return 0; // Evolving from multiple different PHIs.
+ }
+
+ // This is a expression evolving from a constant PHI!
+ return PHI;
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal. If we can't fold this expression for some
+/// reason, return null.
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+ const TargetData *TD) {
+ if (isa<PHINode>(V)) return PHIVal;
+ if (Constant *C = dyn_cast<Constant>(V)) return C;
+ Instruction *I = cast<Instruction>(V);
+
+ std::vector<Constant*> Operands(I->getNumOperands());
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
+ if (Operands[i] == 0) return 0;
+ }
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
+/// in the header of its containing loop, we know the loop executes a
+/// constant number of times, and the PHI node is just a recurrence
+/// involving constants, fold it.
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+ const APInt &BEs,
+ const Loop *L) {
+ std::map<PHINode*, Constant*>::const_iterator I =
+ ConstantEvolutionLoopExitValue.find(PN);
+ if (I != ConstantEvolutionLoopExitValue.end())
+ return I->second;
+
+ if (BEs.ugt(MaxBruteForceIterations))
+ return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
+
+ Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+ // Since the loop is canonicalized, the PHI node must have two entries. One
+ // entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ Constant *StartCST =
+ dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0)
+ return RetVal = 0; // Must be a constant.
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ if (getConstantEvolvingPHI(BEValue, L) != PN &&
+ !isa<Constant>(BEValue))
+ return RetVal = 0; // Not derived from same PHI.
+
+ // Execute the loop symbolically to determine the exit value.
+ if (BEs.getActiveBits() >= 32)
+ return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+
+ unsigned NumIterations = BEs.getZExtValue(); // must be in range
+ unsigned IterationNum = 0;
+ for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+ if (IterationNum == NumIterations)
+ return RetVal = PHIVal; // Got exit value!
+
+ // Compute the value of the PHI node for the next iteration.
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+ if (NextPHI == PHIVal)
+ return RetVal = NextPHI; // Stopped evolving!
+ if (NextPHI == 0)
+ return 0; // Couldn't evaluate!
+ PHIVal = NextPHI;
+ }
+}
+
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// constant number of times (the condition evolves only from constants),
+/// try to evaluate a few iterations of the loop until we get the exit
+/// condition gets a value of ExitWhen (true or false). If we cannot
+/// evaluate the trip count of the loop, return getCouldNotCompute().
+const SCEV *
+ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
+ Value *Cond,
+ bool ExitWhen) {
+ PHINode *PN = getConstantEvolvingPHI(Cond, L);
+ if (PN == 0) return getCouldNotCompute();
+
+ // If the loop is canonicalized, the PHI will have exactly two entries.
+ // That's the only form we support here.
+ if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+
+ // One entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ Constant *StartCST =
+ dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) return getCouldNotCompute(); // Must be a constant.
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ if (getConstantEvolvingPHI(BEValue, L) != PN &&
+ !isa<Constant>(BEValue))
+ return getCouldNotCompute(); // Not derived from same PHI.
+
+ // Okay, we find a PHI node that defines the trip count of this loop. Execute
+ // the loop symbolically to determine when the condition gets a value of
+ // "ExitWhen".
+ unsigned IterationNum = 0;
+ unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ for (Constant *PHIVal = StartCST;
+ IterationNum != MaxIterations; ++IterationNum) {
+ ConstantInt *CondVal =
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+
+ // Couldn't symbolically evaluate.
+ if (!CondVal) return getCouldNotCompute();
+
+ if (CondVal->getValue() == uint64_t(ExitWhen)) {
+ ++NumBruteForceTripCountsComputed;
+ return getConstant(Type::getInt32Ty(getContext()), IterationNum);
+ }
+
+ // Compute the value of the PHI node for the next iteration.
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+ if (NextPHI == 0 || NextPHI == PHIVal)
+ return getCouldNotCompute();// Couldn't evaluate or not making progress...
+ PHIVal = NextPHI;
+ }
+
+ // Too many iterations were needed to evaluate.
+ return getCouldNotCompute();
+}
+
+/// getSCEVAtScope - Return a SCEV expression for the specified value
+/// at the specified scope in the program. The L value specifies a loop
+/// nest to evaluate the expression at, where null is the top-level or a
+/// specified loop is immediately inside of the loop.
+///
+/// This method can be used to compute the exit value for a variable defined
+/// in a loop by querying what the value will hold in the parent loop.
+///
+/// In the case that a relevant loop exit value cannot be computed, the
+/// original value V is returned.
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ // Check to see if we've folded this expression at this loop before.
+ std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
+ std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
+ if (!Pair.second)
+ return Pair.first->second ? Pair.first->second : V;
+
+ // Otherwise compute it.
+ const SCEV *C = computeSCEVAtScope(V, L);
+ ValuesAtScopes[V][L] = C;
+ return C;
+}
+
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
+ if (isa<SCEVConstant>(V)) return V;
+
+ // If this instruction is evolved from a constant-evolving PHI, compute the
+ // exit value from the loop without using SCEVs.
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
+ if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
+ const Loop *LI = (*this->LI)[I->getParent()];
+ if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (PN->getParent() == LI->getHeader()) {
+ // Okay, there is no closed form solution for the PHI node. Check
+ // to see if the loop that contains it has a known backedge-taken
+ // count. If so, we may be able to force computation of the exit
+ // value.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+ if (const SCEVConstant *BTCC =
+ dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+ // Okay, we know how many times the containing loop executes. If
+ // this is a constant evolving PHI node, get the final value at
+ // the specified iteration number.
+ Constant *RV = getConstantEvolutionLoopExitValue(PN,
+ BTCC->getValue()->getValue(),
+ LI);
+ if (RV) return getSCEV(RV);
+ }
+ }
+
+ // Okay, this is an expression that we cannot symbolically evaluate
+ // into a SCEV. Check to see if it's possible to symbolically evaluate
+ // the arguments into constants, and if so, try to constant propagate the
+ // result. This is particularly useful for computing loop exit values.
+ if (CanConstantFold(I)) {
+ SmallVector<Constant *, 4> Operands;
+ bool MadeImprovement = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *Op = I->getOperand(i);
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ Operands.push_back(C);
+ continue;
+ }
+
+ // If any of the operands is non-constant and if they are
+ // non-integer and non-pointer, don't even try to analyze them
+ // with scev techniques.
+ if (!isSCEVable(Op->getType()))
+ return V;
+
+ const SCEV *OrigV = getSCEV(Op);
+ const SCEV *OpV = getSCEVAtScope(OrigV, L);
+ MadeImprovement |= OrigV != OpV;
+
+ Constant *C = 0;
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
+ C = SC->getValue();
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
+ C = dyn_cast<Constant>(SU->getValue());
+ if (!C) return V;
+ if (C->getType() != Op->getType())
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ Op->getType(),
+ false),
+ C, Op->getType());
+ Operands.push_back(C);
+ }
+
+ // Check to see if getSCEVAtScope actually made an improvement.
+ if (MadeImprovement) {
+ Constant *C = 0;
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(),
+ Operands[0], Operands[1], TD);
+ else
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
+ if (!C) return V;
+ return getSCEV(C);
+ }
+ }
+ }
+
+ // This is some other type of SCEVUnknown, just return it.
+ return V;
+ }
+
+ if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ if (OpAtScope != Comm->getOperand(i)) {
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
+ Comm->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+
+ for (++i; i != e; ++i) {
+ OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ NewOps.push_back(OpAtScope);
+ }
+ if (isa<SCEVAddExpr>(Comm))
+ return getAddExpr(NewOps);
+ if (isa<SCEVMulExpr>(Comm))
+ return getMulExpr(NewOps);
+ if (isa<SCEVSMaxExpr>(Comm))
+ return getSMaxExpr(NewOps);
+ if (isa<SCEVUMaxExpr>(Comm))
+ return getUMaxExpr(NewOps);
+ llvm_unreachable("Unknown commutative SCEV type!");
+ }
+ }
+ // If we got here, all operands are loop invariant.
+ return Comm;
+ }
+
+ if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
+ const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
+ const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
+ if (LHS == Div->getLHS() && RHS == Div->getRHS())
+ return Div; // must be loop invariant
+ return getUDivExpr(LHS, RHS);
+ }
+
+ // If this is a loop recurrence for a loop that does not contain L, then we
+ // are dealing with the final value computed by the loop.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
+ // First, attempt to evaluate each operand.
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
+ if (OpAtScope == AddRec->getOperand(i))
+ continue;
+
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
+ AddRec->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+ for (++i; i != e; ++i)
+ NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
+
+ const SCEV *FoldedRec =
+ getAddRecExpr(NewOps, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
+ // The addrec may be folded to a nonrecurrence, for example, if the
+ // induction variable is multiplied by zero after constant folding. Go
+ // ahead and return the folded value.
+ if (!AddRec)
+ return FoldedRec;
+ break;
+ }
+
+ // If the scope is outside the addrec's loop, evaluate it by using the
+ // loop exit value of the addrec.
+ if (!AddRec->getLoop()->contains(L)) {
+ // To evaluate this recurrence, we need to know how many times the AddRec
+ // loop iterates. Compute this now.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+ if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
+
+ // Then, evaluate the AddRec.
+ return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
+ }
+
+ return AddRec;
+ }
+
+ if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getZeroExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getSignExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getTruncateExpr(Op, Cast->getType());
+ }
+
+ llvm_unreachable("Unknown SCEV type!");
+ return 0;
+}
+
+/// getSCEVAtScope - This is a convenience function which does
+/// getSCEVAtScope(getSCEV(V), L).
+const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
+ return getSCEVAtScope(getSCEV(V), L);
+}
+
+/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
+/// following equation:
+///
+/// A * X = B (mod N)
+///
+/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
+/// A and B isn't important.
+///
+/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+ ScalarEvolution &SE) {
+ uint32_t BW = A.getBitWidth();
+ assert(BW == B.getBitWidth() && "Bit widths must be the same.");
+ assert(A != 0 && "A must be non-zero.");
+
+ // 1. D = gcd(A, N)
+ //
+ // The gcd of A and N may have only one prime factor: 2. The number of
+ // trailing zeros in A is its multiplicity
+ uint32_t Mult2 = A.countTrailingZeros();
+ // D = 2^Mult2
+
+ // 2. Check if B is divisible by D.
+ //
+ // B is divisible by D if and only if the multiplicity of prime factor 2 for B
+ // is not less than multiplicity of this prime factor for D.
+ if (B.countTrailingZeros() < Mult2)
+ return SE.getCouldNotCompute();
+
+ // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
+ // modulo (N / D).
+ //
+ // (N / D) may need BW+1 bits in its representation. Hence, we'll use this
+ // bit width during computations.
+ APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
+ APInt Mod(BW + 1, 0);
+ Mod.setBit(BW - Mult2); // Mod = N / D
+ APInt I = AD.multiplicativeInverse(Mod);
+
+ // 4. Compute the minimum unsigned root of the equation:
+ // I * (B / D) mod (N / D)
+ APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
+
+ // The result is guaranteed to be less than 2^BW so we may truncate it to BW
+ // bits.
+ return SE.getConstant(Result.trunc(BW));
+}
+
+/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
+/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
+/// might be the same) or two SCEVCouldNotCompute objects.
+///
+static std::pair<const SCEV *,const SCEV *>
+SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
+ const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
+ const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
+ const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+
+ // We currently can only solve this if the coefficients are constants.
+ if (!LC || !MC || !NC) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
+ const APInt &L = LC->getValue()->getValue();
+ const APInt &M = MC->getValue()->getValue();
+ const APInt &N = NC->getValue()->getValue();
+ APInt Two(BitWidth, 2);
+ APInt Four(BitWidth, 4);
+
+ {
+ using namespace APIntOps;
+ const APInt& C = L;
+ // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
+ // The B coefficient is M-N/2
+ APInt B(M);
+ B -= sdiv(N,Two);
+
+ // The A coefficient is N/2
+ APInt A(N.sdiv(Two));
+
+ // Compute the B^2-4ac term.
+ APInt SqrtTerm(B);
+ SqrtTerm *= B;
+ SqrtTerm -= Four * (A * C);
+
+ // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
+ // integer value or else APInt::sqrt() will assert.
+ APInt SqrtVal(SqrtTerm.sqrt());
+
+ // Compute the two solutions for the quadratic formula.
+ // The divisions must be performed as signed divisions.
+ APInt NegB(-B);
+ APInt TwoA( A << 1 );
+ if (TwoA.isMinValue()) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ LLVMContext &Context = SE.getContext();
+
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+
+ return std::make_pair(SE.getConstant(Solution1),
+ SE.getConstant(Solution2));
+ } // end APIntOps namespace
+}
+
+/// HowFarToZero - Return the number of times a backedge comparing the specified
+/// value to zero will execute. If not computable, return CouldNotCompute.
+///
+/// This is only used for loops with a "x != y" exit test. The exit condition is
+/// now expressed as a single expression, V = x-y. So the exit test is
+/// effectively V != 0. We know and take advantage of the fact that this
+/// expression only being used in a comparison by zero context.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ // If the value is a constant
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ // If the value is already zero, the branch will execute zero times.
+ if (C->getValue()->isZero()) return C;
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
+ // the quadratic equation to solve it.
+ if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(AddRec, *this);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1 && R2) {
+#if 0
+ dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
+ << " sol#2: " << *R2 << "\n";
+#endif
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
+ R1->getValue(),
+ R2->getValue()))) {
+ if (CB->getZExtValue() == false)
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
+ if (Val->isZero())
+ return R1; // We found a quadratic root!
+ }
+ }
+ return getCouldNotCompute();
+ }
+
+ // Otherwise we can only handle this if it is affine.
+ if (!AddRec->isAffine())
+ return getCouldNotCompute();
+
+ // If this is an affine expression, the execution count of this branch is
+ // the minimum unsigned root of the following equation:
+ //
+ // Start + Step*N = 0 (mod 2^BW)
+ //
+ // equivalent to:
+ //
+ // Step*N = -Start (mod 2^BW)
+ //
+ // where BW is the common bit width of Start and Step.
+
+ // Get the initial value for the loop.
+ const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
+ const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+
+ // For now we handle only constant steps.
+ //
+ // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
+ // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
+ // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
+ // We have not yet seen any such cases.
+ const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
+ if (StepC == 0)
+ return getCouldNotCompute();
+
+ // For positive steps (counting up until unsigned overflow):
+ // N = -Start/Step (as unsigned)
+ // For negative steps (counting down to zero):
+ // N = Start/-Step
+ // First compute the unsigned distance from zero in the direction of Step.
+ bool CountDown = StepC->getValue()->getValue().isNegative();
+ const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
+
+ // Handle unitary steps, which cannot wraparound.
+ // 1*N = -Start; -1*N = Start (mod 2^BW), so:
+ // N = Distance (as unsigned)
+ if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue())
+ return Distance;
+
+ // If the recurrence is known not to wraparound, unsigned divide computes the
+ // back edge count. We know that the value will either become zero (and thus
+ // the loop terminates), that the loop will terminate through some other exit
+ // condition first, or that the loop has undefined behavior. This means
+ // we can't "miss" the exit value, even with nonunit stride.
+ //
+ // FIXME: Prove that loops always exhibits *acceptable* undefined
+ // behavior. Loops must exhibit defined behavior until a wrapped value is
+ // actually used. So the trip count computed by udiv could be smaller than the
+ // number of well-defined iterations.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNW))
+ // FIXME: We really want an "isexact" bit for udiv.
+ return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+
+ // Then, try to solve the above equation provided that Start is constant.
+ if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
+ return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
+ -StartC->getValue()->getValue(),
+ *this);
+ return getCouldNotCompute();
+}
+
+/// HowFarToNonZero - Return the number of times a backedge checking the
+/// specified value for nonzero will execute. If not computable, return
+/// CouldNotCompute
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+ // Loops that look like: while (X == 0) are very strange indeed. We don't
+ // handle them yet except for the trivial case. This could be expanded in the
+ // future as needed.
+
+ // If the value is a constant, check to see if it is known to be non-zero
+ // already. If so, the backedge will execute zero times.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ if (!C->getValue()->isNullValue())
+ return getConstant(C->getType(), 0);
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ // We could implement others, but I really doubt anyone writes loops like
+ // this, and if they did, they would already be constant folded.
+ return getCouldNotCompute();
+}
+
+/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
+/// (which may not be an immediate predecessor) which has exactly one
+/// successor from which BB is reachable, or null if no such block is
+/// found.
+///
+std::pair<BasicBlock *, BasicBlock *>
+ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
+ // If the block has a unique predecessor, then there is no path from the
+ // predecessor to the block that does not go through the direct edge
+ // from the predecessor to the block.
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ return std::make_pair(Pred, BB);
+
+ // A loop's header is defined to be a block that dominates the loop.
+ // If the header has a unique predecessor outside the loop, it must be
+ // a block that has exactly one successor that can reach the loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ return std::make_pair(L->getLoopPredecessor(), L->getHeader());
+
+ return std::pair<BasicBlock *, BasicBlock *>();
+}
+
+/// HasSameValue - SCEV structural equivalence is usually sufficient for
+/// testing whether two expressions are equal, however for the purposes of
+/// looking for a condition guarding a loop, it can be useful to be a little
+/// more general, since a front-end may have replicated the controlling
+/// expression.
+///
+static bool HasSameValue(const SCEV *A, const SCEV *B) {
+ // Quick check to see if they are the same SCEV.
+ if (A == B) return true;
+
+ // Otherwise, if they're both SCEVUnknown, it's possible that they hold
+ // two different instructions with the same value. Check for this case.
+ if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
+ if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
+ if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
+ if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
+ if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
+ return true;
+
+ // Otherwise assume they may have a different value.
+ return false;
+}
+
+/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+/// predicate Pred. Return true iff any changes were made.
+///
+bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+ const SCEV *&LHS, const SCEV *&RHS) {
+ bool Changed = false;
+
+ // Canonicalize a constant to the right side.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ // Check for both operands constant.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (ConstantExpr::getICmp(Pred,
+ LHSC->getValue(),
+ RHSC->getValue())->isNullValue())
+ goto trivially_false;
+ else
+ goto trivially_true;
+ }
+ // Otherwise swap the operands to put the constant on the right.
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+
+ // If we're comparing an addrec with a value which is loop-invariant in the
+ // addrec's loop, put the addrec on the left. Also make a dominance check,
+ // as both operands could be addrecs loop-invariant in each other's loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
+ const Loop *L = AR->getLoop();
+ if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+ }
+
+ // If there's a constant operand, canonicalize comparisons with boundary
+ // cases, and canonicalize *-or-equal comparisons to regular comparisons.
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
+ const APInt &RA = RC->getValue()->getValue();
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_UGE:
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_ULT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SLT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_false;
+ break;
+ }
+ }
+
+ // Check for obvious equality.
+ if (HasSameValue(LHS, RHS)) {
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ goto trivially_true;
+ if (ICmpInst::isFalseWhenEqual(Pred))
+ goto trivially_false;
+ }
+
+ // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
+ // adding or subtracting 1 from one of the operands.
+ switch (Pred) {
+ case ICmpInst::ICMP_SLE:
+ if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // TODO: More simplifications are possible here.
+
+ return Changed;
+
+trivially_true:
+ // Return 0 == 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_EQ;
+ return true;
+
+trivially_false:
+ // Return 0 != 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_NE;
+ return true;
+}
+
+bool ScalarEvolution::isKnownNegative(const SCEV *S) {
+ return getSignedRange(S).getSignedMax().isNegative();
+}
+
+bool ScalarEvolution::isKnownPositive(const SCEV *S) {
+ return getSignedRange(S).getSignedMin().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
+ return !getSignedRange(S).getSignedMin().isNegative();
+}
+
+bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
+ return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
+ return isKnownNegative(S) || isKnownPositive(S);
+}
+
+bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Canonicalize the inputs first.
+ (void)SimplifyICmpOperands(Pred, LHS, RHS);
+
+ // If LHS or RHS is an addrec, check to see if the condition is true in
+ // every iteration of the loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, AR->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
+ return true;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getStart()) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
+ return true;
+
+ // Otherwise see what can be done with known constant ranges.
+ return isKnownPredicateWithRanges(Pred, LHS, RHS);
+}
+
+bool
+ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ if (HasSameValue(LHS, RHS))
+ return ICmpInst::isTrueWhenEqual(Pred);
+
+ // This code is split out from isKnownPredicate because it is called from
+ // within isLoopEntryGuardedByCond.
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ break;
+ case ICmpInst::ICMP_SGT:
+ Pred = ICmpInst::ICMP_SLT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLT: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE:
+ Pred = ICmpInst::ICMP_SLE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLE: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ Pred = ICmpInst::ICMP_ULT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULT: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ Pred = ICmpInst::ICMP_ULE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULE: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+ return true;
+ if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+ return true;
+
+ const SCEV *Diff = getMinusSCEV(LHS, RHS);
+ if (isKnownNonZero(Diff))
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_EQ:
+ // The check at the top of the function catches the case where
+ // the values are known to be equal.
+ break;
+ }
+ return false;
+}
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS. This is used to
+/// to eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return true;
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ BranchInst *LoopContinuePredicate =
+ dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LoopContinuePredicate ||
+ LoopContinuePredicate->isUnconditional())
+ return false;
+
+ return isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+}
+
+/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
+/// by a conditional between LHS and RHS. This is used to help avoid max
+/// expressions in loop trip counts, and to eliminate casts.
+bool
+ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return false;
+
+ // Starting at the loop predecessor, climb up the predecessor chain, as long
+ // as there are predecessors that can be found that have unique successors
+ // leading to the original header.
+ for (std::pair<BasicBlock *, BasicBlock *>
+ Pair(L->getLoopPredecessor(), L->getHeader());
+ Pair.first;
+ Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
+
+ BranchInst *LoopEntryPredicate =
+ dyn_cast<BranchInst>(Pair.first->getTerminator());
+ if (!LoopEntryPredicate ||
+ LoopEntryPredicate->isUnconditional())
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS,
+ LoopEntryPredicate->getCondition(),
+ LoopEntryPredicate->getSuccessor(0) != Pair.second))
+ return true;
+ }
+
+ return false;
+}
+
+/// isImpliedCond - Test whether the condition described by Pred, LHS,
+/// and RHS is true whenever the given Cond value evaluates to true.
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue,
+ bool Inverse) {
+ // Recursively handle And and Or conditions.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
+ if (BO->getOpcode() == Instruction::And) {
+ if (!Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ } else if (BO->getOpcode() == Instruction::Or) {
+ if (Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ }
+ }
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
+ if (!ICI) return false;
+
+ // Bail if the ICmp's operands' types are wider than the needed type
+ // before attempting to call getSCEV on them. This avoids infinite
+ // recursion, since the analysis of widening casts can require loop
+ // exit condition information for overflow checking, which would
+ // lead back here.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(ICI->getOperand(0)->getType()))
+ return false;
+
+ // Now that we found a conditional branch that dominates the loop, check to
+ // see if it is the comparison we are looking for.
+ ICmpInst::Predicate FoundPred;
+ if (Inverse)
+ FoundPred = ICI->getInversePredicate();
+ else
+ FoundPred = ICI->getPredicate();
+
+ const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
+ const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+
+ // Balance the types. The case where FoundLHS' type is wider than
+ // LHS' type is checked for above.
+ if (getTypeSizeInBits(LHS->getType()) >
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
+ } else {
+ FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
+ }
+ }
+
+ // Canonicalize the query to match the way instcombine will have
+ // canonicalized the comparison.
+ if (SimplifyICmpOperands(Pred, LHS, RHS))
+ if (LHS == RHS)
+ return CmpInst::isTrueWhenEqual(Pred);
+ if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
+ if (FoundLHS == FoundRHS)
+ return CmpInst::isFalseWhenEqual(Pred);
+
+ // Check to see if we can make the LHS or RHS match.
+ if (LHS == FoundRHS || RHS == FoundLHS) {
+ if (isa<SCEVConstant>(RHS)) {
+ std::swap(FoundLHS, FoundRHS);
+ FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
+ } else {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ }
+
+ // Check whether the found predicate is the same as the desired predicate.
+ if (FoundPred == Pred)
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
+ // Check whether swapping the found predicate makes it the same as the
+ // desired predicate.
+ if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
+ if (isa<SCEVConstant>(RHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
+ else
+ return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
+ RHS, LHS, FoundLHS, FoundRHS);
+ }
+
+ // Check whether the actual condition is beyond sufficient.
+ if (FoundPred == ICmpInst::ICMP_EQ)
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+ if (Pred == ICmpInst::ICMP_NE)
+ if (!ICmpInst::isTrueWhenEqual(FoundPred))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ // Otherwise assume the worst.
+ return false;
+}
+
+/// isImpliedCondOperands - Test whether the condition described by Pred,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+/// and FoundRHS is true.
+bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ return isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ FoundLHS, FoundRHS) ||
+ // ~x < ~y --> x > y
+ isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ getNotSCEV(FoundRHS),
+ getNotSCEV(FoundLHS));
+}
+
+/// isImpliedCondOperandsHelper - Test whether the condition described by
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
+/// FoundLHS, and FoundRHS is true.
+bool
+ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+/// getBECount - Subtract the end and start values and divide by the step,
+/// rounding up, to get the number of times the backedge is executed. Return
+/// CouldNotCompute if an intermediate computation overflows.
+const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
+ const SCEV *End,
+ const SCEV *Step,
+ bool NoWrap) {
+ assert(!isKnownNegative(Step) &&
+ "This code doesn't handle negative strides yet!");
+
+ const Type *Ty = Start->getType();
+
+ // When Start == End, we have an exact BECount == 0. Short-circuit this case
+ // here because SCEV may not be able to determine that the unsigned division
+ // after rounding is zero.
+ if (Start == End)
+ return getConstant(Ty, 0);
+
+ const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
+ const SCEV *Diff = getMinusSCEV(End, Start);
+ const SCEV *RoundUp = getAddExpr(Step, NegOne);
+
+ // Add an adjustment to the difference between End and Start so that
+ // the division will effectively round up.
+ const SCEV *Add = getAddExpr(Diff, RoundUp);
+
+ if (!NoWrap) {
+ // Check Add for unsigned overflow.
+ // TODO: More sophisticated things could be done here.
+ const Type *WideTy = IntegerType::get(getContext(),
+ getTypeSizeInBits(Ty) + 1);
+ const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
+ const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
+ const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return getCouldNotCompute();
+ }
+
+ return getUDivExpr(Add, Step);
+}
+
+/// HowManyLessThans - Return the number of times a backedge containing the
+/// specified less-than comparison will execute. If not computable, return
+/// CouldNotCompute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool isSigned) {
+ // Only handle: "ADDREC < LoopInvariant".
+ if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // Check to see if we have a flag which makes analysis easy.
+ bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) :
+ AddRec->getNoWrapFlags(SCEV::FlagNUW);
+
+ if (AddRec->isAffine()) {
+ unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ if (Step->isZero())
+ return getCouldNotCompute();
+ if (Step->isOne()) {
+ // With unit stride, the iteration never steps past the limit value.
+ } else if (isKnownPositive(Step)) {
+ // Test whether a positive iteration can step past the limit
+ // value and past the maximum value for its type in a single step.
+ // Note that it's not sufficient to check NoWrap here, because even
+ // though the value after a wrap is undefined, it's not undefined
+ // behavior, so if wrap does occur, the loop could either terminate or
+ // loop infinitely, but in either case, the loop is guaranteed to
+ // iterate at least until the iteration where the wrapping occurs.
+ const SCEV *One = getConstant(Step->getType(), 1);
+ if (isSigned) {
+ APInt Max = APInt::getSignedMaxValue(BitWidth);
+ if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
+ .slt(getSignedRange(RHS).getSignedMax()))
+ return getCouldNotCompute();
+ } else {
+ APInt Max = APInt::getMaxValue(BitWidth);
+ if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
+ .ult(getUnsignedRange(RHS).getUnsignedMax()))
+ return getCouldNotCompute();
+ }
+ } else
+ // TODO: Handle negative strides here and below.
+ return getCouldNotCompute();
+
+ // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
+ // m. So, we count the number of iterations in which {n,+,s} < m is true.
+ // Note that we cannot simply return max(m-n,0)/s because it's not safe to
+ // treat m-n as signed nor unsigned due to overflow possibility.
+
+ // First, we get the value of the LHS in the first iteration: n
+ const SCEV *Start = AddRec->getOperand(0);
+
+ // Determine the minimum constant start value.
+ const SCEV *MinStart = getConstant(isSigned ?
+ getSignedRange(Start).getSignedMin() :
+ getUnsignedRange(Start).getUnsignedMin());
+
+ // If we know that the condition is true in order to enter the loop,
+ // then we know that it will run exactly (m-n)/s times. Otherwise, we
+ // only know that it will execute (max(m,n)-n)/s times. In both cases,
+ // the division must round up.
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L,
+ isSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT,
+ getMinusSCEV(Start, Step), RHS))
+ End = isSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ // Determine the maximum constant end value.
+ const SCEV *MaxEnd = getConstant(isSigned ?
+ getSignedRange(End).getSignedMax() :
+ getUnsignedRange(End).getUnsignedMax());
+
+ // If MaxEnd is within a step of the maximum integer value in its type,
+ // adjust it down to the minimum value which would produce the same effect.
+ // This allows the subsequent ceiling division of (N+(step-1))/step to
+ // compute the correct value.
+ const SCEV *StepMinusOne = getMinusSCEV(Step,
+ getConstant(Step->getType(), 1));
+ MaxEnd = isSigned ?
+ getSMinExpr(MaxEnd,
+ getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
+ StepMinusOne)) :
+ getUMinExpr(MaxEnd,
+ getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
+ StepMinusOne));
+
+ // Finally, we subtract these two values and divide, rounding up, to get
+ // the number of times the backedge is executed.
+ const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
+
+ // The maximum backedge count is similar, except using the minimum start
+ // value and the maximum end value.
+ // If we already have an exact constant BECount, use it instead.
+ const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
+ : getBECount(MinStart, MaxEnd, Step, NoWrap);
+
+ // If the stride is nonconstant, and NoWrap == true, then
+ // getBECount(MinStart, MaxEnd) may not compute. This would result in an
+ // exact BECount and invalid MaxBECount, which should be avoided to catch
+ // more optimization opportunities.
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+
+ return getCouldNotCompute();
+}
+
+/// getNumIterationsInRange - Return the number of iterations of this loop that
+/// produce values in the specified constant range. Another way of looking at
+/// this is that it returns the first iteration number where the value is not in
+/// the condition, thus computing the exit count. If the iteration count can't
+/// be computed, an instance of SCEVCouldNotCompute is returned.
+const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+ ScalarEvolution &SE) const {
+ if (Range.isFullSet()) // Infinite loop.
+ return SE.getCouldNotCompute();
+
+ // If the start is a non-zero constant, shift the range to simplify things.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
+ if (!SC->getValue()->isZero()) {
+ SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
+ Operands[0] = SE.getConstant(SC->getType(), 0);
+ const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
+ getNoWrapFlags(FlagNW));
+ if (const SCEVAddRecExpr *ShiftedAddRec =
+ dyn_cast<SCEVAddRecExpr>(Shifted))
+ return ShiftedAddRec->getNumIterationsInRange(
+ Range.subtract(SC->getValue()->getValue()), SE);
+ // This is strange and shouldn't happen.
+ return SE.getCouldNotCompute();
+ }
+
+ // The only time we can solve this is when we have all constant indices.
+ // Otherwise, we cannot determine the overflow conditions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<SCEVConstant>(getOperand(i)))
+ return SE.getCouldNotCompute();
+
+
+ // Okay at this point we know that all elements of the chrec are constants and
+ // that the start element is zero.
+
+ // First check to see if the range contains zero. If not, the first
+ // iteration exits.
+ unsigned BitWidth = SE.getTypeSizeInBits(getType());
+ if (!Range.contains(APInt(BitWidth, 0)))
+ return SE.getConstant(getType(), 0);
+
+ if (isAffine()) {
+ // If this is an affine expression then we have this situation:
+ // Solve {0,+,A} in Range === Ax in Range
+
+ // We know that zero is in the range. If A is positive then we know that
+ // the upper value of the range must be the first possible exit value.
+ // If A is negative then the lower of the range is the last possible loop
+ // value. Also note that we already checked for a full range.
+ APInt One(BitWidth,1);
+ APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+ APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
+
+ // The exit value should be (End+A)/A.
+ APInt ExitVal = (End + A).udiv(A);
+ ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
+
+ // Evaluate at the exit value. If we really did fall out of the valid
+ // range, then we computed our trip count, otherwise wrap around or other
+ // things must have happened.
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
+ if (Range.contains(Val->getValue()))
+ return SE.getCouldNotCompute(); // Something strange happened
+
+ // Ensure that the previous value is in the range. This is a sanity check.
+ assert(Range.contains(
+ EvaluateConstantChrecAtConstant(this,
+ ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
+ "Linear scev computation is off in a bad way!");
+ return SE.getConstant(ExitValue);
+ } else if (isQuadratic()) {
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
+ // quadratic equation to solve it. To do this, we must frame our problem in
+ // terms of figuring out when zero is crossed, instead of when
+ // Range.getUpper() is crossed.
+ SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
+ NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
+ // getNoWrapFlags(FlagNW)
+ FlagAnyWrap);
+
+ // Next, solve the constructed addrec
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1) {
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
+ R1->getValue(), R2->getValue()))) {
+ if (CB->getZExtValue() == false)
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // Make sure the root is not off by one. The returned iteration should
+ // not be in the range, but the previous one should be. When solving
+ // for "X*X < 5", for example, we should not return a root of 2.
+ ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
+ R1->getValue(),
+ SE);
+ if (Range.contains(R1Val->getValue())) {
+ // The next iteration must be out of the range...
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
+
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (!Range.contains(R1Val->getValue()))
+ return SE.getConstant(NextVal);
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+
+ // If R1 was not in the range, then it is a good return value. Make
+ // sure that R1-1 WAS in the range though, just in case.
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (Range.contains(R1Val->getValue()))
+ return R1;
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+ }
+ }
+
+ return SE.getCouldNotCompute();
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SCEVCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void ScalarEvolution::SCEVCallbackVH::deleted() {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+ if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(getValPtr());
+ // this now dangles!
+}
+
+void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+
+ // Forget all the expressions associated with users of the old value,
+ // so that future queries will recompute the expressions using the new
+ // value.
+ Value *Old = getValPtr();
+ SmallVector<User *, 16> Worklist;
+ SmallPtrSet<User *, 8> Visited;
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ while (!Worklist.empty()) {
+ User *U = Worklist.pop_back_val();
+ // Deleting the Old value will cause this to dangle. Postpone
+ // that until everything else is done.
+ if (U == Old)
+ continue;
+ if (!Visited.insert(U))
+ continue;
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(U);
+ for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ }
+ // Delete the Old value.
+ if (PHINode *PN = dyn_cast<PHINode>(Old))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(Old);
+ // this now dangles!
+}
+
+ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
+ : CallbackVH(V), SE(se) {}
+
+//===----------------------------------------------------------------------===//
+// ScalarEvolution Class Implementation
+//===----------------------------------------------------------------------===//
+
+ScalarEvolution::ScalarEvolution()
+ : FunctionPass(ID), FirstUnknown(0) {
+ initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolution::runOnFunction(Function &F) {
+ this->F = &F;
+ LI = &getAnalysis<LoopInfo>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
+ return false;
+}
+
+void ScalarEvolution::releaseMemory() {
+ // Iterate through all the SCEVUnknown instances and call their
+ // destructors, so that they release their references to their values.
+ for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
+ U->~SCEVUnknown();
+ FirstUnknown = 0;
+
+ ValueExprMap.clear();
+ BackedgeTakenCounts.clear();
+ ConstantEvolutionLoopExitValue.clear();
+ ValuesAtScopes.clear();
+ LoopDispositions.clear();
+ BlockDispositions.clear();
+ UnsignedRanges.clear();
+ SignedRanges.clear();
+ UniqueSCEVs.clear();
+ SCEVAllocator.Reset();
+}
+
+void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<DominatorTree>();
+}
+
+bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
+ return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
+}
+
+static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
+ const Loop *L) {
+ // Print all inner loops first
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ PrintLoopInfo(OS, SE, *I);
+
+ OS << "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1)
+ OS << "<multiple exits> ";
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable backedge-taken count. ";
+ }
+
+ OS << "\n"
+ "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
+
+ if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
+ OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable max backedge-taken count. ";
+ }
+
+ OS << "\n";
+}
+
+void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+ // ScalarEvolution's implementation of the print method is to print
+ // out SCEV values of all instructions that are interesting. Doing
+ // this potentially causes it to create new SCEV objects though,
+ // which technically conflicts with the const qualifier. This isn't
+ // observable from outside the class though, so casting away the
+ // const isn't dangerous.
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ OS << "Classifying expressions for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
+ OS << *I << '\n';
+ OS << " --> ";
+ const SCEV *SV = SE.getSCEV(&*I);
+ SV->print(OS);
+
+ const Loop *L = LI->getLoopFor((*I).getParent());
+
+ const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
+ if (!SE.isLoopInvariant(ExitValue, L)) {
+ OS << "<<Unknown>>";
+ } else {
+ OS << *ExitValue;
+ }
+ }
+
+ OS << "\n";
+ }
+
+ OS << "Determining loop execution counts for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ PrintLoopInfo(OS, &SE, *I);
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
+ std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
+ std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, LoopVariant));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ LoopDisposition D = computeLoopDisposition(S, L);
+ return LoopDispositions[S][L] = D;
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return LoopInvariant;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+
+ // If L is the addrec's loop, it's computable.
+ if (AR->getLoop() == L)
+ return LoopComputable;
+
+ // Add recurrences are never invariant in the function-body (null loop).
+ if (!L)
+ return LoopVariant;
+
+ // This recurrence is variant w.r.t. L if L contains AR's loop.
+ if (L->contains(AR->getLoop()))
+ return LoopVariant;
+
+ // This recurrence is invariant w.r.t. L if AR's loop contains L.
+ if (AR->getLoop()->contains(L))
+ return LoopInvariant;
+
+ // This recurrence is variant w.r.t. L if any of its operands
+ // are variant.
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ if (!isLoopInvariant(*I, L))
+ return LoopVariant;
+
+ // Otherwise it's loop-invariant.
+ return LoopInvariant;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool HasVarying = false;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ LoopDisposition D = getLoopDisposition(*I, L);
+ if (D == LoopVariant)
+ return LoopVariant;
+ if (D == LoopComputable)
+ HasVarying = true;
+ }
+ return HasVarying ? LoopComputable : LoopInvariant;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
+ if (LD == LoopVariant)
+ return LoopVariant;
+ LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
+ if (RD == LoopVariant)
+ return LoopVariant;
+ return (LD == LoopInvariant && RD == LoopInvariant) ?
+ LoopInvariant : LoopComputable;
+ }
+ case scUnknown:
+ // All non-instruction values are loop invariant. All instructions are loop
+ // invariant if they are not contained in the specified loop.
+ // Instructions are never considered invariant in the function body
+ // (null loop) because they are defined within the "loop".
+ if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+ return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
+ return LoopInvariant;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return LoopVariant;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return LoopVariant;
+}
+
+bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopInvariant;
+}
+
+bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopComputable;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
+ std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
+ Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BlockDisposition D = computeBlockDisposition(S, BB);
+ return BlockDispositions[S][BB] = D;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return ProperlyDominatesBlock;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
+ case scAddRecExpr: {
+ // This uses a "dominates" query instead of "properly dominates" query
+ // to test for proper dominance too, because the instruction which
+ // produces the addrec's value is a PHI, and a PHI effectively properly
+ // dominates its entire containing block.
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+ if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+ return DoesNotDominateBlock;
+ }
+ // FALL THROUGH into SCEVNAryExpr handling.
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool Proper = true;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ BlockDisposition D = getBlockDisposition(*I, BB);
+ if (D == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ if (D == DominatesBlock)
+ Proper = false;
+ }
+ return Proper ? ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ BlockDisposition LD = getBlockDisposition(LHS, BB);
+ if (LD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ BlockDisposition RD = getBlockDisposition(RHS, BB);
+ if (RD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
+ ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUnknown:
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
+ if (I->getParent() == BB)
+ return DominatesBlock;
+ if (DT->properlyDominates(I->getParent(), BB))
+ return ProperlyDominatesBlock;
+ return DoesNotDominateBlock;
+ }
+ return ProperlyDominatesBlock;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return DoesNotDominateBlock;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return DoesNotDominateBlock;
+}
+
+bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) >= DominatesBlock;
+}
+
+bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return false;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S);
+ const SCEV *CastOp = Cast->getOperand();
+ return Op == CastOp || hasOperand(CastOp, Op);
+ }
+ case scAddRecExpr:
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ const SCEV *NAryOp = *I;
+ if (NAryOp == Op || hasOperand(NAryOp, Op))
+ return true;
+ }
+ return false;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ return LHS == Op || hasOperand(LHS, Op) ||
+ RHS == Op || hasOperand(RHS, Op);
+ }
+ case scUnknown:
+ return false;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return false;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return false;
+}
+
+void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
+ ValuesAtScopes.erase(S);
+ LoopDispositions.erase(S);
+ BlockDispositions.erase(S);
+ UnsignedRanges.erase(S);
+ SignedRanges.erase(S);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
new file mode 100644
index 000000000000..e9edb3e083de
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -0,0 +1,173 @@
+//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a
+// simple alias analysis implemented in terms of ScalarEvolution queries.
+//
+// This differs from traditional loop dependence analysis in that it tests
+// for dependencies within a single iteration of a loop, rather than
+// dependencies between different iterations.
+//
+// ScalarEvolution has a more complete understanding of pointer arithmetic
+// than BasicAliasAnalysis' collection of ad-hoc analyses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses ScalarEvolution to answer queries.
+ class ScalarEvolutionAliasAnalysis : public FunctionPass,
+ public AliasAnalysis {
+ ScalarEvolution *SE;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+ initializeScalarEvolutionAliasAnalysisPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+
+ Value *GetBaseValue(const SCEV *S);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ScalarEvolutionAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+ return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+ InitializeAliasAnalysis(this);
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+/// GetBaseValue - Given an expression, try to find a
+/// base value. Return null is none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetBaseValue(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+ if (Last->getType()->isPointerTy())
+ return GetBaseValue(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // This is a leaf node.
+ return U->getValue();
+ }
+ // No Identified object found.
+ return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are. This allows the code below to ignore this special
+ // case.
+ if (LocA.Size == 0 || LocB.Size == 0)
+ return NoAlias;
+
+ // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
+ const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+
+ // If they evaluate to the same expression, it's a MustAlias.
+ if (AS == BS) return MustAlias;
+
+ // If something is known about the difference between the two addresses,
+ // see if it's enough to prove a NoAlias.
+ if (SE->getEffectiveSCEVType(AS->getType()) ==
+ SE->getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ APInt ASizeInt(BitWidth, LocA.Size);
+ APInt BSizeInt(BitWidth, LocB.Size);
+
+ // Compute the difference between the two pointers.
+ const SCEV *BA = SE->getMinusSCEV(BS, AS);
+
+ // Test whether the difference is known to be great enough that memory of
+ // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+ // are non-zero, which is special-cased above.
+ if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+ return NoAlias;
+
+ // Folding the subtraction while preserving range information can be tricky
+ // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+ // and try again to see if things fold better that way.
+
+ // Compute the difference between the two pointers.
+ const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+ // Test whether the difference is known to be great enough that memory of
+ // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+ // are non-zero, which is special-cased above.
+ if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
+ (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+ return NoAlias;
+ }
+
+ // If ScalarEvolution can find an underlying object, form a new query.
+ // The correctness of this depends on ScalarEvolution not recognizing
+ // inttoptr and ptrtoint operators.
+ Value *AO = GetBaseValue(AS);
+ Value *BO = GetBaseValue(BS);
+ if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+ if (alias(Location(AO ? AO : LocA.Ptr,
+ AO ? +UnknownSize : LocA.Size,
+ AO ? 0 : LocA.TBAATag),
+ Location(BO ? BO : LocB.Ptr,
+ BO ? +UnknownSize : LocB.Size,
+ BO ? 0 : LocB.TBAATag)) == NoAlias)
+ return NoAlias;
+
+ // Forward the query to the next analysis.
+ return AliasAnalysis::alias(LocA, LocB);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
new file mode 100644
index 000000000000..8e5a40008d88
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -0,0 +1,1390 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty,
+ Instruction::CastOps Op,
+ BasicBlock::iterator IP) {
+ // Check to see if there is already a cast!
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (U->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(U))
+ if (CI->getOpcode() == Op) {
+ // If the cast isn't where we want it, fix it.
+ if (BasicBlock::iterator(CI) != IP) {
+ // Create a new cast, and leave the old cast in place in case
+ // it is being used as an insert point. Clear its operand
+ // so that it doesn't hold anything live.
+ Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
+ NewCI->takeName(CI);
+ CI->replaceAllUsesWith(NewCI);
+ CI->setOperand(0, UndefValue::get(V->getType()));
+ rememberInstruction(NewCI);
+ return NewCI;
+ }
+ rememberInstruction(CI);
+ return CI;
+ }
+ }
+
+ // Create a new cast.
+ Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
+ rememberInstruction(I);
+ return I;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
+ Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+ assert((Op == Instruction::BitCast ||
+ Op == Instruction::PtrToInt ||
+ Op == Instruction::IntToPtr) &&
+ "InsertNoopCastOfTo cannot perform non-noop casts!");
+ assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+ "InsertNoopCastOfTo cannot change sizes!");
+
+ // Short-circuit unnecessary bitcasts.
+ if (Op == Instruction::BitCast && V->getType() == Ty)
+ return V;
+
+ // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+ if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if ((CI->getOpcode() == Instruction::PtrToInt ||
+ CI->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CI->getType()) ==
+ SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+ return CI->getOperand(0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if ((CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CE->getType()) ==
+ SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return CE->getOperand(0);
+ }
+
+ // Fold a cast of a constant.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Op, C, Ty);
+
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ return ReuseOrCreateCast(A, Ty, Op, IP);
+ }
+
+ // Cast the instruction immediately after the instruction.
+ Instruction *I = cast<Instruction>(V);
+ BasicBlock::iterator IP = I; ++IP;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP;
+ return ReuseOrCreateCast(I, Ty, Op, IP);
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation.
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS) {
+ // Fold a binop with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::get(Opcode, CLHS, CRHS);
+
+ // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+ IP->getOperand(1) == RHS)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // If we haven't found this binop, insert it.
+ Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
+ rememberInstruction(BO);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return BO;
+}
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
+/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
+/// check to see if the divide was folded.
+static bool FactorOutConstant(const SCEV *&S,
+ const SCEV *&Remainder,
+ const SCEV *Factor,
+ ScalarEvolution &SE,
+ const TargetData *TD) {
+ // Everything is divisible by one.
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getConstant(S->getType(), 1);
+ return true;
+ }
+
+ // For a Constant, check for a multiple of the given factor.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ // 0/x == 0.
+ if (C->isZero())
+ return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(),
+ C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder =
+ SE.getAddExpr(Remainder,
+ SE.getConstant(C->getValue()->getValue().srem(
+ FC->getValue()->getValue())));
+ return true;
+ }
+ }
+ }
+
+ // In a Mul, check if there is a constant operand which is a multiple
+ // of the given factor.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ if (TD) {
+ // With TargetData, the size is known. Check if there is a constant
+ // operand which is a multiple of the given factor. If so, we can
+ // factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] =
+ SE.getConstant(C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ } else {
+ // Without TargetData, check if Factor can be factored out of any of the
+ // Mul's operands. If so, we can just remove it.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *SOp = M->getOperand(i);
+ const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
+ if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+ Remainder->isZero()) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[i] = SOp;
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ }
+ }
+ }
+
+ // In an AddRec, check if both start and step are divisible.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
+ return false;
+ // FIXME: can use A->getNoWrapFlags(FlagNW)
+ S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getConstant(Ty, 0) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ Ops.clear();
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops.append(Add->op_begin(), Add->op_end());
+ else if (!Sum->isZero())
+ Ops.push_back(Sum);
+ // Then append the addrecs.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getConstant(Ty, 0);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ // FIXME: A->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.append(Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
+ const PointerType *PTy,
+ const Type *Ty,
+ Value *V) {
+ const Type *ElTy = PTy->getElementType();
+ SmallVector<Value *, 4> GepIndices;
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+ bool AnyNonZeroIndices = false;
+
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
+ // Descend down the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ for (;;) {
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized()) {
+ const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ if (!ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ const SCEV *Op = Ops[i];
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Ops[i]);
+ }
+ }
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+ }
+ }
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
+ Value *Scaled = ScaledOps.empty() ?
+ Constant::getNullValue(Ty) :
+ expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ if (SE.TD) {
+ // With TargetData, field offsets are known. See if a constant offset
+ // falls within any of the struct fields.
+ if (Ops.empty()) break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *SE.TD->getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ }
+ }
+ } else {
+ // Without TargetData, just check for an offsetof expression of the
+ // appropriate struct type.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
+ const Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
+ GepIndices.push_back(FieldNo);
+ ElTy =
+ STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
+ Ops[i] = SE.getConstant(Ty, 0);
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ break;
+ }
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+ }
+ }
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ ElTy = ATy->getElementType();
+ else
+ break;
+ }
+
+ // If none of the operands were convertible to proper GEP indices, cast
+ // the base to i8* and do an ugly getelementptr with that. It's still
+ // better than ptrtoint+arithmetic+inttoptr at least.
+ if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
+ V = InsertNoopCastOfTo(V,
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ // Expand the operands for a plain byte offset.
+ Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1);
+
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // Emit a GEP.
+ Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return GEP;
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant = false;
+ for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
+ E = GepIndices.end(); I != E; ++I)
+ if (!L->isLoopInvariant(*I)) {
+ AnyIndexNotLoopInvariant = true;
+ break;
+ }
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
+ Value *Casted = V;
+ if (V->getType() != PTy)
+ Casted = InsertNoopCastOfTo(Casted, PTy);
+ Value *GEP = Builder.CreateGEP(Casted,
+ GepIndices.begin(),
+ GepIndices.end(),
+ "scevgep");
+ Ops.push_back(SE.getUnknown(GEP));
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return expand(SE.getAddExpr(Ops));
+}
+
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+static bool isNonConstantNegative(const SCEV *F) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+ // Return true if the value is negative, this matches things like (-42 * V).
+ return SC->getValue()->getValue().isNegative();
+}
+
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+ // Test whether we've already computed the most relevant loop for this SCEV.
+ std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
+ RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ if (isa<SCEVConstant>(S))
+ // A constant has no relevant loops.
+ return 0;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+ // A non-instruction has no relevant loops.
+ return 0;
+ }
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+ const Loop *L = 0;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
+ I != E; ++I)
+ L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+ return RelevantLoops[N] = L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+ const Loop *Result = getRelevantLoop(C->getOperand());
+ return RelevantLoops[C] = Result;
+ }
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const Loop *Result =
+ PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
+ getRelevantLoop(D->getRHS()),
+ *SE.DT);
+ return RelevantLoops[D] = Result;
+ }
+ llvm_unreachable("Unexpected SCEV type!");
+ return 0;
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+ // Keep pointer operands sorted at the end.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (isNonConstantNegative(LHS.second)) {
+ if (!isNonConstantNegative(RHS.second))
+ return false;
+ } else if (isNonConstantNegative(RHS.second))
+ return true;
+
+ // Otherwise they are equivalent according to this comparison.
+ return false;
+ }
+};
+
+}
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I) {
+ // If the operand is SCEVUnknown and not instructions, peek through
+ // it, to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr. If the running sum is instructions,
+ // use a SCEVUnknown to avoid re-analyzing them.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+ SE.getSCEV(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (isNonConstantNegative(Op)) {
+ // Instead of doing a negate and add, just do a subtract.
+ Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W);
+ ++I;
+ } else {
+ // A simple add.
+ Value *W = expandCodeFor(Op, Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W);
+ ++I;
+ }
+ }
+
+ return Sum;
+}
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const SCEV *Op = I->second;
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = expand(Op);
+ ++I;
+ } else if (Op->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = expandCodeFor(Op, Ty);
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ Prod = InsertBinop(Instruction::Mul, Prod, W);
+ ++I;
+ }
+ }
+
+ return Prod;
+}
+
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ Value *LHS = expandCodeFor(S->getLHS(), Ty);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+ const APInt &RHS = SC->getValue()->getValue();
+ if (RHS.isPowerOf2())
+ return InsertBinop(Instruction::LShr, LHS,
+ ConstantInt::get(Ty, RHS.logBase2()));
+ }
+
+ Value *RHS = expandCodeFor(S->getRHS(), Ty);
+ return InsertBinop(Instruction::UDiv, LHS, RHS);
+}
+
+/// Move parts of Base into Rest to leave Base with the minimal
+/// expression that provides a pointer operand suitable for a
+/// GEP expansion.
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
+ ScalarEvolution &SE) {
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
+ Base = A->getStart();
+ Rest = SE.getAddExpr(Rest,
+ SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ // FIXME: A->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
+ Base = A->getOperand(A->getNumOperands()-1);
+ SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
+ NewAddOps.back() = Rest;
+ Rest = SE.getAddExpr(NewAddOps);
+ ExposePointerBase(Base, Rest, SE);
+ }
+}
+
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+ const Loop *L,
+ const Type *ExpandTy,
+ const Type *IntTy) {
+ // Reuse a previously-inserted PHI, if present.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(Normalized->getType())) &&
+ SE.getSCEV(PN) == Normalized)
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *IncV =
+ cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+ // Determine if this is a well-behaved chain of instructions leading
+ // back to the PHI. It probably will be, if we're scanning an inner
+ // loop already visited by LSR for example, but it wouldn't have
+ // to be.
+ do {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+ (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) {
+ IncV = 0;
+ break;
+ }
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
+ IncV = 0;
+ break;
+ }
+ if (!IncV)
+ break;
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ break;
+ if (IncV->mayHaveSideEffects()) {
+ IncV = 0;
+ break;
+ }
+ } while (IncV != PN);
+
+ if (IncV) {
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+ // Remember the increment.
+ IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+ rememberInstruction(IncV);
+ if (L == IVIncInsertLoop)
+ do {
+ if (SE.DT->dominates(IncV, IVIncInsertPos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ IncV->moveBefore(IVIncInsertPos);
+ IVIncInsertPos = IncV;
+ IncV = cast<Instruction>(IncV->getOperand(0));
+ } while (IncV != PN);
+ return PN;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Expand code for the start value.
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+ L->getHeader()->begin());
+
+ // Expand code for the step value. Insert instructions right before the
+ // terminator corresponding to the back-edge. Do this before creating the PHI
+ // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
+ // negative, insert a sub instead of an add for the increment (unless it's a
+ // constant, because subtracts of constants are canonicalized to adds).
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ bool isPointer = ExpandTy->isPointerTy();
+ bool isNegative = !isPointer && isNonConstantNegative(Step);
+ if (isNegative)
+ Step = SE.getNegativeSCEV(Step);
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+
+ // Create the PHI.
+ BasicBlock *Header = L->getHeader();
+ Builder.SetInsertPoint(Header, Header->begin());
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv");
+ rememberInstruction(PN);
+
+ // Create the step instructions and populate the PHI.
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *Pred = *HPI;
+
+ // Add a start value.
+ if (!L->contains(Pred)) {
+ PN->addIncoming(StartV, Pred);
+ continue;
+ }
+
+ // Create a step value and add it to the PHI. If IVIncInsertLoop is
+ // non-null and equal to the addrec's loop, insert the instructions
+ // at IVIncInsertPos.
+ Instruction *InsertPos = L == IVIncInsertLoop ?
+ IVIncInsertPos : Pred->getTerminator();
+ Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (isPointer) {
+ const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp");
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = isNegative ?
+ Builder.CreateSub(PN, StepV, "lsr.iv.next") :
+ Builder.CreateAdd(PN, StepV, "lsr.iv.next");
+ rememberInstruction(IncV);
+ }
+ PN->addIncoming(IncV, Pred);
+ }
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+
+ return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+ const Type *STy = S->getType();
+ const Type *IntTy = SE.getEffectiveSCEVType(STy);
+ const Loop *L = S->getLoop();
+
+ // Determine a normalized form of this expression, which is the expression
+ // before any post-inc adjustment is made.
+ const SCEVAddRecExpr *Normalized = S;
+ if (PostIncLoops.count(L)) {
+ PostIncLoopSet Loops;
+ Loops.insert(L);
+ Normalized =
+ cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
+ Loops, SE, *SE.DT));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec start.
+ const SCEV *Start = Normalized->getStart();
+ const SCEV *PostLoopOffset = 0;
+ if (!SE.properlyDominates(Start, L->getHeader())) {
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ Normalized = cast<SCEVAddRecExpr>(
+ SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
+ Normalized->getLoop(),
+ // FIXME: Normalized->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec step.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ const SCEV *PostLoopScale = 0;
+ if (!SE.dominates(Step, L->getHeader())) {
+ PostLoopScale = Step;
+ Step = SE.getConstant(Normalized->getType(), 1);
+ Normalized =
+ cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
+ Normalized->getLoop(),
+ // FIXME: Normalized
+ // ->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+
+ // Expand the core addrec. If we need post-loop scaling, force it to
+ // expand to an integer type to avoid the need for additional casting.
+ const Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+
+ // Accommodate post-inc mode, if necessary.
+ Value *Result;
+ if (!PostIncLoops.count(L))
+ Result = PN;
+ else {
+ // In PostInc mode, use the post-incremented value.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+ Result = PN->getIncomingValueForBlock(LatchBlock);
+ }
+
+ // Re-apply any non-loop-dominating scale.
+ if (PostLoopScale) {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateMul(Result,
+ expandCodeFor(PostLoopScale, IntTy));
+ rememberInstruction(Result);
+ }
+
+ // Re-apply any non-loop-dominating offset.
+ if (PostLoopOffset) {
+ if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ const SCEV *const OffsetArray[1] = { PostLoopOffset };
+ Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+ } else {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateAdd(Result,
+ expandCodeFor(PostLoopOffset, IntTy));
+ rememberInstruction(Result);
+ }
+ }
+
+ return Result;
+}
+
+Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+ if (!CanonicalMode) return expandAddRecExprLiterally(S);
+
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ const Loop *L = S->getLoop();
+
+ // First check for an existing canonical IV in a suitable type.
+ PHINode *CanonicalIV = 0;
+ if (PHINode *PN = L->getCanonicalInductionVariable())
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ CanonicalIV = PN;
+
+ // Rewrite an AddRec in terms of the canonical induction variable, if
+ // its type is more narrow.
+ if (CanonicalIV &&
+ SE.getTypeSizeInBits(CanonicalIV->getType()) >
+ SE.getTypeSizeInBits(Ty)) {
+ SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
+ for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
+ // FIXME: S->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BasicBlock::iterator NewInsertPt =
+ llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+ while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt))
+ ++NewInsertPt;
+ V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+ NewInsertPt);
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ return V;
+ }
+
+ // {X,+,F} --> X + {0,+,F}
+ if (!S->getStart()->isZero()) {
+ SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
+ NewOps[0] = SE.getConstant(Ty, 0);
+ // FIXME: can use S->getNoWrapFlags()
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);
+
+ // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
+ // comments on expandAddToGEP for details.
+ const SCEV *Base = S->getStart();
+ const SCEV *RestArray[1] = { Rest };
+ // Dig into the expression to find the pointer base for a GEP.
+ ExposePointerBase(Base, RestArray[0], SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
+ }
+ }
+
+ // Just do a normal add. Pre-expand the operands to suppress folding.
+ return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+ SE.getUnknown(expand(Rest))));
+ }
+
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
+ // Create and insert the PHI node for the induction variable in the
+ // specified loop.
+ BasicBlock *Header = L->getHeader();
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
+ Header->begin());
+ rememberInstruction(CanonicalIV);
+
+ Constant *One = ConstantInt::get(Ty, 1);
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *HP = *HPI;
+ if (L->contains(HP)) {
+ // Insert a unit add instruction right before the terminator
+ // corresponding to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
+ rememberInstruction(Add);
+ CanonicalIV->addIncoming(Add, HP);
+ } else {
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
+ }
+ }
+ }
+
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
+ // {0,+,F} --> {0,+,1} * F
+
+ // If this is a simple linear addrec, emit it now as a special case.
+ if (S->isAffine()) // {0,+,F} --> i*F
+ return
+ expand(SE.getTruncateOrNoop(
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
+ SE.getNoopOrAnyExtend(S->getOperand(1),
+ CanonicalIV->getType())),
+ Ty));
+
+ // If this is a chain of recurrences, turn it into a closed form, using the
+ // folders, then expandCodeFor the closed form. This allows the folders to
+ // simplify the expression without having to build a bunch of special code
+ // into this folder.
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
+
+ // Promote S up to the canonical IV type, if the cast is foldable.
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
+ if (isa<SCEVAddRecExpr>(Ext))
+ NewS = Ext;
+
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+ //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
+
+ // Truncate the result down to the original type, if needed.
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
+ return expand(T);
+}
+
+Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateTrunc(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateZExt(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateSExt(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty,
+ Instruction *I) {
+ BasicBlock::iterator IP = I;
+ while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ Builder.SetInsertPoint(IP->getParent(), IP);
+ return expandCodeFor(SH, Ty);
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
+ // Expand the code for this SCEV.
+ Value *V = expand(SH);
+ if (Ty) {
+ assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+ "non-trivial casts should be done with the SCEVs directly!");
+ V = InsertNoopCastOfTo(V, Ty);
+ }
+ return V;
+}
+
+Value *SCEVExpander::expand(const SCEV *S) {
+ // Compute an insertion point for this SCEV object. Hoist the instructions
+ // as far out in the loop nest as possible.
+ Instruction *InsertPt = Builder.GetInsertPoint();
+ for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
+ L = L->getParentLoop())
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = L->getHeader()->getFirstNonPHI();
+ while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
+ InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+ break;
+ }
+
+ // Check to see if we already expanded this here.
+ std::map<std::pair<const SCEV *, Instruction *>,
+ AssertingVH<Value> >::iterator I =
+ InsertedExpressions.find(std::make_pair(S, InsertPt));
+ if (I != InsertedExpressions.end())
+ return I->second;
+
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
+
+ // Expand the expression into instructions.
+ Value *V = visit(S);
+
+ // Remember the expanded value for this SCEV at this location.
+ if (PostIncLoops.empty())
+ InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ return V;
+}
+
+void SCEVExpander::rememberInstruction(Value *I) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(I);
+ else
+ InsertedValues.insert(I);
+
+ // If we just claimed an existing instruction and that instruction had
+ // been the insert point, adjust the insert point forward so that
+ // subsequently inserted code will be dominated.
+ if (Builder.GetInsertPoint() == I) {
+ BasicBlock::iterator It = cast<Instruction>(I);
+ do { ++It; } while (isInsertedInstruction(It) ||
+ isa<DbgInfoIntrinsic>(It));
+ Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
+ }
+}
+
+void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
+ // If we acquired more instructions since the old insert point was saved,
+ // advance past them.
+ while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
+
+ Builder.SetInsertPoint(BB, I);
+}
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none). A canonical induction variable
+/// starts at zero and steps by one on each iteration.
+PHINode *
+SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
+ const Type *Ty) {
+ assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
+
+ // Build a SCEV for {0,+,1}<L>.
+ // Conservatively use FlagAnyWrap for now.
+ const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
+ SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
+
+ // Emit code for it.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return V;
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
new file mode 100644
index 000000000000..60e630aaab88
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -0,0 +1,184 @@
+//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for working with "normalized" expressions.
+// See the comments at the top of ScalarEvolutionNormalization.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+using namespace llvm;
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
+ const Loop *L, DominatorTree *DT) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User)) return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+ const SCEV *S,
+ Instruction *User,
+ Value *OperandValToReplace,
+ PostIncLoopSet &Loops,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+ return S;
+
+ if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+ const SCEV *O = X->getOperand();
+ const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+ Loops, SE, DT);
+ if (O != N)
+ switch (S->getSCEVType()) {
+ case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+ case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+ case scTruncate: return SE.getTruncateExpr(N, S->getType());
+ default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // An addrec. This is the interesting part.
+ SmallVector<const SCEV *, 8> Operands;
+ const Loop *L = AR->getLoop();
+ // The addrec conceptually uses its operands at loop entry.
+ Instruction *LUser = L->getHeader()->begin();
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
+ Operands.push_back(N);
+ }
+ // Conservatively use AnyWrap until/unless we need FlagNW.
+ const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected transform name!");
+ case NormalizeAutodetect:
+ if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ Loops.insert(L);
+ }
+#if 0
+ // This assert is conceptually correct, but ScalarEvolution currently
+ // sometimes fails to canonicalize two equal SCEVs to exactly the same
+ // form. It's possibly a pessimization when this happens, but it isn't a
+ // correctness problem, so disable this assert for now.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Normalize:
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ }
+#if 0
+ // See the comment on the assert above.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Denormalize:
+ if (Loops.count(L))
+ Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
+ break;
+ }
+ return Result;
+ }
+
+ if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+ SmallVector<const SCEV *, 8> Operands;
+ bool Changed = false;
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+ Loops, SE, DT);
+ Changed |= N != O;
+ Operands.push_back(N);
+ }
+ // If any operand actually changed, return a transformed result.
+ if (Changed)
+ switch (S->getSCEVType()) {
+ case scAddExpr: return SE.getAddExpr(Operands);
+ case scMulExpr: return SE.getMulExpr(Operands);
+ case scSMaxExpr: return SE.getSMaxExpr(Operands);
+ case scUMaxExpr: return SE.getUMaxExpr(Operands);
+ default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEV *LO = X->getLHS();
+ const SCEV *RO = X->getRHS();
+ const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
+ Loops, SE, DT);
+ const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
+ Loops, SE, DT);
+ if (LO != LN || RO != RN)
+ return SE.getUDivExpr(LN, RN);
+ return S;
+ }
+
+ llvm_unreachable("Unexpected SCEV kind!");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
new file mode 100644
index 000000000000..d8c207b4bd49
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
@@ -0,0 +1,347 @@
+//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparseprop"
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// AbstractLatticeFunction Implementation
+//===----------------------------------------------------------------------===//
+
+AbstractLatticeFunction::~AbstractLatticeFunction() {}
+
+/// PrintValue - Render the specified lattice value to the specified stream.
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
+ if (V == UndefVal)
+ OS << "undefined";
+ else if (V == OverdefinedVal)
+ OS << "overdefined";
+ else if (V == UntrackedVal)
+ OS << "untracked";
+ else
+ OS << "unknown lattice value";
+}
+
+//===----------------------------------------------------------------------===//
+// SparseSolver Implementation
+//===----------------------------------------------------------------------===//
+
+/// getOrInitValueState - Return the LatticeVal object that corresponds to the
+/// value, initializing the value's state if it hasn't been entered into the
+/// map yet. This function is necessary because not all values should start
+/// out in the underdefined state... Arguments should be overdefined, and
+/// constants should be marked as constants.
+///
+SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ if (I != ValueState.end()) return I->second; // Common case, in the map
+
+ LatticeVal LV;
+ if (LatticeFunc->IsUntrackedValue(V))
+ return LatticeFunc->getUntrackedVal();
+ else if (Constant *C = dyn_cast<Constant>(V))
+ LV = LatticeFunc->ComputeConstant(C);
+ else if (Argument *A = dyn_cast<Argument>(V))
+ LV = LatticeFunc->ComputeArgument(A);
+ else if (!isa<Instruction>(V))
+ // All other non-instructions are overdefined.
+ LV = LatticeFunc->getOverdefinedVal();
+ else
+ // All instructions are underdefined by default.
+ LV = LatticeFunc->getUndefVal();
+
+ // If this value is untracked, don't add it to the map.
+ if (LV == LatticeFunc->getUntrackedVal())
+ return LV;
+ return ValueState[V] = LV;
+}
+
+/// UpdateState - When the state for some instruction is potentially updated,
+/// this function notices and adds I to the worklist if needed.
+void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
+ if (I != ValueState.end() && I->second == V)
+ return; // No change.
+
+ // An update. Visit uses of I.
+ ValueState[&Inst] = V;
+ InstWorkList.push_back(&Inst);
+}
+
+/// MarkBlockExecutable - This method can be used by clients to mark all of
+/// the blocks that are known to be intrinsically live in the processed unit.
+void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBExecutable.insert(BB); // Basic block is executable!
+ BBWorkList.push_back(BB); // Add the block to the work list!
+}
+
+/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+/// work list if it is not already executable...
+void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ if (BBExecutable.count(Dest)) {
+ // The destination is already executable, but we just made an edge
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+ visitPHINode(*cast<PHINode>(I));
+
+ } else {
+ MarkBlockExecutable(Dest);
+ }
+}
+
+
+/// getFeasibleSuccessors - Return a vector of booleans to indicate which
+/// successors are reachable from a given terminator instruction.
+void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVectorImpl<bool> &Succs,
+ bool AggressiveUndef) {
+ Succs.resize(TI.getNumSuccessors());
+ if (TI.getNumSuccessors() == 0) return;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue;
+ if (AggressiveUndef)
+ BCValue = getOrInitValueState(BI->getCondition());
+ else
+ BCValue = getLatticeState(BI->getCondition());
+
+ if (BCValue == LatticeFunc->getOverdefinedVal() ||
+ BCValue == LatticeFunc->getUntrackedVal()) {
+ // Overdefined condition variables can branch either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (BCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // Non-constant values can go either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way
+ Succs[C->isNullValue()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+ // Invoke instructions successors are always executable.
+ // TODO: Could ask the lattice function if the value can throw.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (isa<IndirectBrInst>(TI)) {
+ Succs.assign(Succs.size(), true);
+ return;
+ }
+
+ SwitchInst &SI = cast<SwitchInst>(TI);
+ LatticeVal SCValue;
+ if (AggressiveUndef)
+ SCValue = getOrInitValueState(SI.getCondition());
+ else
+ SCValue = getLatticeState(SI.getCondition());
+
+ if (SCValue == LatticeFunc->getOverdefinedVal() ||
+ SCValue == LatticeFunc->getUntrackedVal()) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (SCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true;
+}
+
+
+/// isEdgeFeasible - Return true if the control flow edge from the 'From'
+/// basic block to the 'To' basic block is currently feasible...
+bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
+ bool AggressiveUndef) {
+ SmallVector<bool, 16> SuccFeasible;
+ TerminatorInst *TI = From->getTerminator();
+ getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (TI->getSuccessor(i) == To && SuccFeasible[i])
+ return true;
+
+ return false;
+}
+
+void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible, true);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable...
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SparseSolver::visitPHINode(PHINode &PN) {
+ // The lattice function may store more information on a PHINode than could be
+ // computed from its incoming values. For example, SSI form stores its sigma
+ // functions as PHINodes with a single incoming value.
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(PN, IV);
+ return;
+ }
+
+ LatticeVal PNIV = getOrInitValueState(&PN);
+ LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
+
+ // If this value is already overdefined (common) just return.
+ if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64) {
+ UpdateState(PN, Overdefined);
+ return;
+ }
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the
+ // transfer function to give us the merge of the incoming values.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // If the edge is not yet known to be feasible, it doesn't impact the PHI.
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
+ continue;
+
+ // Merge in this value.
+ LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
+ if (OpVal != PNIV)
+ PNIV = LatticeFunc->MergeValues(PNIV, OpVal);
+
+ if (PNIV == Overdefined)
+ break; // Rest of input values don't matter.
+ }
+
+ // Update the PHI with the compute value, which is the merge of the inputs.
+ UpdateState(PN, PNIV);
+}
+
+
+void SparseSolver::visitInst(Instruction &I) {
+ // PHIs are handled by the propagation logic, they are never passed into the
+ // transfer functions.
+ if (PHINode *PN = dyn_cast<PHINode>(&I))
+ return visitPHINode(*PN);
+
+ // Otherwise, ask the transfer function what the result is. If this is
+ // something that we care about, remember it.
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(I, IV);
+
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
+ visitTerminatorInst(*TI);
+}
+
+void SparseSolver::Solve(Function &F) {
+ MarkBlockExecutable(&F.getEntryBlock());
+
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty()) {
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Instruction *I = InstWorkList.back();
+ InstWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n");
+
+ // "I" got into the work list because it made a transition. See if any
+ // users are both live and in need of updating.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (BBExecutable.count(U->getParent())) // Inst is executable?
+ visitInst(*U);
+ }
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ visitInst(*I);
+ }
+ }
+}
+
+void SparseSolver::Print(Function &F, raw_ostream &OS) const {
+ OS << "\nFUNCTION: " << F.getNameStr() << "\n";
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ OS << "INFEASIBLE: ";
+ OS << "\t";
+ if (BB->hasName())
+ OS << BB->getNameStr() << ":\n";
+ else
+ OS << "; anon bb\n";
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ LatticeFunc->PrintValue(getLatticeState(I), OS);
+ OS << *I << "\n";
+ }
+
+ OS << "\n";
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp
new file mode 100644
index 000000000000..68a39cd581f4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Trace.cpp
@@ -0,0 +1,51 @@
+//===- Trace.cpp - Implementation of Trace class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a single trace of LLVM basic blocks. A trace is a
+// single entry, multiple exit, region of code that is often hot. Trace-based
+// optimizations treat traces almost like they are a large, strange, basic
+// block: because the trace path is assumed to be hot, optimizations for the
+// fall-through path are made at the expense of the non-fall-through paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+Function *Trace::getFunction() const {
+ return getEntryBasicBlock()->getParent();
+}
+
+Module *Trace::getModule() const {
+ return getFunction()->getParent();
+}
+
+/// print - Write trace to output stream.
+///
+void Trace::print(raw_ostream &O) const {
+ Function *F = getFunction();
+ O << "; Trace from function " << F->getNameStr() << ", blocks:\n";
+ for (const_iterator i = begin(), e = end(); i != e; ++i) {
+ O << "; ";
+ WriteAsOperand(O, *i, true, getModule());
+ O << "\n";
+ }
+ O << "; Trace parent function: \n" << *F;
+}
+
+/// dump - Debugger convenience method; writes trace to standard error
+/// output stream.
+///
+void Trace::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
new file mode 100644
index 000000000000..0faf1398ec76
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -0,0 +1,300 @@
+//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBasedAliasAnalysis pass, which implements
+// metadata-based TBAA.
+//
+// In LLVM IR, memory does not have types, so LLVM's own type system is not
+// suitable for doing TBAA. Instead, metadata is added to the IR to describe
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
+//
+// The current metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+// !0 = metadata !{ metadata !"an example type tree" }
+// !1 = metadata !{ metadata !"int", metadata !0 }
+// !2 = metadata !{ metadata !"float", metadata !0 }
+// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+//
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
+//
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its descendants and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which if equal to 1
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+// struct X {
+// double d;
+// int i;
+// };
+// void foo(struct X *x, struct X *y, double *p) {
+// *x = *y;
+// *p = 0.0;
+// }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+
+namespace {
+ /// TBAANode - This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAANode {
+ const MDNode *Node;
+
+ public:
+ TBAANode() : Node(0) {}
+ explicit TBAANode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias tree parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
+
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+ };
+}
+
+namespace {
+ /// TypeBasedAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses TypeBased to answer queries.
+ class TypeBasedAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ TypeBasedAliasAnalysis() : ImmutablePass(ID) {
+ initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char TypeBasedAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
+ "Type-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
+ return new TypeBasedAliasAnalysis();
+}
+
+void
+TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool
+TypeBasedAliasAnalysis::Aliases(const MDNode *A,
+ const MDNode *B) const {
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A); ; ) {
+ if (T.getNode() == B)
+ // B is an ancestor of A.
+ return true;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B); ; ) {
+ if (T.getNode() == A)
+ // A is an ancestor of B.
+ return true;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ if (!EnableTBAA)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ const MDNode *AM = LocA.TBAATag;
+ if (!AM) return AliasAnalysis::alias(LocA, LocB);
+ const MDNode *BM = LocB.TBAATag;
+ if (!BM) return AliasAnalysis::alias(LocA, LocB);
+
+ // If they may alias, chain to the next AliasAnalysis.
+ if (Aliases(AM, BM))
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Otherwise return a definitive result.
+ return NoAlias;
+}
+
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableTBAA)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ const MDNode *M = Loc.TBAATag;
+ if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // If this is an "immutable" type, we can assume the pointer is pointing
+ // to constant memory.
+ if (TBAANode(M).TypeIsImmutable())
+ return true;
+
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefBehavior(CS);
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If this is an "immutable" type, we can assume the call doesn't write
+ // to memory.
+ if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (TBAANode(M).TypeIsImmutable())
+ Min = OnlyReadsMemory;
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+ // Functions don't have metadata. Just chain to the next implementation.
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ if (const MDNode *L = Loc.TBAATag)
+ if (const MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(L, M))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+ if (const MDNode *M1 =
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 =
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(M1, M2))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
new file mode 100644
index 000000000000..8f18dd278aa0
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -0,0 +1,1771 @@
+//===- ValueTracking.cpp - Walk computations to compute properties --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help analyze properties that chains of
+// computations have.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <cstring>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+const unsigned MaxDepth = 6;
+
+/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
+/// unknown returns 0). For vector types, returns the element type's bitwidth.
+static unsigned getBitWidth(const Type *Ty, const TargetData *TD) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+ assert(isa<PointerType>(Ty) && "Expected a pointer type!");
+ return TD ? TD->getPointerSizeInBits() : 0;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
+/// we cannot optimize based on the assumption that it is zero without changing
+/// it to be an explicit zero. If we don't change it to zero, other code could
+/// optimized based on the contradictory assumption that it is non-zero.
+/// Because instcombine aggressively folds operations with undef args anyway,
+/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
+void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ const TargetData *TD, unsigned Depth) {
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ unsigned BitWidth = Mask.getBitWidth();
+ assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
+ && "Not integer or pointer type!");
+ assert((!TD ||
+ TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ (!V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "V, Mask, KnownOne and KnownZero should have same BitWidth");
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ }
+ // Null and aggregate-zero are all-zeros.
+ if (isa<ConstantPointerNull>(V) ||
+ isa<ConstantAggregateZero>(V)) {
+ KnownOne.clearAllBits();
+ KnownZero = Mask;
+ return;
+ }
+ // Handle a constant vector by taking the intersection of the known bits of
+ // each element.
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ KnownZero.setAllBits(); KnownOne.setAllBits();
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
+ TD, Depth);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ }
+ return;
+ }
+ // The address of an aligned GlobalValue has trailing zeros.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ unsigned Align = GV->getAlignment();
+ if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
+ const Type *ObjectType = GV->getType()->getElementType();
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GV->isDeclaration() && !GV->mayBeOverridden())
+ Align = TD->getPrefTypeAlignment(ObjectType);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
+ if (Align > 0)
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ else
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ return;
+ }
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden()) {
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ } else {
+ ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ }
+ return;
+ }
+
+ KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Start out not knowing anything.
+
+ if (Depth == MaxDepth || Mask == 0)
+ return; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return;
+
+ APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::And: {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ APInt Mask2(Mask & ~KnownZero);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ }
+ case Instruction::Or: {
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ APInt Mask2(Mask & ~KnownOne);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ }
+ case Instruction::Xor: {
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case Instruction::Mul: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conserative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case Instruction::UDiv: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ ComputeMaskedBits(I->getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case Instruction::Select:
+ ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ return; // Can't work with floating point.
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ // We can't handle these if we don't know the pointer size.
+ if (!TD) return;
+ // FALL THROUGH and handle them the same as zext/trunc.
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+
+ unsigned SrcBitWidth;
+ // Note that we handle pointer operands here because of inttoptr/ptrtoint
+ // which fall through here.
+ if (SrcTy->isPointerTy())
+ SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
+ else
+ SrcBitWidth = SrcTy->getScalarSizeInBits();
+
+ APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth);
+ KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
+ KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
+ Depth+1);
+ KnownZero = KnownZero.zextOrTrunc(BitWidth);
+ KnownOne = KnownOne.zextOrTrunc(BitWidth);
+ // Any top bits are known to be zero.
+ if (BitWidth > SrcBitWidth)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::BitCast: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ // TODO: For now, not handling conversions like:
+ // (bitcast i64 %x to <2 x i32>)
+ !I->getType()->isVectorTy()) {
+ ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
+ Depth+1);
+ return;
+ }
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt MaskIn = Mask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
+ KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::Shl:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ APInt Mask2(Mask.lshr(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
+ return;
+ }
+ break;
+ case Instruction::LShr:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Unsigned shift right.
+ APInt Mask2(Mask.shl(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ // high bits known zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ return;
+ }
+ break;
+ case Instruction::AShr:
+ // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Signed shift right.
+ APInt Mask2(Mask.shl(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
+ KnownZero |= HighBits;
+ else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
+ KnownOne |= HighBits;
+ return;
+ }
+ break;
+ case Instruction::Sub: {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains less bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
+ TD, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case Instruction::Add: {
+ // If one of the operands has trailing zeros, then the bits that the
+ // other operand has in those bit positions will be preserved in the
+ // result. For an add, this works with either operand. For a subtract,
+ // this only works if the known zeros are in the right operand.
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
+ assert((LHSKnownZero & LHSKnownOne) == 0 &&
+ "Bits known to be one AND zero?");
+ unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+ ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+ // Determine which operand has more trailing zeros, and use that
+ // many bits from the other operand.
+ if (LHSKnownZeroOut > RHSKnownZeroOut) {
+ if (I->getOpcode() == Instruction::Add) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+ } else {
+ // If the known zeros are in the left operand for a subtract,
+ // fall back to the minimum known zeros in both operands.
+ KnownZero |= APInt::getLowBitsSet(BitWidth,
+ std::min(LHSKnownZeroOut,
+ RHSKnownZeroOut));
+ }
+ } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+ KnownZero |= LHSKnownZero & Mask;
+ KnownOne |= LHSKnownOne & Mask;
+ }
+
+ // Are we still trying to solve for the sign bit?
+ if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){
+ OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I);
+ if (OBO->hasNoSignedWrap()) {
+ if (I->getOpcode() == Instruction::Add) {
+ // Adding two positive numbers can't wrap into negative
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // and adding two negative numbers can't wrap into positive.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ } else {
+ // Subtracting a negative number from a positive one can't wrap
+ if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // neither can subtracting a positive number from a negative one.
+ else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ }
+ }
+
+ return;
+ }
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (Mask.isNegative() && KnownZero.isNonNegative()) {
+ APInt Mask2 = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero |= LHSKnownZero;
+ }
+
+ break;
+ case Instruction::URem: {
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne,
+ TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ TD, Depth+1);
+
+ unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ break;
+ }
+
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(V);
+ unsigned Align = AI->getAlignment();
+ if (Align == 0 && TD)
+ Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+
+ if (Align > 0)
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ // Analyze all of the subscripts of this getelementptr instruction
+ // to determine if we can prove known low zero bits.
+ APInt LocalMask = APInt::getAllOnesValue(BitWidth);
+ APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), LocalMask,
+ LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+
+ gep_type_iterator GTI = gep_type_begin(I);
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
+ Value *Index = I->getOperand(i);
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ // Handle struct member offset arithmetic.
+ if (!TD) return;
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ uint64_t Offset = SL->getElementOffset(Idx);
+ TrailZ = std::min(TrailZ,
+ CountTrailingZeros_64(Offset));
+ } else {
+ // Handle array index arithmetic.
+ const Type *IndexedTy = GTI.getIndexedType();
+ if (!IndexedTy->isSized()) return;
+ unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
+ uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+ LocalMask = APInt::getAllOnesValue(GEPOpiBits);
+ LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
+ ComputeMaskedBits(Index, LocalMask,
+ LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ TrailZ = std::min(TrailZ,
+ unsigned(CountTrailingZeros_64(TypeSize) +
+ LocalKnownZero.countTrailingOnes()));
+ }
+ }
+
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *P = cast<PHINode>(I);
+ // Handle the case of a simple two-predecessor recurrence PHI.
+ // There's a lot more that could theoretically be done here, but
+ // this is sufficient to catch some interesting cases.
+ if (P->getNumIncomingValues() == 2) {
+ for (unsigned i = 0; i != 2; ++i) {
+ Value *L = P->getIncomingValue(i);
+ Value *R = P->getIncomingValue(!i);
+ Operator *LU = dyn_cast<Operator>(L);
+ if (!LU)
+ continue;
+ unsigned Opcode = LU->getOpcode();
+ // Check for operations that have the property that if
+ // both their operands have low zero bits, the result
+ // will have low zero bits.
+ if (Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub ||
+ Opcode == Instruction::And ||
+ Opcode == Instruction::Or ||
+ Opcode == Instruction::Mul) {
+ Value *LL = LU->getOperand(0);
+ Value *LR = LU->getOperand(1);
+ // Find a recurrence.
+ if (LL == I)
+ L = LR;
+ else if (LR == I)
+ L = LL;
+ else
+ break;
+ // Ok, we have a PHI of the form L op= R. Check for low
+ // zero bits.
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+ Mask2 = APInt::getLowBitsSet(BitWidth,
+ KnownZero2.countTrailingOnes());
+
+ // We need to take the minimum number of known bits
+ APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
+ ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1);
+
+ KnownZero = Mask &
+ APInt::getLowBitsSet(BitWidth,
+ std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+ break;
+ }
+ }
+ }
+
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (P->getNumIncomingValues() == 0)
+ return;
+
+ // Otherwise take the unions of the known bit sets of the operands,
+ // taking conservative care to avoid excessive recursion.
+ if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+ // Skip if every incoming value references to ourself.
+ if (P->hasConstantValue() == P)
+ break;
+
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
+ // Skip direct self references.
+ if (P->getIncomingValue(i) == P) continue;
+
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ // Recurse, but cap the recursion to one level, because we don't
+ // want to waste time spinning around in loops.
+ ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne,
+ KnownZero2, KnownOne2, TD, MaxDepth-1);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ // If all bits have been ruled out, there's no need to check
+ // more operands.
+ if (!KnownZero && !KnownOne)
+ break;
+ }
+ }
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ }
+ }
+ break;
+ }
+}
+
+/// ComputeSignBit - Determine whether the sign bit is known to be zero or
+/// one. Convenience wrapper around ComputeMaskedBits.
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const TargetData *TD, unsigned Depth) {
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+ if (!BitWidth) {
+ KnownZero = false;
+ KnownOne = false;
+ return;
+ }
+ APInt ZeroBits(BitWidth, 0);
+ APInt OneBits(BitWidth, 0);
+ ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD,
+ Depth);
+ KnownOne = OneBits[BitWidth - 1];
+ KnownZero = ZeroBits[BitWidth - 1];
+}
+
+/// isPowerOfTwo - Return true if the given value is known to have exactly one
+/// bit set when defined. For vectors return true if every element is known to
+/// be a power of two when defined. Supports values with integer or pointer
+/// types and vectors of integers.
+bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI->getValue().isPowerOf2();
+ // TODO: Handle vector constants.
+
+ // 1 << X is clearly a power of two if the one is not shifted off the end. If
+ // it is shifted off the end then the result is undefined.
+ if (match(V, m_Shl(m_One(), m_Value())))
+ return true;
+
+ // (signbit) >>l X is clearly a power of two if the one is not shifted off the
+ // bottom. If it is shifted off the bottom then the result is undefined.
+ if (match(V, m_LShr(m_SignBit(), m_Value())))
+ return true;
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ return isPowerOfTwo(ZI->getOperand(0), TD, Depth);
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
+ isPowerOfTwo(SI->getFalseValue(), TD, Depth);
+
+ // An exact divide or right shift can only shift off zero bits, so the result
+ // is a power of two only if the first operand is a power of two and not
+ // copying a sign bit (sdiv int_min, 2).
+ if (match(V, m_LShr(m_Value(), m_Value())) ||
+ match(V, m_UDiv(m_Value(), m_Value()))) {
+ PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V);
+ if (PEO->isExact())
+ return isPowerOfTwo(PEO->getOperand(0), TD, Depth);
+ }
+
+ return false;
+}
+
+/// isKnownNonZero - Return true if the given value is known to be non-zero
+/// when defined. For vectors return true if every element is known to be
+/// non-zero when defined. Supports values with integer or pointer type and
+/// vectors of integers.
+bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return false;
+ if (isa<ConstantInt>(C))
+ // Must be non-zero due to null test above.
+ return true;
+ // TODO: Handle vectors
+ return false;
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+
+ // X | Y != 0 if X != 0 or Y != 0.
+ Value *X = 0, *Y = 0;
+ if (match(V, m_Or(m_Value(X), m_Value(Y))))
+ return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+
+ // ext X != 0 if X != 0.
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V))
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+
+ // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+ // if the lowest bit is shifted off the end.
+ if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+ // shl nuw can't remove any non-zero bits.
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->hasNoUnsignedWrap())
+ return isKnownNonZero(X, TD, Depth);
+
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
+ if (KnownOne[0])
+ return true;
+ }
+ // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+ // defined if the sign bit is shifted off the end.
+ else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+ // shr exact can only shift out zero bits.
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->isExact())
+ return isKnownNonZero(X, TD, Depth);
+
+ bool XKnownNonNegative, XKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ if (XKnownNegative)
+ return true;
+ }
+ // div exact can only produce a zero if the dividend is zero.
+ else if (match(V, m_IDiv(m_Value(X), m_Value()))) {
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->isExact())
+ return isKnownNonZero(X, TD, Depth);
+ }
+ // X + Y.
+ else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ bool XKnownNonNegative, XKnownNegative;
+ bool YKnownNonNegative, YKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+
+ // If X and Y are both non-negative (as signed values) then their sum is not
+ // zero unless both X and Y are zero.
+ if (XKnownNonNegative && YKnownNonNegative)
+ if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+ return true;
+
+ // If X and Y are both negative (as signed values) then their sum is not
+ // zero unless both X and Y equal INT_MIN.
+ if (BitWidth && XKnownNegative && YKnownNegative) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ APInt Mask = APInt::getSignedMaxValue(BitWidth);
+ // The sign bit of X is set. If some other bit is set then X is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ // The sign bit of Y is set. If some other bit is set then Y is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ }
+
+ // The sum of a non-negative number and a power of two is not zero.
+ if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth))
+ return true;
+ if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth))
+ return true;
+ }
+ // (C ? X : Y) != 0 if X != 0 and Y != 0.
+ else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
+ isKnownNonZero(SI->getFalseValue(), TD, Depth))
+ return true;
+ }
+
+ if (!BitWidth) return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne,
+ TD, Depth);
+ return KnownOne != 0;
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
+ const TargetData *TD, unsigned Depth) {
+ APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "ashr X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+///
+/// 'Op' must have a scalar integer type.
+///
+unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
+ unsigned Depth) {
+ assert((TD || V->getType()->isIntOrIntVectorTy()) &&
+ "ComputeNumSignBits requires a TargetData object to operate "
+ "on non-integer values!");
+ const Type *Ty = V->getType();
+ unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
+ Ty->getScalarSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ // Note that ConstantInt is handled by the general ComputeMaskedBits case
+ // below.
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ Operator *U = dyn_cast<Operator>(V);
+ switch (Operator::getOpcode(V)) {
+ default: break;
+ case Instruction::SExt:
+ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
+ return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+
+ case Instruction::AShr:
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ // ashr X, C -> adds C sign bits.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ // vector ashr X, <C, C, C, C> -> adds C sign bits
+ if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+ Tmp += CI->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ }
+ return Tmp;
+ case Instruction::Shl:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (C->getZExtValue() >= TyBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case Instruction::Select:
+ Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case Instruction::Add:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD,
+ Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::Sub:
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(U);
+ // Don't analyze large in-degree PHIs.
+ if (PN->getNumIncomingValues() > 4) break;
+
+ // Take the minimum of all incoming values. This can't infinitely loop
+ // because of our depth threshold.
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (Tmp == 1) return Tmp;
+ Tmp = std::min(Tmp,
+ ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+ }
+ return Tmp;
+ }
+
+ case Instruction::Trunc:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-TyBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
+}
+
+/// ComputeMultiple - This function computes the integer multiple of Base that
+/// equals V. If successful, it returns true and returns the multiple in
+/// Multiple. If unsuccessful, it returns false. It looks
+/// through SExt instructions only if LookThroughSExt is true.
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+ bool LookThroughSExt, unsigned Depth) {
+ const unsigned MaxDepth = 6;
+
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
+
+ const Type *T = V->getType();
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+ if (Base == 0)
+ return false;
+
+ if (Base == 1) {
+ Multiple = V;
+ return true;
+ }
+
+ ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+ Constant *BaseVal = ConstantInt::get(T, Base);
+ if (CO && CO == BaseVal) {
+ // Multiple is 1.
+ Multiple = ConstantInt::get(T, 1);
+ return true;
+ }
+
+ if (CI && CI->getZExtValue() % Base == 0) {
+ Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+ return true;
+ }
+
+ if (Depth == MaxDepth) return false; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::SExt:
+ if (!LookThroughSExt) return false;
+ // otherwise fall through to ZExt
+ case Instruction::ZExt:
+ return ComputeMultiple(I->getOperand(0), Base, Multiple,
+ LookThroughSExt, Depth+1);
+ case Instruction::Shl:
+ case Instruction::Mul: {
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+
+ if (I->getOpcode() == Instruction::Shl) {
+ ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+ if (!Op1CI) return false;
+ // Turn Op0 << Op1 into Op0 * 2^Op1
+ APInt Op1Int = Op1CI->getValue();
+ uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+ APInt API(Op1Int.getBitWidth(), 0);
+ API.setBit(BitToSet);
+ Op1 = ConstantInt::get(V->getContext(), API);
+ }
+
+ Value *Mul0 = NULL;
+ if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1))
+ if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+ if (Op1C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+ if (Op1C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(MulC, Op1C);
+ return true;
+ }
+
+ if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+ if (Mul0CI->getValue() == 1) {
+ // V == Base * Op1, so return Op1
+ Multiple = Op1;
+ return true;
+ }
+ }
+
+ Value *Mul1 = NULL;
+ if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+ if (Constant *Op0C = dyn_cast<Constant>(Op0))
+ if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+ if (Op0C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+ if (Op0C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(MulC, Op0C);
+ return true;
+ }
+
+ if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+ if (Mul1CI->getValue() == 1) {
+ // V == Base * Op0, so return Op0
+ Multiple = Op0;
+ return true;
+ }
+ }
+ }
+ }
+
+ // We could not determine if V is a multiple of Base.
+ return false;
+}
+
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
+/// value is never equal to -0.0.
+///
+/// NOTE: this function will need to be revisited when we support non-default
+/// rounding modes!
+///
+bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegZero();
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (I == 0) return false;
+
+ // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
+ if (I->getOpcode() == Instruction::FAdd &&
+ isa<ConstantFP>(I->getOperand(1)) &&
+ cast<ConstantFP>(I->getOperand(1))->isNullValue())
+ return true;
+
+ // sitofp and uitofp turn into +0.0 for zero.
+ if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+ return true;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // sqrt(-0.0) = -0.0, no other negative results are possible.
+ if (II->getIntrinsicID() == Intrinsic::sqrt)
+ return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction()) {
+ if (F->isDeclaration()) {
+ // abs(x) != -0.0
+ if (F->getName() == "abs") return true;
+ // fabs[lf](x) != -0.0
+ if (F->getName() == "fabs") return true;
+ if (F->getName() == "fabsf") return true;
+ if (F->getName() == "fabsl") return true;
+ if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+ F->getName() == "sqrtl")
+ return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
+ }
+ }
+
+ return false;
+}
+
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with. This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+ // All byte-wide stores are splatable, even of arbitrary variables.
+ if (V->getType()->isIntegerTy(8)) return V;
+
+ // Handle 'null' ConstantArrayZero etc.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+
+ // Constant float and double values can be handled as integer values if the
+ // corresponding integer value is "byteable". An important case is 0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+ if (CFP->getType()->isDoubleTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ // Don't handle long double formats, which have strange constraints.
+ }
+
+ // We can handle constant integers that are power of two in size and a
+ // multiple of 8 bits.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ unsigned Width = CI->getBitWidth();
+ if (isPowerOf2_32(Width) && Width > 8) {
+ // We can handle this value if the recursive binary decomposition is the
+ // same at all levels.
+ APInt Val = CI->getValue();
+ APInt Val2;
+ while (Val.getBitWidth() != 8) {
+ unsigned NextWidth = Val.getBitWidth()/2;
+ Val2 = Val.lshr(NextWidth);
+ Val2 = Val2.trunc(Val.getBitWidth()/2);
+ Val = Val.trunc(Val.getBitWidth()/2);
+
+ // If the top/bottom halves aren't the same, reject it.
+ if (Val != Val2)
+ return 0;
+ }
+ return ConstantInt::get(V->getContext(), Val);
+ }
+ }
+
+ // A ConstantArray is splatable if all its members are equal and also
+ // splatable.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ if (CA->getNumOperands() == 0)
+ return 0;
+
+ Value *Val = isBytewiseValue(CA->getOperand(0));
+ if (!Val)
+ return 0;
+
+ for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
+ if (CA->getOperand(I-1) != CA->getOperand(I))
+ return 0;
+
+ return Val;
+ }
+
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return 0;
+}
+
+
+// This is the recursive version of BuildSubAggregate. It takes a few different
+// arguments. Idxs is the index within the nested struct From that we are
+// looking at now (which is of type IndexedType). IdxSkip is the number of
+// indices from Idxs that should be left out when inserting into the resulting
+// struct. To is the result struct built so far, new insertvalue instructions
+// build on that.
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+ SmallVector<unsigned, 10> &Idxs,
+ unsigned IdxSkip,
+ Instruction *InsertBefore) {
+ const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+ if (STy) {
+ // Save the original To argument so we can modify it
+ Value *OrigTo = To;
+ // General case, the type indexed by Idxs is a struct
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // Process each struct element recursively
+ Idxs.push_back(i);
+ Value *PrevTo = To;
+ To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+ InsertBefore);
+ Idxs.pop_back();
+ if (!To) {
+ // Couldn't find any inserted value for this index? Cleanup
+ while (PrevTo != OrigTo) {
+ InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+ PrevTo = Del->getAggregateOperand();
+ Del->eraseFromParent();
+ }
+ // Stop processing elements
+ break;
+ }
+ }
+ // If we successfully found a value for each of our subaggregates
+ if (To)
+ return To;
+ }
+ // Base case, the type indexed by SourceIdxs is not a struct, or not all of
+ // the struct's elements had a value that was inserted directly. In the latter
+ // case, perhaps we can't determine each of the subelements individually, but
+ // we might be able to find the complete struct somewhere.
+
+ // Find the value that is at that particular spot
+ Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+
+ if (!V)
+ return NULL;
+
+ // Insert the value in the new (sub) aggregrate
+ return llvm::InsertValueInst::Create(To, V, Idxs.begin() + IdxSkip,
+ Idxs.end(), "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+// { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+// { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work if
+// each of the elements of the substruct are known (ie, inserted into From by an
+// insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
+ const unsigned *idx_end,
+ Instruction *InsertBefore) {
+ assert(InsertBefore && "Must have someplace to insert!");
+ const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+ idx_begin,
+ idx_end);
+ Value *To = UndefValue::get(IndexedType);
+ SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
+ unsigned IdxSkip = Idxs.size();
+
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregrate and an sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if it
+/// were inserted directly into the aggregrate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
+ const unsigned *idx_end, Instruction *InsertBefore) {
+ // Nothing to index? Just return V then (this is useful at the end of our
+ // recursion)
+ if (idx_begin == idx_end)
+ return V;
+ // We have indices, so V should have an indexable type
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
+ && "Not looking at a struct or array?");
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
+ && "Invalid indices for type?");
+ const CompositeType *PTy = cast<CompositeType>(V->getType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
+ idx_begin,
+ idx_end));
+ else if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
+ idx_begin,
+ idx_end));
+ else if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
+ // Recursively process this constant
+ return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
+ idx_end, InsertBefore);
+ } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+ // Loop the indices for the insertvalue instruction in parallel with the
+ // requested indices
+ const unsigned *req_idx = idx_begin;
+ for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+ i != e; ++i, ++req_idx) {
+ if (req_idx == idx_end) {
+ if (InsertBefore)
+ // The requested index identifies a part of a nested aggregate. Handle
+ // this specially. For example,
+ // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+ // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+ // %C = extractvalue {i32, { i32, i32 } } %B, 1
+ // This can be changed into
+ // %A = insertvalue {i32, i32 } undef, i32 10, 0
+ // %C = insertvalue {i32, i32 } %A, i32 11, 1
+ // which allows the unused 0,0 element from the nested struct to be
+ // removed.
+ return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+ else
+ // We can't handle this without inserting insertvalues
+ return 0;
+ }
+
+ // This insert value inserts something else than what we are looking for.
+ // See if the (aggregrate) value inserted into has the value we are
+ // looking for, then.
+ if (*req_idx != *i)
+ return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
+ InsertBefore);
+ }
+ // If we end up here, the indices of the insertvalue match with those
+ // requested (though possibly only partially). Now we recursively look at
+ // the inserted value, passing any remaining indices.
+ return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
+ InsertBefore);
+ } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+ // If we're extracting a value from an aggregrate that was extracted from
+ // something else, we can extract from that something else directly instead.
+ // However, we will need to chain I's indices with the requested indices.
+
+ // Calculate the number of indices required
+ unsigned size = I->getNumIndices() + (idx_end - idx_begin);
+ // Allocate some space to put the new indices in
+ SmallVector<unsigned, 5> Idxs;
+ Idxs.reserve(size);
+ // Add indices from the extract value instruction
+ for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+ i != e; ++i)
+ Idxs.push_back(*i);
+
+ // Add requested indices
+ for (const unsigned *i = idx_begin, *e = idx_end; i != e; ++i)
+ Idxs.push_back(*i);
+
+ assert(Idxs.size() == size
+ && "Number of indices added not correct?");
+
+ return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
+ InsertBefore);
+ }
+ // Otherwise, we don't know (such as, extracting from a function return value
+ // or load instruction)
+ return 0;
+}
+
+/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+/// it can be expressed as a base pointer plus a constant offset. Return the
+/// base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const TargetData &TD) {
+ Operator *PtrOp = dyn_cast<Operator>(Ptr);
+ if (PtrOp == 0) return Ptr;
+
+ // Just look through bitcasts.
+ if (PtrOp->getOpcode() == Instruction::BitCast)
+ return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+ // If this is a GEP with constant indices, we can look through it.
+ GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+ if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = cast<ConstantInt>(*I);
+ if (OpC->isZero()) continue;
+
+ // Handle a struct and array indices which add their offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += OpC->getSExtValue()*Size;
+ }
+ }
+
+ // Re-sign extend from the pointer size if needed to get overflow edge cases
+ // right.
+ unsigned PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize < 64)
+ Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+ return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
+/// GetConstantStringInfo - This function computes the length of a
+/// null-terminated C string pointed to by V. If successful, it returns true
+/// and returns the string in Str. If unsuccessful, it returns false.
+bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
+ uint64_t Offset,
+ bool StopAtNul) {
+ // If V is NULL then return false;
+ if (V == NULL) return false;
+
+ // Look through bitcast instructions.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
+
+ // If the value is not a GEP instruction nor a constant expression with a
+ // GEP instruction, then return false because ConstantArray can't occur
+ // any other way
+ const User *GEP = 0;
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::BitCast)
+ return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return false;
+ GEP = CE;
+ }
+
+ if (GEP) {
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return false;
+
+ // Make sure the index-ee is a pointer to array of i8.
+ const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+ const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+ if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (FirstIdx == 0 || !FirstIdx->isZero())
+ return false;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return false;
+ return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
+ StopAtNul);
+ }
+
+ // The GEP instruction, constant or instruction, must reference a global
+ // variable that is a constant and is initialized. The referenced constant
+ // initializer is the array that we'll use for optimization.
+ const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return false;
+ const Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case
+ if (isa<ConstantAggregateZero>(GlobalInit)) {
+ // This is a degenerate case. The initializer is constant zero so the
+ // length of the string must be zero.
+ Str.clear();
+ return true;
+ }
+
+ // Must be a Constant Array
+ const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+ if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ if (Offset > NumElts)
+ return false;
+
+ // Traverse the constant array from 'Offset' which is the place the GEP refers
+ // to in the array.
+ Str.reserve(NumElts-Offset);
+ for (unsigned i = Offset; i != NumElts; ++i) {
+ const Constant *Elt = Array->getOperand(i);
+ const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return false;
+ if (StopAtNul && CI->isZero())
+ return true; // we found end of string, success!
+ Str += (char)CI->getZExtValue();
+ }
+
+ // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
+ return true;
+}
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+ // Look through noop bitcast instructions.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+ // If this is a PHI node, there are two cases: either we have already seen it
+ // or we haven't.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (!PHIs.insert(PN))
+ return ~0ULL; // already in the set.
+
+ // If it was new, see if all the input strings are the same length.
+ uint64_t LenSoFar = ~0ULL;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ if (Len == 0) return 0; // Unknown length -> unknown.
+
+ if (Len == ~0ULL) continue;
+
+ if (Len != LenSoFar && LenSoFar != ~0ULL)
+ return 0; // Disagree -> unknown.
+ LenSoFar = Len;
+ }
+
+ // Success, all agree.
+ return LenSoFar;
+ }
+
+ // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ if (Len1 == 0) return 0;
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ if (Len2 == 0) return 0;
+ if (Len1 == ~0ULL) return Len2;
+ if (Len2 == ~0ULL) return Len1;
+ if (Len1 != Len2) return 0;
+ return Len1;
+ }
+
+ // If the value is not a GEP instruction nor a constant expression with a
+ // GEP instruction, then return unknown.
+ User *GEP = 0;
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return 0;
+ GEP = CE;
+ } else {
+ return 0;
+ }
+
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return 0;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+ if (!Idx->isZero())
+ return 0;
+ } else
+ return 0;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return 0;
+
+ // The GEP instruction, constant or instruction, must reference a global
+ // variable that is a constant and is initialized. The referenced constant
+ // initializer is the array that we'll use for optimization.
+ GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+ GV->mayBeOverridden())
+ return 0;
+ Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case, which is a degenerate case. The
+ // initializer is constant zero so the length of the string must be zero.
+ if (isa<ConstantAggregateZero>(GlobalInit))
+ return 1; // Len = 0 offset by 1.
+
+ // Must be a Constant Array
+ ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+ if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ // Traverse the constant array from StartIdx (derived above) which is
+ // the place the GEP refers to in the array.
+ for (unsigned i = StartIdx; i != NumElts; ++i) {
+ Constant *Elt = Array->getOperand(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return 0;
+ if (CI->isZero())
+ return i-StartIdx+1; // We found end of string, success!
+ }
+
+ return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0;
+
+ SmallPtrSet<PHINode*, 32> PHIs;
+ uint64_t Len = GetStringLengthH(V, PHIs);
+ // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
+ // an empty string as a length.
+ return Len == ~0ULL ? 1 : Len;
+}
+
+Value *
+llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ // See if InstructionSimplify knows any relevant tricks.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Acquire a DominatorTree and use it.
+ if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}